njgzx / dataease · Commits

Commit a02df9d8
Authored April 13, 2021 by junjie
Parent: a11eb21c

feat(backend): spark cache 初步实现 (initial Spark cache implementation)
Showing 9 changed files with 332 additions and 109 deletions (+332 −109)
CommonThreadPool.java           ...main/java/io/dataease/commons/utils/CommonThreadPool.java      +99  −0
CommonConfig.java               backend/src/main/java/io/dataease/config/CommonConfig.java        +13  −14
AppStartListener.java           .../src/main/java/io/dataease/listener/AppStartListener.java      +2   −0
AppStartReadHBaseListener.java  .../java/io/dataease/listener/AppStartReadHBaseListener.java      +58  −0
ChartViewService.java           ...main/java/io/dataease/service/chart/ChartViewService.java      +7   −3
DataSetTableFieldsService.java  ...o/dataease/service/dataset/DataSetTableFieldsService.java      +6   −0
ExtractDataService.java         .../java/io/dataease/service/dataset/ExtractDataService.java      +40  −34
CacheUtil.java                  ...nd/src/main/java/io/dataease/service/spark/CacheUtil.java      +53  −0
SparkCalc.java                  ...nd/src/main/java/io/dataease/service/spark/SparkCalc.java      +54  −58
backend/src/main/java/io/dataease/commons/utils/CommonThreadPool.java (new file, 0 → 100644)
package io.dataease.commons.utils;

import javax.annotation.PostConstruct;
import javax.annotation.PreDestroy;
import java.util.concurrent.*;

/**
 * @Author gin
 * @Date 2021/4/13 4:08 PM
 */
public class CommonThreadPool {

    private int corePoolSize = 10;
    private int maxQueueSize = 10;
    private int keepAliveSeconds = 600;

    private ScheduledThreadPoolExecutor scheduledThreadPoolExecutor;

    @PostConstruct
    public void init() {
        scheduledThreadPoolExecutor = new ScheduledThreadPoolExecutor(corePoolSize);
        scheduledThreadPoolExecutor.setKeepAliveTime(keepAliveSeconds, TimeUnit.SECONDS);
    }

    @PreDestroy
    public void shutdown() {
        if (scheduledThreadPoolExecutor != null) {
            scheduledThreadPoolExecutor.shutdown();
        }
    }

    /**
     * Whether the pool can accept more work (current queue size is within the maximum queue size)
     *
     * @return true if available, false otherwise
     */
    public boolean available() {
        return scheduledThreadPoolExecutor.getQueue().size() <= maxQueueSize;
    }

    /**
     * Add a task without enforcing the queue limit
     *
     * @param task task to run
     */
    public void addTask(Runnable task) {
        scheduledThreadPoolExecutor.execute(task);
    }

    /**
     * Add a delayed task without enforcing the queue limit
     *
     * @param task  task to run
     * @param delay delay before execution
     * @param unit  time unit of the delay
     */
    public void scheduleTask(Runnable task, long delay, TimeUnit unit) {
        scheduledThreadPoolExecutor.schedule(task, delay, unit);
    }

    /**
     * Add a task with a timeout (a task that has not finished within the timeout is abandoned and
     * removed from the pool, so long-running tasks cannot occupy it indefinitely)
     *
     * @param task     task to run
     * @param timeOut  timeout
     * @param timeUnit time unit of the timeout
     */
    public void addTask(Runnable task, long timeOut, TimeUnit timeUnit) {
        scheduledThreadPoolExecutor.execute(() -> {
            ExecutorService executorService = Executors.newSingleThreadExecutor();
            try {
                Future future = executorService.submit(task);
                future.get(timeOut, timeUnit); // blocks until the task finishes or the timeout expires
            } catch (TimeoutException timeoutException) {
                LogUtil.getLogger().error("timeout to execute task", timeoutException);
            } catch (Exception exception) {
                LogUtil.getLogger().error("failed to execute task", exception);
            } finally {
                if (!executorService.isShutdown()) {
                    executorService.shutdown();
                }
            }
        });
    }

    public void setCorePoolSize(int corePoolSize) {
        this.corePoolSize = corePoolSize;
    }

    public void setMaxQueueSize(int maxQueueSize) {
        this.maxQueueSize = maxQueueSize;
    }

    public void setKeepAliveSeconds(int keepAliveSeconds) {
        this.keepAliveSeconds = keepAliveSeconds;
    }
}
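A brief usage sketch (not part of the commit; the service class and the five-minute timeout are illustrative assumptions): the pool is registered as a Spring bean in CommonConfig below, so callers inject it, optionally check available(), and submit work. The timeout overload runs the task on a throwaway single-thread executor and logs an error and shuts that executor down if the task has not finished in time.

    import io.dataease.commons.utils.CommonThreadPool;
    import org.springframework.stereotype.Service;

    import javax.annotation.Resource;
    import java.util.concurrent.TimeUnit;

    @Service
    public class WarmupExampleService { // hypothetical caller, for illustration only

        @Resource
        private CommonThreadPool commonThreadPool;

        public void submitWarmup(String datasetTableId) {
            if (commonThreadPool.available()) {
                // abandon the warm-up attempt if it runs longer than 5 minutes (illustrative value)
                commonThreadPool.addTask(
                        () -> System.out.println("warming cache for dataset " + datasetTableId),
                        5, TimeUnit.MINUTES);
            }
        }
    }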
backend/src/main/java/io/dataease/config/CommonConfig.java
package io.dataease.config;

import com.fit2cloud.autoconfigure.QuartzAutoConfiguration;
import io.dataease.commons.utils.CommonThreadPool;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.SparkSession;
...

@@ -33,31 +34,20 @@ public class CommonConfig {
         return configuration;
     }

     @Bean
     @ConditionalOnMissingBean
-    public JavaSparkContext javaSparkContext() {
+    public SparkSession javaSparkSession() {
         SparkSession spark = SparkSession.builder()
                 .appName(env.getProperty("spark.appName", "DataeaseJob"))
                 .master(env.getProperty("spark.master", "local[*]"))
                 .config("spark.scheduler.mode", "FAIR")
                 .getOrCreate();
-        JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
-        return sc;
-    }
-
-    @Bean
-    @ConditionalOnMissingBean
-    public SQLContext sqlContext(JavaSparkContext javaSparkContext) {
-        SQLContext sqlContext = new SQLContext(javaSparkContext);
-        sqlContext.setConf("spark.sql.shuffle.partitions", env.getProperty("spark.sql.shuffle.partitions", "1"));
-        sqlContext.setConf("spark.default.parallelism", env.getProperty("spark.default.parallelism", "1"));
-        return sqlContext;
+        return spark;
     }

     @Bean
     @ConditionalOnMissingBean
-    public KettleFileRepository kettleFileRepository() throws Exception{
+    public KettleFileRepository kettleFileRepository() throws Exception {
         KettleEnvironment.init();
         KettleFileRepository repository = new KettleFileRepository();
         KettleFileRepositoryMeta kettleDatabaseMeta = new KettleFileRepositoryMeta("KettleFileRepository", "repo",
...

@@ -65,4 +55,13 @@ public class CommonConfig {
         repository.init(kettleDatabaseMeta);
         return repository;
     }
+
+    @Bean(destroyMethod = "shutdown")
+    public CommonThreadPool resourcePoolThreadPool() {
+        CommonThreadPool commonThreadPool = new CommonThreadPool();
+        commonThreadPool.setCorePoolSize(20);
+        commonThreadPool.setMaxQueueSize(100);
+        commonThreadPool.setKeepAliveSeconds(3600);
+        return commonThreadPool;
+    }
 }
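With this change, Spring exposes a single shared SparkSession instead of separate JavaSparkContext/SQLContext beans; code that still needs the lower-level contexts derives them from the session per call, as SparkCalc does further down. A minimal sketch of that pattern (the class name is illustrative; CommonBeanFactory and the constructors mirror how the diff itself uses them):

    import io.dataease.commons.utils.CommonBeanFactory;
    import org.apache.spark.api.java.JavaSparkContext;
    import org.apache.spark.sql.SQLContext;
    import org.apache.spark.sql.SparkSession;

    public class SparkContextDerivationSketch { // illustration of the pattern used in SparkCalc

        public SQLContext deriveSqlContext() {
            // fetch the shared SparkSession bean defined in CommonConfig
            SparkSession spark = CommonBeanFactory.getBean(SparkSession.class);
            // wrap its SparkContext on demand instead of holding separate context beans
            JavaSparkContext sparkContext = new JavaSparkContext(spark.sparkContext());
            return new SQLContext(sparkContext);
        }
    }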
backend/src/main/java/io/dataease/listener/AppStartListener.java
@@ -6,12 +6,14 @@ import io.dataease.service.ScheduleService;
 import io.dataease.service.dataset.DataSetTableTaskService;
 import org.springframework.boot.context.event.ApplicationReadyEvent;
 import org.springframework.context.ApplicationListener;
+import org.springframework.core.annotation.Order;
 import org.springframework.stereotype.Component;

 import javax.annotation.Resource;
 import java.util.List;

 @Component
+@Order(value = 1)
 public class AppStartListener implements ApplicationListener<ApplicationReadyEvent> {

     @Resource
     private ScheduleService scheduleService;
backend/src/main/java/io/dataease/listener/AppStartReadHBaseListener.java (new file, 0 → 100644)
package io.dataease.listener;

import io.dataease.base.domain.DatasetTable;
import io.dataease.base.domain.DatasetTableExample;
import io.dataease.base.domain.DatasetTableField;
import io.dataease.base.domain.DatasetTableFieldExample;
import io.dataease.base.mapper.DatasetTableFieldMapper;
import io.dataease.base.mapper.DatasetTableMapper;
import io.dataease.commons.utils.CommonBeanFactory;
import io.dataease.commons.utils.CommonThreadPool;
import io.dataease.service.dataset.DataSetTableFieldsService;
import io.dataease.service.spark.SparkCalc;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.SparkSession;
import org.springframework.boot.context.event.ApplicationReadyEvent;
import org.springframework.context.ApplicationListener;
import org.springframework.core.annotation.Order;
import org.springframework.core.env.Environment;
import org.springframework.stereotype.Component;

import javax.annotation.Resource;
import java.util.List;

@Component
@Order(value = 2)
public class AppStartReadHBaseListener implements ApplicationListener<ApplicationReadyEvent> {

    @Resource
    private CommonThreadPool commonThreadPool;

    @Resource
    private SparkCalc sparkCalc;

    @Resource
    private Environment env; // holds values from the configuration files

    @Resource
    private DatasetTableMapper datasetTableMapper;
    @Resource
    private DataSetTableFieldsService dataSetTableFieldsService;

    @Override
    public void onApplicationEvent(ApplicationReadyEvent applicationReadyEvent) {
        System.out.println("================= Read HBase start =================");
        // On startup, find the datasets that are extracted on a schedule and load their data from HBase into the cache
        DatasetTableExample datasetTableExample = new DatasetTableExample();
        datasetTableExample.createCriteria().andModeEqualTo(1);
        List<DatasetTable> datasetTables = datasetTableMapper.selectByExampleWithBLOBs(datasetTableExample);
        for (DatasetTable table : datasetTables) {
            commonThreadPool.addTask(() -> {
                try {
                    List<DatasetTableField> fields = dataSetTableFieldsService.getFieldsByTableId(table.getId());
                    sparkCalc.getHBaseDataAndCache(table.getId(), fields);
                } catch (Exception e) {
                    e.printStackTrace();
                }
            });
        }
    }
}
backend/src/main/java/io/dataease/service/chart/ChartViewService.java
@@ -4,6 +4,7 @@ import com.google.gson.Gson;
 import com.google.gson.reflect.TypeToken;
 import io.dataease.base.domain.*;
 import io.dataease.base.mapper.ChartViewMapper;
+import io.dataease.base.mapper.DatasetTableFieldMapper;
 import io.dataease.commons.utils.AuthUtils;
 import io.dataease.commons.utils.BeanUtils;
 import io.dataease.controller.request.chart.ChartViewRequest;
@@ -16,6 +17,7 @@ import io.dataease.dto.chart.ChartViewDTO;
 import io.dataease.dto.chart.ChartViewFieldDTO;
 import io.dataease.dto.chart.Series;
 import io.dataease.dto.dataset.DataTableInfoDTO;
+import io.dataease.service.dataset.DataSetTableFieldsService;
 import io.dataease.service.dataset.DataSetTableService;
 import io.dataease.service.spark.SparkCalc;
 import org.apache.commons.collections4.CollectionUtils;
@@ -41,6 +43,8 @@ public class ChartViewService {
     private DatasourceService datasourceService;
     @Resource
     private SparkCalc sparkCalc;
+    @Resource
+    private DataSetTableFieldsService dataSetTableFieldsService;

     public ChartViewWithBLOBs save(ChartViewWithBLOBs chartView) {
         checkName(chartView);
@@ -121,9 +125,9 @@ public class ChartViewService {
             }
             data = datasourceProvider.getData(datasourceRequest);
         } else if (table.getMode() == 1) {// extracted dataset
-            // DataTableInfoDTO dataTableInfoDTO = new Gson().fromJson(table.getInfo(), DataTableInfoDTO.class);
-            // String tableName = dataTableInfoDTO.getTable() + "-" + table.getDataSourceId();// todo hBase table name maybe change
-            data = sparkCalc.getData(table.getId(), xAxis, yAxis, "tmp_" + view.getId().split("-")[0]);
+            // get the dataset's fields
+            List<DatasetTableField> fields = dataSetTableFieldsService.getFieldsByTableId(table.getId());
+            data = sparkCalc.getData(table.getId(), fields, xAxis, yAxis, "tmp_" + view.getId().split("-")[0]);
         }

         // chart components can be extended further
backend/src/main/java/io/dataease/service/dataset/DataSetTableFieldsService.java
@@ -60,4 +60,10 @@ public class DataSetTableFieldsService {
         datasetTableFieldExample.createCriteria().andIdIn(ids);
         return datasetTableFieldMapper.selectByExample(datasetTableFieldExample);
     }
+
+    public List<DatasetTableField> getFieldsByTableId(String id) {
+        DatasetTableFieldExample datasetTableFieldExample = new DatasetTableFieldExample();
+        datasetTableFieldExample.createCriteria().andTableIdEqualTo(id);
+        return datasetTableFieldMapper.selectByExample(datasetTableFieldExample);
+    }
 }
backend/src/main/java/io/dataease/service/dataset/ExtractDataService.java
@@ -13,6 +13,7 @@ import io.dataease.datasource.constants.DatasourceTypes;
 import io.dataease.datasource.dto.MysqlConfigrationDTO;
 import io.dataease.dto.dataset.DataSetTaskLogDTO;
 import io.dataease.dto.dataset.DataTableInfoDTO;
+import io.dataease.service.spark.SparkCalc;
 import org.apache.commons.collections4.CollectionUtils;
 import org.apache.commons.io.FileUtils;
 import org.apache.commons.lang3.StringUtils;
@@ -120,6 +121,9 @@ public class ExtractDataService {
     @Value("${hbase.zookeeper.property.clientPort:2181}")
     private String zkPort;

+    @Resource
+    private SparkCalc sparkCalc;
+
     public void extractData(String datasetTableId, String taskId, String type) {
         DatasetTableTaskLog datasetTableTaskLog = new DatasetTableTaskLog();
@@ -131,60 +135,62 @@ public class ExtractDataService {
             List<DatasetTableField> datasetTableFields = dataSetTableFieldsService.list(DatasetTableField.builder().tableId(datasetTable.getId()).build());
             String table = new Gson().fromJson(datasetTable.getInfo(), DataTableInfoDTO.class).getTable();
             TableName hbaseTable = TableName.valueOf(datasetTableId);
-            switch (updateType){
+            switch (updateType) {
                 // full refresh
                 case all_scope:
                     writeDatasetTableTaskLog(datasetTableTaskLog, datasetTableId, taskId);

                     //check pentaho_mappings table
                     TableName pentaho_mappings = TableName.valueOf(this.pentaho_mappings);
-                    if(!admin.tableExists(pentaho_mappings)){
+                    if (!admin.tableExists(pentaho_mappings)) {
                         creatHaseTable(pentaho_mappings, admin, Arrays.asList("columns", "key"));
                     }

                     //check pentaho files
-                    if(!isExitFile("job_" + datasetTableId + ".kjb") || !isExitFile("trans_" + datasetTableId + ".ktr")){
+                    if (!isExitFile("job_" + datasetTableId + ".kjb") || !isExitFile("trans_" + datasetTableId + ".ktr")) {
                         generateTransFile("all_scope", datasetTable, datasource, table, datasetTableFields, null);
                         generateJobFile("all_scope", datasetTable);
                     }

-                    if(!admin.tableExists(hbaseTable)){
+                    if (!admin.tableExists(hbaseTable)) {
                         creatHaseTable(hbaseTable, admin, Arrays.asList(dataease_column_family));
                     }
                     admin.disableTable(hbaseTable);
                     admin.truncateTable(hbaseTable, true);

                     extractData(datasetTable, "all_scope");
+                    // after sync complete,read data to cache from HBase
+                    sparkCalc.getHBaseDataAndCache(datasetTableId, dataSetTableFieldsService.getFieldsByTableId(datasetTableId));

                     datasetTableTaskLog.setStatus(JobStatus.Completed.name());
                     datasetTableTaskLog.setEndTime(System.currentTimeMillis());
                     dataSetTableTaskLogService.save(datasetTableTaskLog);
                     break;
                 case add_scope:
                     // incremental update
-                    if(!admin.tableExists(hbaseTable)){
+                    if (!admin.tableExists(hbaseTable)) {
                         LogUtil.error("TableName error, dataaset: " + datasetTableId);
                         return;
                     }
                     DatasetTableIncrementalConfig datasetTableIncrementalConfig = dataSetTableService.incrementalConfig(datasetTableId);
-                    if(datasetTableIncrementalConfig == null || StringUtils.isEmpty(datasetTableIncrementalConfig.getTableId())){
+                    if (datasetTableIncrementalConfig == null || StringUtils.isEmpty(datasetTableIncrementalConfig.getTableId())) {
                         return;
                     }
                     DatasetTableTaskLog request = new DatasetTableTaskLog();
                     request.setTableId(datasetTableId);
                     request.setStatus(JobStatus.Completed.name());
                     List<DataSetTaskLogDTO> dataSetTaskLogDTOS = dataSetTableTaskLogService.list(request);
-                    if(CollectionUtils.isEmpty(dataSetTaskLogDTOS)){
+                    if (CollectionUtils.isEmpty(dataSetTaskLogDTOS)) {
                         return;
                     }
-                    writeDatasetTableTaskLog(datasetTableTaskLog,datasetTableId, taskId);
+                    writeDatasetTableTaskLog(datasetTableTaskLog, datasetTableId, taskId);

                     // incremental add
-                    if(StringUtils.isNotEmpty(datasetTableIncrementalConfig.getIncrementalAdd().replace(" ", ""))){
+                    if (StringUtils.isNotEmpty(datasetTableIncrementalConfig.getIncrementalAdd().replace(" ", ""))) {
                         System.out.println("datasetTableIncrementalConfig.getIncrementalAdd(): " + datasetTableIncrementalConfig.getIncrementalAdd());
                         String sql = datasetTableIncrementalConfig.getIncrementalAdd().replace(lastUpdateTime, dataSetTaskLogDTOS.get(0).getStartTime().toString()
                                 .replace(currentUpdateTime, Long.valueOf(System.currentTimeMillis()).toString()));

-                        if(!isExitFile("job_add_" + datasetTableId + ".kjb") || !isExitFile("trans_add_" + datasetTableId + ".ktr")){
+                        if (!isExitFile("job_add_" + datasetTableId + ".kjb") || !isExitFile("trans_add_" + datasetTableId + ".ktr")) {
                             generateTransFile("incremental_add", datasetTable, datasource, table, datasetTableFields, sql);
                             generateJobFile("incremental_add", datasetTable);
                         }
@@ -193,39 +199,39 @@ public class ExtractDataService {
                     }

                     // incremental delete
-                    if(StringUtils.isNotEmpty(datasetTableIncrementalConfig.getIncrementalDelete())){
+                    if (StringUtils.isNotEmpty(datasetTableIncrementalConfig.getIncrementalDelete())) {
                         String sql = datasetTableIncrementalConfig.getIncrementalDelete().replace(lastUpdateTime, dataSetTaskLogDTOS.get(0).getStartTime().toString()
                                 .replace(currentUpdateTime, Long.valueOf(System.currentTimeMillis()).toString()));
-                        if(!isExitFile("job_delete_" + datasetTableId + ".kjb") || !isExitFile("trans_delete_" + datasetTableId + ".ktr")){
+                        if (!isExitFile("job_delete_" + datasetTableId + ".kjb") || !isExitFile("trans_delete_" + datasetTableId + ".ktr")) {
                             generateTransFile("incremental_delete", datasetTable, datasource, table, datasetTableFields, sql);
                             generateJobFile("incremental_delete", datasetTable);
                         }
                         extractData(datasetTable, "incremental_delete");
                     }
+                    // after sync complete,read data to cache from HBase
+                    sparkCalc.getHBaseDataAndCache(datasetTableId, dataSetTableFieldsService.getFieldsByTableId(datasetTableId));
                     datasetTableTaskLog.setStatus(JobStatus.Completed.name());
                     datasetTableTaskLog.setEndTime(System.currentTimeMillis());
                     dataSetTableTaskLogService.save(datasetTableTaskLog);
                     break;
             }
-        }catch (Exception e){
+        } catch (Exception e) {
             e.printStackTrace();
             LogUtil.error("ExtractData error, dataaset: " + datasetTableId);
             LogUtil.error(e.getMessage(), e);
             datasetTableTaskLog.setStatus(JobStatus.Error.name());
             datasetTableTaskLog.setEndTime(System.currentTimeMillis());
             dataSetTableTaskLogService.save(datasetTableTaskLog);
-        }finally {
+        } finally {
             DatasetTableTask datasetTableTask = dataSetTableTaskService.get(taskId);
-            if(datasetTableTask != null && datasetTableTask.getRate().equalsIgnoreCase(ScheduleType.SIMPLE.toString())){
+            if (datasetTableTask != null && datasetTableTask.getRate().equalsIgnoreCase(ScheduleType.SIMPLE.toString())) {
                 datasetTableTask.setRate(ScheduleType.SIMPLE_COMPLETE.toString());
                 dataSetTableTaskService.update(datasetTableTask);
             }
         }
     }

-    private void writeDatasetTableTaskLog(DatasetTableTaskLog datasetTableTaskLog, String datasetTableId, String taskId){
+    private void writeDatasetTableTaskLog(DatasetTableTaskLog datasetTableTaskLog, String datasetTableId, String taskId) {
         datasetTableTaskLog.setTableId(datasetTableId);
         datasetTableTaskLog.setTaskId(taskId);
         datasetTableTaskLog.setStatus(JobStatus.Underway.name());
@@ -233,7 +239,7 @@ public class ExtractDataService {
         dataSetTableTaskLogService.save(datasetTableTaskLog);
     }

-    private void creatHaseTable(TableName tableName, Admin admin, List<String> columnFamily) throws Exception{
+    private void creatHaseTable(TableName tableName, Admin admin, List<String> columnFamily) throws Exception {
         TableDescriptorBuilder descBuilder = TableDescriptorBuilder.newBuilder(tableName);
         Collection<ColumnFamilyDescriptor> families = new ArrayList<>();
         for (String s : columnFamily) {
@@ -245,11 +251,11 @@ public class ExtractDataService {
         admin.createTable(desc);
     }

-    private void extractData(DatasetTable datasetTable, String extractType) throws Exception{
+    private void extractData(DatasetTable datasetTable, String extractType) throws Exception {
         KettleFileRepository repository = CommonBeanFactory.getBean(KettleFileRepository.class);
         RepositoryDirectoryInterface repositoryDirectoryInterface = repository.loadRepositoryDirectoryTree();
         JobMeta jobMeta = null;
-        switch (extractType){
+        switch (extractType) {
             case "all_scope":
                 jobMeta = repository.loadJob("job_" + datasetTable.getId(), repositoryDirectoryInterface, null, null);
                 break;
@@ -272,27 +278,27 @@ public class ExtractDataService {
         do {
             jobStatus = remoteSlaveServer.getJobStatus(jobMeta.getName(), lastCarteObjectId, 0);
         } while (jobStatus != null && jobStatus.isRunning());
-        if(jobStatus.getStatusDescription().equals("Finished")){
+        if (jobStatus.getStatusDescription().equals("Finished")) {
             return;
-        }else {
+        } else {
             throw new Exception(jobStatus.getLoggingString());
         }
     }

-    private synchronized Connection getConnection() throws Exception{
-        if(connection == null || connection.isClosed()){
+    private synchronized Connection getConnection() throws Exception {
+        if (connection == null || connection.isClosed()) {
             Configuration cfg = CommonBeanFactory.getBean(Configuration.class);
             connection = ConnectionFactory.createConnection(cfg, pool);
         }
         return connection;
     }

-    private boolean isExitFile(String fileName){
-        File file = new File(root_path + fileName);
+    private boolean isExitFile(String fileName) {
+        File file = new File(root_path + fileName);
         return file.exists();
     }

-    private SlaveServer getSlaveServer(){
+    private SlaveServer getSlaveServer() {
         SlaveServer remoteSlaveServer = new SlaveServer();
         remoteSlaveServer.setHostname(carte); // remote host IP
         remoteSlaveServer.setPort(port); // port
@@ -301,7 +307,7 @@ public class ExtractDataService {
         return remoteSlaveServer;
     }

-    private void generateJobFile(String extractType, DatasetTable datasetTable) throws Exception{
+    private void generateJobFile(String extractType, DatasetTable datasetTable) throws Exception {
         String jobName = null;
         switch (extractType) {
             case "all_scope":
@@ -364,11 +370,11 @@ public class ExtractDataService {
         jobMeta.addJobHop(greenHop);

         String jobXml = jobMeta.getXML();
-        File file = new File(root_path + jobName + ".kjb");
+        File file = new File(root_path + jobName + ".kjb");
         FileUtils.writeStringToFile(file, jobXml, "UTF-8");
     }

-    private void generateTransFile(String extractType, DatasetTable datasetTable, Datasource datasource, String table, List<DatasetTableField> datasetTableFields, String selectSQL) throws Exception{
+    private void generateTransFile(String extractType, DatasetTable datasetTable, Datasource datasource, String table, List<DatasetTableField> datasetTableFields, String selectSQL) throws Exception {
         TransMeta transMeta = new TransMeta();
         String transName = null;
         switch (extractType) {
@@ -450,7 +456,7 @@ public class ExtractDataService {
         RuntimeTestActionHandler defaultHandler = null;

         RuntimeTestActionService runtimeTestActionService = new RuntimeTestActionServiceImpl(runtimeTestActionHandlers, defaultHandler);
-        RuntimeTester runtimeTester = new RuntimeTesterImpl(new ArrayList<>(Arrays.asList(mock(RuntimeTest.class))), mock(ExecutorService.class), "modules");
+        RuntimeTester runtimeTester = new RuntimeTesterImpl(new ArrayList<>(Arrays.asList(mock(RuntimeTest.class))), mock(ExecutorService.class), "modules");

         Put put = new Put((datasetTable.getId() + "," + "target_mapping").getBytes());
         for (DatasetTableField datasetTableField : datasetTableFields) {
@@ -466,7 +472,7 @@ public class ExtractDataService {
         hBaseOutputMeta.setTargetMappingName("target_mapping");
         hBaseOutputMeta.setNamedCluster(clusterTemplate);
         hBaseOutputMeta.setCoreConfigURL(hbase_conf_file);
-        if(extractType.equalsIgnoreCase("incremental_delete")){
+        if (extractType.equalsIgnoreCase("incremental_delete")) {
             hBaseOutputMeta.setDeleteRowKey(true);
         }
         StepMeta tostep = new StepMeta("HBaseOutput", "HBaseOutput", hBaseOutputMeta);
backend/src/main/java/io/dataease/service/spark/CacheUtil.java (new file, 0 → 100644)
package io.dataease.service.spark;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

import java.util.HashMap;
import java.util.Map;

/**
 * @Author gin
 * @Date 2021/4/13 12:32 PM
 */
public class CacheUtil {
    private static CacheUtil cacheUtil;
    private static Map<String, Dataset<Row>> cacheMap;

    private CacheUtil(){
        cacheMap = new HashMap<String, Dataset<Row>>();
    }

    public static CacheUtil getInstance(){
        if (cacheUtil == null){
            cacheUtil = new CacheUtil();
        }
        return cacheUtil;
    }

    /**
     * Add an entry to the cache
     * @param key
     * @param obj
     */
    public void addCacheData(String key, Dataset<Row> obj){
        cacheMap.put(key, obj);
    }

    /**
     * Get an entry from the cache
     * @param key
     * @return
     */
    public Dataset<Row> getCacheData(String key){
        return cacheMap.get(key);
    }

    /**
     * Remove an entry from the cache
     * @param key
     */
    public void removeCacheData(String key){
        cacheMap.remove(key);
    }
}
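A minimal usage sketch (illustrative only; the table id below is made up): SparkCalc keys the cached Dataset by the HBase table name, so a consumer checks the cache first and falls back to rebuilding. Note the design choice visible above: CacheUtil is a lazily initialized singleton backed by a plain HashMap with no synchronization.

    import io.dataease.service.spark.CacheUtil;
    import org.apache.spark.sql.Dataset;
    import org.apache.spark.sql.Row;

    public class CacheUtilSketch { // illustration only
        public Dataset<Row> lookup(String hTableId, Dataset<Row> freshlyBuilt) {
            // cache hit: reuse the persisted Dataset keyed by the HBase table id
            Dataset<Row> cached = CacheUtil.getInstance().getCacheData(hTableId);
            if (cached != null) {
                return cached;
            }
            // cache miss: store the newly built Dataset (e.g. the output of getHBaseDataAndCache)
            CacheUtil.getInstance().addCacheData(hTableId, freshlyBuilt);
            return freshlyBuilt;
        }
    }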
backend/src/main/java/io/dataease/service/spark/SparkCalc.java
package io.dataease.service.spark;

import io.dataease.base.domain.DatasetTableField;
import io.dataease.commons.utils.CommonBeanFactory;
import io.dataease.dto.chart.ChartViewFieldDTO;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.ObjectUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Result;
...

@@ -42,21 +44,56 @@ public class SparkCalc {
     @Resource
     private Environment env; // holds values from the configuration files

-    public List<String[]> getData(String hTable, List<ChartViewFieldDTO> xAxis, List<ChartViewFieldDTO> yAxis, String tmpTable) throws Exception {
+    public List<String[]> getData(String hTable, List<DatasetTableField> fields, List<ChartViewFieldDTO> xAxis, List<ChartViewFieldDTO> yAxis, String tmpTable) throws Exception {
+        // Spark Context
+        SparkSession spark = CommonBeanFactory.getBean(SparkSession.class);
+        JavaSparkContext sparkContext = new JavaSparkContext(spark.sparkContext());
+
+        // Spark SQL Context
+        // SQLContext sqlContext = CommonBeanFactory.getBean(SQLContext.class);
+        SQLContext sqlContext = new SQLContext(sparkContext);
+        sqlContext.setConf("spark.sql.shuffle.partitions", env.getProperty("spark.sql.shuffle.partitions", "1"));
+        sqlContext.setConf("spark.default.parallelism", env.getProperty("spark.default.parallelism", "1"));
+
+        Dataset<Row> dataFrame = CacheUtil.getInstance().getCacheData(hTable);
+        if (ObjectUtils.isEmpty(dataFrame)) {
+            dataFrame = getHBaseDataAndCache(sparkContext, sqlContext, hTable, fields);
+        }
+
+        dataFrame.createOrReplaceTempView(tmpTable);
+        Dataset<Row> sql = sqlContext.sql(getSQL(xAxis, yAxis, tmpTable));
+        // transform
+        List<String[]> data = new ArrayList<>();
+        List<Row> list = sql.collectAsList();
+        for (Row row : list) {
+            String[] r = new String[row.length()];
+            for (int i = 0; i < row.length(); i++) {
+                r[i] = row.get(i) == null ? "null" : row.get(i).toString();
+            }
+            data.add(r);
+        }
+        return data;
+    }
+
+    public Dataset<Row> getHBaseDataAndCache(String hTable, List<DatasetTableField> fields) throws Exception {
+        // Spark Context
+        SparkSession spark = CommonBeanFactory.getBean(SparkSession.class);
+        JavaSparkContext sparkContext = new JavaSparkContext(spark.sparkContext());
+
+        // Spark SQL Context
+        // SQLContext sqlContext = CommonBeanFactory.getBean(SQLContext.class);
+        SQLContext sqlContext = new SQLContext(sparkContext);
+        sqlContext.setConf("spark.sql.shuffle.partitions", env.getProperty("spark.sql.shuffle.partitions", "1"));
+        sqlContext.setConf("spark.default.parallelism", env.getProperty("spark.default.parallelism", "1"));
+
+        return getHBaseDataAndCache(sparkContext, sqlContext, hTable, fields);
+    }
+
+    public Dataset<Row> getHBaseDataAndCache(JavaSparkContext sparkContext, SQLContext sqlContext, String hTable, List<DatasetTableField> fields) throws Exception {
         Scan scan = new Scan();
         scan.addFamily(column_family.getBytes());
         ClientProtos.Scan proto = ProtobufUtil.toScan(scan);
         String scanToString = new String(Base64.getEncoder().encode(proto.toByteArray()));

-        // Spark Context
-        // JavaSparkContext sparkContext = CommonBeanFactory.getBean(JavaSparkContext.class);
-        SparkSession spark = SparkSession.builder()
-                .appName(env.getProperty("spark.appName", "DataeaseJob"))
-                .master(env.getProperty("spark.master", "local[*]"))
-                .config("spark.scheduler.mode", "FAIR")
-                .getOrCreate();
-        JavaSparkContext sparkContext = new JavaSparkContext(spark.sparkContext());
-
         // HBase config
         // Configuration conf = CommonBeanFactory.getBean(Configuration.class);
         org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
@@ -73,7 +110,7 @@ public class SparkCalc {
             while (tuple2Iterator.hasNext()) {
                 Result result = tuple2Iterator.next()._2;
                 List<Object> list = new ArrayList<>();
-                xAxis.forEach(x -> {
+                fields.forEach(x -> {
                     String l = Bytes.toString(result.getValue(column_family.getBytes(), x.getOriginName().getBytes()));
                     if (x.getDeType() == 0 || x.getDeType() == 1) {
                         list.add(l);
@@ -89,22 +126,6 @@ public class SparkCalc {
                         list.add(Double.valueOf(l));
                     }
                 });
-                yAxis.forEach(y -> {
-                    String l = Bytes.toString(result.getValue(column_family.getBytes(), y.getOriginName().getBytes()));
-                    if (y.getDeType() == 0 || y.getDeType() == 1) {
-                        list.add(l);
-                    } else if (y.getDeType() == 2) {
-                        if (StringUtils.isEmpty(l)) {
-                            l = "0";
-                        }
-                        list.add(Long.valueOf(l));
-                    } else if (y.getDeType() == 3) {
-                        if (StringUtils.isEmpty(l)) {
-                            l = "0.0";
-                        }
-                        list.add(Double.valueOf(l));
-                    }
-                });
                 iterator.add(RowFactory.create(list.toArray()));
             }
             return iterator.iterator();
@@ -112,7 +133,7 @@ public class SparkCalc {
         List<StructField> structFields = new ArrayList<>();
         // the struct order must match the RDD order
-        xAxis.forEach(x -> {
+        fields.forEach(x -> {
             if (x.getDeType() == 0 || x.getDeType() == 1) {
                 structFields.add(DataTypes.createStructField(x.getOriginName(), DataTypes.StringType, true));
             } else if (x.getDeType() == 2) {
@@ -121,40 +142,15 @@ public class SparkCalc {
                 structFields.add(DataTypes.createStructField(x.getOriginName(), DataTypes.DoubleType, true));
             }
         });
-        yAxis.forEach(y -> {
-            if (y.getDeType() == 0 || y.getDeType() == 1) {
-                structFields.add(DataTypes.createStructField(y.getOriginName(), DataTypes.StringType, true));
-            } else if (y.getDeType() == 2) {
-                structFields.add(DataTypes.createStructField(y.getOriginName(), DataTypes.LongType, true));
-            } else if (y.getDeType() == 3) {
-                structFields.add(DataTypes.createStructField(y.getOriginName(), DataTypes.DoubleType, true));
-            }
-        });
         StructType structType = DataTypes.createStructType(structFields);

-        // Spark SQL Context
-        // SQLContext sqlContext = CommonBeanFactory.getBean(SQLContext.class);
-        SQLContext sqlContext = new SQLContext(sparkContext);
-        sqlContext.setConf("spark.sql.shuffle.partitions", env.getProperty("spark.sql.shuffle.partitions", "1"));
-        sqlContext.setConf("spark.default.parallelism", env.getProperty("spark.default.parallelism", "1"));
-
-        Dataset<Row> dataFrame = sqlContext.createDataFrame(rdd, structType);
-        dataFrame.createOrReplaceTempView(tmpTable);
-        Dataset<Row> sql = sqlContext.sql(getSQL(xAxis, yAxis, tmpTable));
-        // transform
-        List<String[]> data = new ArrayList<>();
-        List<Row> list = sql.collectAsList();
-        for (Row row : list) {
-            String[] r = new String[row.length()];
-            for (int i = 0; i < row.length(); i++) {
-                r[i] = row.get(i) == null ? "null" : row.get(i).toString();
-            }
-            data.add(r);
-        }
-        return data;
+        Dataset<Row> dataFrame = sqlContext.createDataFrame(rdd, structType).persist();
+        CacheUtil.getInstance().addCacheData(hTable, dataFrame);
+        dataFrame.count();
+        return dataFrame;
     }

-    private String getSQL(List<ChartViewFieldDTO> xAxis, List<ChartViewFieldDTO> yAxis, String table) {
+    public String getSQL(List<ChartViewFieldDTO> xAxis, List<ChartViewFieldDTO> yAxis, String table) {
         // field aggregation, ordering, etc.
         String[] field = yAxis.stream().map(y -> "CAST(" + y.getSummary() + "(" + y.getOriginName() + ") AS DECIMAL(20,2)) AS _" + y.getSummary() + "_" + y.getOriginName()).toArray(String[]::new);
         String[] group = xAxis.stream().map(ChartViewFieldDTO::getOriginName).toArray(String[]::new);
...
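A note on the caching pattern above: persist() only marks the Dataset for caching; the following count() is an action that forces Spark to materialize it, so later chart queries hit the cached data instead of re-scanning HBase. A minimal, self-contained sketch of the same idea in plain Spark (no DataEase classes; the local session and sample data are illustrative):

    import org.apache.spark.sql.Dataset;
    import org.apache.spark.sql.Encoders;
    import org.apache.spark.sql.Row;
    import org.apache.spark.sql.SparkSession;

    import java.util.Arrays;

    public class PersistCountSketch {
        public static void main(String[] args) {
            SparkSession spark = SparkSession.builder()
                    .appName("persist-count-sketch")
                    .master("local[*]")
                    .getOrCreate();

            // a tiny illustrative DataFrame; the real code builds it from an HBase scan RDD
            Dataset<Row> df = spark.createDataset(Arrays.asList(1, 2, 3), Encoders.INT()).toDF("v");

            Dataset<Row> cached = df.persist(); // lazily marks the data for caching
            cached.count();                     // action: forces materialization into the cache
            cached.createOrReplaceTempView("tmp_demo");
            spark.sql("SELECT SUM(v) FROM tmp_demo").show(); // served from the cached data

            spark.stop();
        }
    }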