zhu / dataease · Commits

Commit f3fad450
Authored Apr 21, 2021 by taojinlong

feat: 测试性能 (test performance)

Parent: 6b285d73

Showing 5 changed files with 240 additions and 128 deletions (+240 -128):
  CommonConfig.java                backend/src/main/java/io/dataease/config/CommonConfig.java      +9   -9
  JdbcProvider.java                ...in/java/io/dataease/datasource/provider/JdbcProvider.java    +18  -4
  AppStartReadHBaseListener.java   .../java/io/dataease/listener/AppStartReadHBaseListener.java    +15  -15
  ExtractDataService.java          .../java/io/dataease/service/dataset/ExtractDataService.java    +127 -94
  SparkCalc.java                   ...nd/src/main/java/io/dataease/service/spark/SparkCalc.java    +71  -6
backend/src/main/java/io/dataease/config/CommonConfig.java (view file @ f3fad450)

@@ -22,15 +22,15 @@ public class CommonConfig {
     private Environment env; // 保存了配置文件的信息
     private static String root_path = "/opt/dataease/data/kettle/";

-    @Bean
-    @ConditionalOnMissingBean
-    public org.apache.hadoop.conf.Configuration configuration() {
-        org.apache.hadoop.conf.Configuration configuration = new org.apache.hadoop.conf.Configuration();
-        configuration.set("hbase.zookeeper.quorum", env.getProperty("hbase.zookeeper.quorum"));
-        configuration.set("hbase.zookeeper.property.clientPort", env.getProperty("hbase.zookeeper.property.clientPort"));
-        configuration.set("hbase.client.retries.number", env.getProperty("hbase.client.retries.number", "1"));
-        return configuration;
-    }
+//    @Bean
+//    @ConditionalOnMissingBean
+//    public org.apache.hadoop.conf.Configuration configuration() {
+//        org.apache.hadoop.conf.Configuration configuration = new org.apache.hadoop.conf.Configuration();
+//        configuration.set("hbase.zookeeper.quorum", env.getProperty("hbase.zookeeper.quorum"));
+//        configuration.set("hbase.zookeeper.property.clientPort", env.getProperty("hbase.zookeeper.property.clientPort"));
+//        configuration.set("hbase.client.retries.number", env.getProperty("hbase.client.retries.number", "1"));
+//        return configuration;
+//    }

     @Bean
     @ConditionalOnMissingBean
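The change above takes the hand-built Hadoop/HBase Configuration bean out of the Spring context by commenting it out; only the bean that follows it in the class is kept. For reference, a minimal sketch of the pattern being disabled: an @ConditionalOnMissingBean factory that copies hbase.* settings from Spring's Environment into a Hadoop Configuration. The class and method names below are hypothetical; the property keys mirror the diff.

    import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
    import org.springframework.context.annotation.Bean;
    import org.springframework.context.annotation.Configuration;
    import org.springframework.core.env.Environment;

    @Configuration
    public class HBaseConfigSketch {          // hypothetical class name, not DataEase code

        @Bean
        @ConditionalOnMissingBean             // only registered when no other Hadoop Configuration bean exists
        public org.apache.hadoop.conf.Configuration hbaseConfiguration(Environment env) {
            org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
            // copy the HBase client settings out of Spring's Environment, with a default retry count
            conf.set("hbase.zookeeper.quorum", env.getProperty("hbase.zookeeper.quorum"));
            conf.set("hbase.zookeeper.property.clientPort", env.getProperty("hbase.zookeeper.property.clientPort"));
            conf.set("hbase.client.retries.number", env.getProperty("hbase.client.retries.number", "1"));
            return conf;
        }
    }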
backend/src/main/java/io/dataease/datasource/provider/JdbcProvider.java (view file @ f3fad450)

@@ -7,6 +7,7 @@ import io.dataease.datasource.dto.MysqlConfigrationDTO;
 import io.dataease.datasource.dto.SqlServerConfigration;
 import io.dataease.datasource.dto.TableFiled;
 import io.dataease.datasource.request.DatasourceRequest;
+import org.apache.arrow.util.VisibleForTesting;
 import org.apache.commons.lang3.StringUtils;
 import org.springframework.stereotype.Service;

@@ -39,6 +40,23 @@ public class JdbcProvider extends DatasourceProvider {
         return list;
     }

+    @VisibleForTesting
+    public void exec(DatasourceRequest datasourceRequest) throws Exception {
+        Connection connection = null;
+        try {
+            connection = getConnectionFromPool(datasourceRequest);
+            Statement stat = connection.createStatement();
+            stat.execute(datasourceRequest.getQuery());
+        } catch (SQLException e) {
+            throw new Exception("ERROR:" + e.getMessage(), e);
+        } catch (Exception e) {
+            throw new Exception("ERROR:" + e.getMessage(), e);
+        } finally {
+            returnSource(connection, datasourceRequest.getDatasource().getId());
+        }
+    }
+
     @Override
     public ResultSet getDataResultSet(DatasourceRequest datasourceRequest) throws Exception {
         ResultSet rs;

@@ -47,7 +65,6 @@ public class JdbcProvider extends DatasourceProvider {
             connection = getConnectionFromPool(datasourceRequest);
             Statement stat = connection.createStatement();
             rs = stat.executeQuery(datasourceRequest.getQuery());
-            returnSource(connection, datasourceRequest.getDatasource().getId());
         } catch (SQLException e) {
             throw new Exception("ERROR:" + e.getMessage(), e);
         } catch (Exception e) {

@@ -66,7 +83,6 @@ public class JdbcProvider extends DatasourceProvider {
             connection = getConnectionFromPool(datasourceRequest);
             Statement stat = connection.createStatement();
             ResultSet rs = stat.executeQuery(datasourceRequest.getQuery() + MessageFormat.format(" LIMIT {0}, {1}", (datasourceRequest.getStartPage() - 1) * datasourceRequest.getPageSize(), datasourceRequest.getPageSize()));
-            returnSource(connection, datasourceRequest.getDatasource().getId());
             list = fetchResult(rs);
         } catch (SQLException e) {
             throw new Exception("ERROR:" + e.getMessage(), e);

@@ -174,8 +190,6 @@ public class JdbcProvider extends DatasourceProvider {
         return list;
     }
-    ;

     @Override
     public void test(DatasourceRequest datasourceRequest) throws Exception {
         String queryStr = getTablesSql(datasourceRequest);
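The new exec(DatasourceRequest) runs a statement for its side effects only (no ResultSet) and hands the connection back to the pool in its finally block, while the two returnSource(...) calls that used to sit inside the try blocks of the query methods are dropped. A minimal, hypothetical caller sketch, assuming a JdbcProvider bean and a DatasourceRequest whose query and datasource are already populated; the class name is an assumption, not DataEase code.

    import io.dataease.datasource.provider.JdbcProvider;
    import io.dataease.datasource.request.DatasourceRequest;

    import javax.annotation.Resource;
    import org.springframework.stereotype.Service;

    // Hypothetical caller: runs a DDL/DML statement through the new exec(...) method.
    // JdbcProvider.exec(...) executes datasourceRequest.getQuery() via Statement.execute(...)
    // and returns the pooled connection in its finally block, so the caller only handles errors.
    @Service
    public class ExtractSqlRunner {                       // illustrative name

        @Resource
        private JdbcProvider jdbcProvider;

        public void run(DatasourceRequest request) {      // request.query / request.datasource prepared by the caller
            try {
                jdbcProvider.exec(request);               // no ResultSet expected; failures surface as Exception("ERROR:...")
            } catch (Exception e) {
                // the provider already wraps SQLExceptions, so a single catch is enough here
                throw new RuntimeException(e);
            }
        }
    }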
backend/src/main/java/io/dataease/listener/AppStartReadHBaseListener.java (view file @ f3fad450)

@@ -33,20 +33,20 @@ public class AppStartReadHBaseListener implements ApplicationListener<Applicatio
     @Override
     public void onApplicationEvent(ApplicationReadyEvent applicationReadyEvent) {
-        System.out.println("================= Read HBase start =================");
-        // 项目启动,从数据集中找到定时抽取的表,从HBase中读取放入缓存
-        DatasetTableExample datasetTableExample = new DatasetTableExample();
-        datasetTableExample.createCriteria().andModeEqualTo(1);
-        List<DatasetTable> datasetTables = datasetTableMapper.selectByExampleWithBLOBs(datasetTableExample);
-        for (DatasetTable table : datasetTables) {
-//            commonThreadPool.addTask(() -> {
-            try {
-                List<DatasetTableField> fields = dataSetTableFieldsService.getFieldsByTableId(table.getId());
-                sparkCalc.getHBaseDataAndCache(table.getId(), fields);
-            } catch (Exception e) {
-                e.printStackTrace();
-            }
-//            });
-        }
+//        System.out.println("================= Read HBase start =================");
+//        // 项目启动,从数据集中找到定时抽取的表,从HBase中读取放入缓存
+//        DatasetTableExample datasetTableExample = new DatasetTableExample();
+//        datasetTableExample.createCriteria().andModeEqualTo(1);
+//        List<DatasetTable> datasetTables = datasetTableMapper.selectByExampleWithBLOBs(datasetTableExample);
+//        for (DatasetTable table : datasetTables) {
+//        //            commonThreadPool.addTask(() -> {
+//            try {
+//                List<DatasetTableField> fields = dataSetTableFieldsService.getFieldsByTableId(table.getId());
+//                sparkCalc.getHBaseDataAndCache(table.getId(), fields);
+//            } catch (Exception e) {
+//                e.printStackTrace();
+//            }
+//        //            });
+//        }
     }
 }
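With its body commented out, the listener no longer pre-loads extracted tables from HBase into the cache at startup, but the class still hooks into ApplicationReadyEvent. A minimal sketch of that Spring Boot startup-hook pattern with a placeholder warm-up task; the class and method names are illustrative, not DataEase code.

    import org.springframework.boot.context.event.ApplicationReadyEvent;
    import org.springframework.context.ApplicationListener;
    import org.springframework.stereotype.Component;

    // Minimal sketch of the startup-hook pattern used by AppStartReadHBaseListener:
    // Spring publishes ApplicationReadyEvent once the context is fully started, and the
    // listener can kick off warm-up work. The warmUp() body here is a placeholder.
    @Component
    public class WarmUpListener implements ApplicationListener<ApplicationReadyEvent> {

        @Override
        public void onApplicationEvent(ApplicationReadyEvent event) {
            warmUp();
        }

        private void warmUp() {
            // e.g. pre-load frequently used datasets into a cache
            System.out.println("warm-up finished");
        }
    }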
backend/src/main/java/io/dataease/service/dataset/ExtractDataService.java (view file @ f3fad450)

@@ -56,6 +56,9 @@ import org.pentaho.di.trans.TransHopMeta;
 import org.pentaho.di.trans.TransMeta;
 import org.pentaho.di.trans.step.StepMeta;
 import org.pentaho.di.trans.steps.tableinput.TableInputMeta;
+import org.pentaho.di.trans.steps.textfileoutput.TextFileField;
+import org.pentaho.di.trans.steps.textfileoutput.TextFileOutput;
+import org.pentaho.di.trans.steps.textfileoutput.TextFileOutputMeta;
 import org.pentaho.di.trans.steps.userdefinedjavaclass.InfoStepDefinition;
 import org.pentaho.di.trans.steps.userdefinedjavaclass.UserDefinedJavaClassDef;
 import org.pentaho.di.trans.steps.userdefinedjavaclass.UserDefinedJavaClassMeta;

@@ -105,6 +108,7 @@ public class ExtractDataService {
     private static String currentUpdateTime = "${__current_update_time__}";
     private static String dataease_column_family = "dataease";
     private static String root_path = "/opt/dataease/data/kettle/";
+    private static String data_path = "/opt/dataease/data/db/";
     private static String hbase_conf_file = "/opt/dataease/conf/hbase-site.xml";
     private static String pentaho_mappings = "pentaho_mappings";

@@ -129,7 +133,7 @@ public class ExtractDataService {
         DatasetTableTaskLog datasetTableTaskLog = new DatasetTableTaskLog();
         UpdateType updateType = UpdateType.valueOf(type);
         try {
-            Admin admin = getConnection().getAdmin();
+//            Admin admin = getConnection().getAdmin();
             DatasetTable datasetTable = dataSetTableService.get(datasetTableId);
             Datasource datasource = datasourceMapper.selectByPrimaryKey(datasetTable.getDataSourceId());
             List<DatasetTableField> datasetTableFields = dataSetTableFieldsService.list(DatasetTableField.builder().tableId(datasetTable.getId()).build());

@@ -141,10 +145,10 @@ public class ExtractDataService {
                     writeDatasetTableTaskLog(datasetTableTaskLog, datasetTableId, taskId);
                     //check pentaho_mappings table
-                    TableName pentaho_mappings = TableName.valueOf(this.pentaho_mappings);
-                    if (!admin.tableExists(pentaho_mappings)) {
-                        creatHaseTable(pentaho_mappings, admin, Arrays.asList("columns", "key"));
-                    }
+//                    TableName pentaho_mappings = TableName.valueOf(this.pentaho_mappings);
+//                    if (!admin.tableExists(pentaho_mappings)) {
+//                        creatHaseTable(pentaho_mappings, admin, Arrays.asList("columns", "key"));
+//                    }
                     //check pentaho files
                     if (!isExitFile("job_" + datasetTableId + ".kjb") || !isExitFile("trans_" + datasetTableId + ".ktr")) {

@@ -152,25 +156,25 @@ public class ExtractDataService {
                         generateJobFile("all_scope", datasetTable);
                     }
-                    if (!admin.tableExists(hbaseTable)) {
-                        creatHaseTable(hbaseTable, admin, Arrays.asList(dataease_column_family));
-                    }
-                    admin.disableTable(hbaseTable);
-                    admin.truncateTable(hbaseTable, true);
+//                    if (!admin.tableExists(hbaseTable)) {
+//                        creatHaseTable(hbaseTable, admin, Arrays.asList(dataease_column_family));
+//                    }
+//                    admin.disableTable(hbaseTable);
+//                    admin.truncateTable(hbaseTable, true);
                     extractData(datasetTable, "all_scope");
                     // after sync complete,read data to cache from HBase
-                    sparkCalc.getHBaseDataAndCache(datasetTableId, dataSetTableFieldsService.getFieldsByTableId(datasetTableId));
+//                    sparkCalc.getHBaseDataAndCache(datasetTableId, dataSetTableFieldsService.getFieldsByTableId(datasetTableId));
                     datasetTableTaskLog.setStatus(JobStatus.Completed.name());
                     datasetTableTaskLog.setEndTime(System.currentTimeMillis());
                     dataSetTableTaskLogService.save(datasetTableTaskLog);
                     break;
                 case add_scope:
                     // 增量更新
-                    if (!admin.tableExists(hbaseTable)) {
-                        LogUtil.error("TableName error, dataaset: " + datasetTableId);
-                        return;
-                    }
+//                    if (!admin.tableExists(hbaseTable)) {
+//                        LogUtil.error("TableName error, dataaset: " + datasetTableId);
+//                        return;
+//                    }
                     DatasetTableIncrementalConfig datasetTableIncrementalConfig = dataSetTableService.incrementalConfig(datasetTableId);
                     if (datasetTableIncrementalConfig == null || StringUtils.isEmpty(datasetTableIncrementalConfig.getTableId())) {
                         return;

@@ -209,7 +213,7 @@ public class ExtractDataService {
                         extractData(datasetTable, "incremental_delete");
                     }
                     // after sync complete,read data to cache from HBase
-                    sparkCalc.getHBaseDataAndCache(datasetTableId, dataSetTableFieldsService.getFieldsByTableId(datasetTableId));
+//                    sparkCalc.getHBaseDataAndCache(datasetTableId, dataSetTableFieldsService.getFieldsByTableId(datasetTableId));
                     datasetTableTaskLog.setStatus(JobStatus.Completed.name());
                     datasetTableTaskLog.setEndTime(System.currentTimeMillis());
                     dataSetTableTaskLogService.save(datasetTableTaskLog);

@@ -239,17 +243,17 @@ public class ExtractDataService {
         dataSetTableTaskLogService.save(datasetTableTaskLog);
     }

-    private void creatHaseTable(TableName tableName, Admin admin, List<String> columnFamily) throws Exception {
-        TableDescriptorBuilder descBuilder = TableDescriptorBuilder.newBuilder(tableName);
-        Collection<ColumnFamilyDescriptor> families = new ArrayList<>();
-        for (String s : columnFamily) {
-            ColumnFamilyDescriptor hcd = ColumnFamilyDescriptorBuilder.of(s);
-            families.add(hcd);
-        }
-        descBuilder.setColumnFamilies(families);
-        TableDescriptor desc = descBuilder.build();
-        admin.createTable(desc);
-    }
+//    private void creatHaseTable(TableName tableName, Admin admin, List<String> columnFamily) throws Exception {
+//        TableDescriptorBuilder descBuilder = TableDescriptorBuilder.newBuilder(tableName);
+//        Collection<ColumnFamilyDescriptor> families = new ArrayList<>();
+//        for (String s : columnFamily) {
+//            ColumnFamilyDescriptor hcd = ColumnFamilyDescriptorBuilder.of(s);
+//            families.add(hcd);
+//        }
+//        descBuilder.setColumnFamilies(families);
+//        TableDescriptor desc = descBuilder.build();
+//        admin.createTable(desc);
+//    }

     private void extractData(DatasetTable datasetTable, String extractType) throws Exception {
         KettleFileRepository repository = CommonBeanFactory.getBean(KettleFileRepository.class);

@@ -285,13 +289,13 @@ public class ExtractDataService {
         }
     }

-    private synchronized Connection getConnection() throws Exception {
-        if (connection == null || connection.isClosed()) {
-            Configuration cfg = CommonBeanFactory.getBean(Configuration.class);
-            connection = ConnectionFactory.createConnection(cfg, pool);
-        }
-        return connection;
-    }
+//    private synchronized Connection getConnection() throws Exception {
+//        if (connection == null || connection.isClosed()) {
+//            Configuration cfg = CommonBeanFactory.getBean(Configuration.class);
+//            connection = ConnectionFactory.createConnection(cfg, pool);
+//        }
+//        return connection;
+//    }

     private boolean isExitFile(String fileName) {
         File file = new File(root_path + fileName);

@@ -380,6 +384,15 @@ public class ExtractDataService {
         switch (extractType) {
             case "all_scope":
                 transName = "trans_" + datasetTable.getId();
+                datasetTableFields.sort((o1, o2) -> {
+                    if (o1.getOriginName() == null) {
+                        return -1;
+                    }
+                    if (o2.getOriginName() == null) {
+                        return 1;
+                    }
+                    return o1.getOriginName().compareTo(o2.getOriginName());
+                });
                 selectSQL = dataSetTableService.createQuerySQL(datasource.getType(), table, datasetTableFields.stream().map(DatasetTableField::getOriginName).toArray(String[]::new));
                 break;
             case "incremental_add":

@@ -422,70 +435,90 @@ public class ExtractDataService {
         fromStep.setLocation(100, 100);
         transMeta.addStep(fromStep);

-        //第二个 (User defined Java class)
-        UserDefinedJavaClassMeta userDefinedJavaClassMeta = new UserDefinedJavaClassMeta();
-        List<UserDefinedJavaClassMeta.FieldInfo> fields = new ArrayList<>();
-        UserDefinedJavaClassMeta.FieldInfo fieldInfo = new UserDefinedJavaClassMeta.FieldInfo("uuid", ValueMetaInterface.TYPE_STRING, -1, -1);
-        fields.add(fieldInfo);
-        userDefinedJavaClassMeta.setFieldInfo(fields);
-        List<UserDefinedJavaClassDef> definitions = new ArrayList<UserDefinedJavaClassDef>();
-        UserDefinedJavaClassDef userDefinedJavaClassDef = new UserDefinedJavaClassDef(UserDefinedJavaClassDef.ClassType.TRANSFORM_CLASS, "Processor", code);
-        userDefinedJavaClassDef.setActive(true);
-        definitions.add(userDefinedJavaClassDef);
-        userDefinedJavaClassMeta.replaceDefinitions(definitions);
-        StepMeta userDefinedJavaClassStep = new StepMeta("UserDefinedJavaClass", "UserDefinedJavaClass", userDefinedJavaClassMeta);
-        userDefinedJavaClassStep.setLocation(300, 100);
-        userDefinedJavaClassStep.setDraw(true);
-        transMeta.addStep(userDefinedJavaClassStep);
-        //第三个 (HBaseOutputMeta)
-        NamedClusterService namedClusterService = new NamedClusterManager();
-        NamedCluster clusterTemplate = new NamedClusterImpl();
-        clusterTemplate.setName("hadoop");
-        clusterTemplate.setZooKeeperHost(zkHost);
-        clusterTemplate.setZooKeeperPort(zkPort);
-        clusterTemplate.setStorageScheme("HDFS");
-        namedClusterService.setClusterTemplate(clusterTemplate);
-        List<ClusterInitializerProvider> providers = new ArrayList<>();
-        ClusterInitializer clusterInitializer = new ClusterInitializerImpl(providers);
-        NamedClusterServiceLocator namedClusterServiceLocator = new NamedClusterServiceLocatorImpl(clusterInitializer);
-        List<RuntimeTestActionHandler> runtimeTestActionHandlers = new ArrayList<>();
-        RuntimeTestActionHandler defaultHandler = null;
-        RuntimeTestActionService runtimeTestActionService = new RuntimeTestActionServiceImpl(runtimeTestActionHandlers, defaultHandler);
-        RuntimeTester runtimeTester = new RuntimeTesterImpl(new ArrayList<>(Arrays.asList(mock(RuntimeTest.class))), mock(ExecutorService.class), "modules");
-        Put put = new Put((datasetTable.getId() + "," + "target_mapping").getBytes());
-        for (DatasetTableField datasetTableField : datasetTableFields) {
-            put.addColumn("columns".getBytes(), (dataease_column_family + "," + datasetTableField.getOriginName() + "," + datasetTableField.getOriginName()).getBytes(), transToColumnType(datasetTableField.getDeType()).getBytes());
-        }
-        put.addColumn("key".getBytes(), "uuid".getBytes(), "String".getBytes());
-        TableName pentaho_mappings = TableName.valueOf(this.pentaho_mappings);
-        Table tab = getConnection().getTable(pentaho_mappings);
-        tab.put(put);
-        HBaseOutputMeta hBaseOutputMeta = new HBaseOutputMeta(namedClusterService, namedClusterServiceLocator, runtimeTestActionService, runtimeTester);
-        hBaseOutputMeta.setTargetTableName(datasetTable.getId());
-        hBaseOutputMeta.setTargetMappingName("target_mapping");
-        hBaseOutputMeta.setNamedCluster(clusterTemplate);
-        hBaseOutputMeta.setCoreConfigURL(hbase_conf_file);
-        hBaseOutputMeta.setDisableWriteToWAL(true);
-        hBaseOutputMeta.setWriteBufferSize("31457280"); //30M
-        if (extractType.equalsIgnoreCase("incremental_delete")) {
-            hBaseOutputMeta.setDeleteRowKey(true);
-        }
-        StepMeta tostep = new StepMeta("HBaseOutput", "HBaseOutput", hBaseOutputMeta);
+        //第二个 (TextFileOutput)
+        TextFileOutputMeta textFileOutputMeta = new TextFileOutputMeta();
+        textFileOutputMeta.setFilename(data_path + datasetTable.getId());
+        textFileOutputMeta.setExtension("txt");
+        textFileOutputMeta.setSeparator(";");
+        textFileOutputMeta.setFileCompression("None");
+        textFileOutputMeta.setEnclosure("\"");
+        textFileOutputMeta.setEncoding("UTF-8");
+        TextFileField[] outputFields = new TextFileField[1];
+        outputFields[0] = new TextFileField();
+        textFileOutputMeta.setOutputFields(outputFields);
+        StepMeta tostep = new StepMeta("TextFileOutput", "TextFileOutput", textFileOutputMeta);
         tostep.setLocation(600, 100);
         tostep.setDraw(true);
         transMeta.addStep(tostep);
-        TransHopMeta hi1 = new TransHopMeta(fromStep, userDefinedJavaClassStep);
-        TransHopMeta hi2 = new TransHopMeta(userDefinedJavaClassStep, tostep);
+        TransHopMeta hi1 = new TransHopMeta(fromStep, tostep);
         transMeta.addTransHop(hi1);
-        transMeta.addTransHop(hi2);
+//        //第二个 (User defined Java class)
+//        UserDefinedJavaClassMeta userDefinedJavaClassMeta = new UserDefinedJavaClassMeta();
+//        List<UserDefinedJavaClassMeta.FieldInfo> fields = new ArrayList<>();
+//        UserDefinedJavaClassMeta.FieldInfo fieldInfo = new UserDefinedJavaClassMeta.FieldInfo("uuid", ValueMetaInterface.TYPE_STRING, -1, -1);
+//        fields.add(fieldInfo);
+//        userDefinedJavaClassMeta.setFieldInfo(fields);
+//        List<UserDefinedJavaClassDef> definitions = new ArrayList<UserDefinedJavaClassDef>();
+//        UserDefinedJavaClassDef userDefinedJavaClassDef = new UserDefinedJavaClassDef(UserDefinedJavaClassDef.ClassType.TRANSFORM_CLASS, "Processor", code);
+//        userDefinedJavaClassDef.setActive(true);
+//        definitions.add(userDefinedJavaClassDef);
+//        userDefinedJavaClassMeta.replaceDefinitions(definitions);
+//
+//        StepMeta userDefinedJavaClassStep = new StepMeta("UserDefinedJavaClass", "UserDefinedJavaClass", userDefinedJavaClassMeta);
+//        userDefinedJavaClassStep.setLocation(300, 100);
+//        userDefinedJavaClassStep.setDraw(true);
+//        transMeta.addStep(userDefinedJavaClassStep);
+//
+//        //第三个 (HBaseOutputMeta)
+//        NamedClusterService namedClusterService = new NamedClusterManager();
+//        NamedCluster clusterTemplate = new NamedClusterImpl();
+//        clusterTemplate.setName("hadoop");
+//        clusterTemplate.setZooKeeperHost(zkHost);
+//        clusterTemplate.setZooKeeperPort(zkPort);
+//        clusterTemplate.setStorageScheme("HDFS");
+//        namedClusterService.setClusterTemplate(clusterTemplate);
+//
+//        List<ClusterInitializerProvider> providers = new ArrayList<>();
+//        ClusterInitializer clusterInitializer = new ClusterInitializerImpl(providers);
+//        NamedClusterServiceLocator namedClusterServiceLocator = new NamedClusterServiceLocatorImpl(clusterInitializer);
+//
+//        List<RuntimeTestActionHandler> runtimeTestActionHandlers = new ArrayList<>();
+//        RuntimeTestActionHandler defaultHandler = null;
+//
+//        RuntimeTestActionService runtimeTestActionService = new RuntimeTestActionServiceImpl(runtimeTestActionHandlers, defaultHandler);
+//        RuntimeTester runtimeTester = new RuntimeTesterImpl(new ArrayList<>(Arrays.asList(mock(RuntimeTest.class))), mock(ExecutorService.class), "modules");
+//
+//        Put put = new Put((datasetTable.getId() + "," + "target_mapping").getBytes());
+//        for (DatasetTableField datasetTableField : datasetTableFields) {
+//            put.addColumn("columns".getBytes(), (dataease_column_family + "," + datasetTableField.getOriginName() + "," + datasetTableField.getOriginName()).getBytes(), transToColumnType(datasetTableField.getDeType()).getBytes());
+//        }
+//        put.addColumn("key".getBytes(), "uuid".getBytes(), "String".getBytes());
+//        TableName pentaho_mappings = TableName.valueOf(this.pentaho_mappings);
+//        Table tab = getConnection().getTable(pentaho_mappings);
+//        tab.put(put);
+//
+//        HBaseOutputMeta hBaseOutputMeta = new HBaseOutputMeta(namedClusterService, namedClusterServiceLocator, runtimeTestActionService, runtimeTester);
+//        hBaseOutputMeta.setTargetTableName(datasetTable.getId());
+//        hBaseOutputMeta.setTargetMappingName("target_mapping");
+//        hBaseOutputMeta.setNamedCluster(clusterTemplate);
+//        hBaseOutputMeta.setCoreConfigURL(hbase_conf_file);
+//        hBaseOutputMeta.setDisableWriteToWAL(true);
+//        hBaseOutputMeta.setWriteBufferSize("31457280"); //30M
+//        if (extractType.equalsIgnoreCase("incremental_delete")) {
+//            hBaseOutputMeta.setDeleteRowKey(true);
+//        }
+//        StepMeta tostep = new StepMeta("HBaseOutput", "HBaseOutput", hBaseOutputMeta);
+//        tostep.setLocation(600, 100);
+//
+//        tostep.setDraw(true);
+//        transMeta.addStep(tostep);
+//        TransHopMeta hi1 = new TransHopMeta(fromStep, userDefinedJavaClassStep);
+//        TransHopMeta hi2 = new TransHopMeta(userDefinedJavaClassStep, tostep);
+//        transMeta.addTransHop(hi1);
+//        transMeta.addTransHop(hi2);

         String transXml = transMeta.getXML();
         File file = new File(root_path + transName + ".ktr");
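After this change the generated Kettle transformation writes extracted rows straight from the TableInput step to a semicolon-separated text file under data_path (/opt/dataease/data/db/<tableId>.txt); the UserDefinedJavaClass and HBaseOutput steps survive only as the commented-out block. A condensed sketch of the new output wiring follows; the helper class and method names are hypothetical, and it assumes the TransMeta and the TableInput StepMeta are built earlier, as in generateTransFile.

    import org.pentaho.di.trans.TransHopMeta;
    import org.pentaho.di.trans.TransMeta;
    import org.pentaho.di.trans.step.StepMeta;
    import org.pentaho.di.trans.steps.textfileoutput.TextFileField;
    import org.pentaho.di.trans.steps.textfileoutput.TextFileOutputMeta;

    // Condensed sketch mirroring the added lines in generateTransFile: the TableInput step
    // feeds a TextFileOutput step that writes <dataPath><tableId>.txt, semicolon separated,
    // which is exactly the format SparkCalc.getData later splits on.
    public class TextFileOutputWiringSketch {

        static void addTextFileOutput(TransMeta transMeta, StepMeta fromStep, String dataPath, String tableId) {
            TextFileOutputMeta textFileOutputMeta = new TextFileOutputMeta();
            textFileOutputMeta.setFilename(dataPath + tableId);   // base name; the extension is set separately
            textFileOutputMeta.setExtension("txt");               // final file: <tableId>.txt
            textFileOutputMeta.setSeparator(";");                 // matches the split(";") in SparkCalc.getData
            textFileOutputMeta.setFileCompression("None");
            textFileOutputMeta.setEnclosure("\"");
            textFileOutputMeta.setEncoding("UTF-8");
            textFileOutputMeta.setOutputFields(new TextFileField[]{new TextFileField()});

            StepMeta tostep = new StepMeta("TextFileOutput", "TextFileOutput", textFileOutputMeta);
            tostep.setLocation(600, 100);
            tostep.setDraw(true);
            transMeta.addStep(tostep);
            transMeta.addTransHop(new TransHopMeta(fromStep, tostep)); // TableInput -> TextFileOutput, no intermediate step
        }
    }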
backend/src/main/java/io/dataease/service/spark/SparkCalc.java (view file @ f3fad450)

@@ -41,6 +41,7 @@ import java.util.List;
 @Service
 public class SparkCalc {
     private static String column_family = "dataease";
+    private static String data_path = "/opt/dataease/data/db/";
     @Resource
     private Environment env; // 保存了配置文件的信息

@@ -54,12 +55,13 @@ public class SparkCalc {
         sqlContext.setConf("spark.sql.shuffle.partitions", env.getProperty("spark.sql.shuffle.partitions", "1"));
         sqlContext.setConf("spark.default.parallelism", env.getProperty("spark.default.parallelism", "1"));

-        Dataset<Row> dataFrame = CacheUtil.getInstance().getCacheData(hTable);
-        if (ObjectUtils.isEmpty(dataFrame)) {
-            dataFrame = getHBaseDataAndCache(sparkContext, sqlContext, hTable, fields);
-        }
+        Dataset<Row> dataFrame = getData(sparkContext, sqlContext, hTable, fields);
+//        Dataset<Row> dataFrame = CacheUtil.getInstance().getCacheData(hTable);
+//        if (ObjectUtils.isEmpty(dataFrame)) {
+//            dataFrame = getData(sparkContext, sqlContext, hTable, fields);
+//        }

         dataFrame.createOrReplaceTempView(tmpTable);
         Dataset<Row> sql = sqlContext.sql(getSQL(xAxis, yAxis, tmpTable, requestList));
         // transform
         List<String[]> data = new ArrayList<>();

@@ -86,6 +88,69 @@ public class SparkCalc {
         return getHBaseDataAndCache(sparkContext, sqlContext, hTable, fields);
     }

+    public Dataset<Row> getData(JavaSparkContext sparkContext, SQLContext sqlContext, String tableId, List<DatasetTableField> fields) throws Exception {
+        fields.sort((o1, o2) -> {
+            if (o1.getOriginName() == null) {
+                return -1;
+            }
+            if (o2.getOriginName() == null) {
+                return 1;
+            }
+            return o1.getOriginName().compareTo(o2.getOriginName());
+        });
+
+        JavaRDD<String> pairRDD = sparkContext.textFile(data_path + tableId + ".txt");
+
+        JavaRDD<Row> rdd = pairRDD.mapPartitions((FlatMapFunction<java.util.Iterator<String>, Row>) tuple2Iterator -> {
+            List<Row> iterator = new ArrayList<>();
+            while (tuple2Iterator.hasNext()) {
+                String[] items = tuple2Iterator.next().split(";");
+                List<Object> list = new ArrayList<>();
+                for (int i = 0; i < items.length; i++) {
+                    String l = items[i];
+                    DatasetTableField x = fields.get(i);
+                    if (x.getDeType() == 0 || x.getDeType() == 1) {
+                        list.add(l);
+                    } else if (x.getDeType() == 2) {
+                        if (StringUtils.isEmpty(l)) {
+                            l = "0";
+                        }
+                        if (StringUtils.equalsIgnoreCase(l, "Y")) {
+                            l = "1";
+                        }
+                        if (StringUtils.equalsIgnoreCase(l, "N")) {
+                            l = "0";
+                        }
+                        list.add(Long.valueOf(l));
+                    } else if (x.getDeType() == 3) {
+                        if (StringUtils.isEmpty(l)) {
+                            l = "0.0";
+                        }
+                        list.add(Double.valueOf(l));
+                    }
+                }
+                iterator.add(RowFactory.create(list.toArray()));
+            }
+            return iterator.iterator();
+        });
+
+        List<StructField> structFields = new ArrayList<>();
+        // struct顺序要与rdd顺序一致
+        fields.forEach(x -> {
+            if (x.getDeType() == 0 || x.getDeType() == 1) {
+                structFields.add(DataTypes.createStructField(x.getOriginName(), DataTypes.StringType, true));
+            } else if (x.getDeType() == 2) {
+                structFields.add(DataTypes.createStructField(x.getOriginName(), DataTypes.LongType, true));
+            } else if (x.getDeType() == 3) {
+                structFields.add(DataTypes.createStructField(x.getOriginName(), DataTypes.DoubleType, true));
+            }
+        });
+        StructType structType = DataTypes.createStructType(structFields);
+
+        Dataset<Row> dataFrame = sqlContext.createDataFrame(rdd, structType);
+        return dataFrame;
+    }
+
     public Dataset<Row> getHBaseDataAndCache(JavaSparkContext sparkContext, SQLContext sqlContext, String hTable, List<DatasetTableField> fields) throws Exception {
         Scan scan = new Scan();
         scan.addFamily(Bytes.toBytes(column_family));

@@ -145,7 +210,7 @@ public class SparkCalc {
         StructType structType = DataTypes.createStructType(structFields);

         Dataset<Row> dataFrame = sqlContext.createDataFrame(rdd, structType).persist(StorageLevel.MEMORY_AND_DISK_SER());
-        CacheUtil.getInstance().addCacheData(hTable, dataFrame);
+//        CacheUtil.getInstance().addCacheData(hTable, dataFrame);
         dataFrame.count();
         return dataFrame;
     }
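The new SparkCalc.getData(...) reads that extracted text file instead of HBase and types each column from DatasetTableField.getDeType(): 0 and 1 stay strings, 2 becomes a long (with empty, "Y" and "N" normalised to 0/1), and 3 becomes a double. A small self-contained sketch of just that conversion rule, detached from Spark; the sample record and deType order below are invented for the example.

    import java.util.Arrays;
    import java.util.List;

    // Standalone illustration of the type mapping used in SparkCalc.getData:
    // deType 0/1 -> String, deType 2 -> Long ("Y"/"N" and blanks normalised), deType 3 -> Double.
    public class DeTypeMappingSketch {
        public static void main(String[] args) {
            String line = "alice;Y;12.5";                   // one row as written by the TextFileOutput step (separator ";")
            List<Integer> deTypes = Arrays.asList(0, 2, 3); // field types in column order (invented)

            String[] items = line.split(";");
            Object[] row = new Object[items.length];
            for (int i = 0; i < items.length; i++) {
                String value = items[i];
                switch (deTypes.get(i)) {
                    case 2:
                        if (value.isEmpty()) value = "0";
                        if (value.equalsIgnoreCase("Y")) value = "1";
                        if (value.equalsIgnoreCase("N")) value = "0";
                        row[i] = Long.valueOf(value);
                        break;
                    case 3:
                        row[i] = Double.valueOf(value.isEmpty() ? "0.0" : value);
                        break;
                    default:                                 // deType 0 and 1 stay as text
                        row[i] = value;
                }
            }
            System.out.println(Arrays.toString(row));        // prints [alice, 1, 12.5]
        }
    }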