Commit f3fad450 authored by taojinlong

feat: test performance

Parent 6b285d73
@@ -22,15 +22,15 @@ public class CommonConfig {
     private Environment env; // holds the configuration file properties
     private static String root_path = "/opt/dataease/data/kettle/";

-    @Bean
-    @ConditionalOnMissingBean
-    public org.apache.hadoop.conf.Configuration configuration() {
-        org.apache.hadoop.conf.Configuration configuration = new org.apache.hadoop.conf.Configuration();
-        configuration.set("hbase.zookeeper.quorum", env.getProperty("hbase.zookeeper.quorum"));
-        configuration.set("hbase.zookeeper.property.clientPort", env.getProperty("hbase.zookeeper.property.clientPort"));
-        configuration.set("hbase.client.retries.number", env.getProperty("hbase.client.retries.number", "1"));
-        return configuration;
-    }
+//    @Bean
+//    @ConditionalOnMissingBean
+//    public org.apache.hadoop.conf.Configuration configuration() {
+//        org.apache.hadoop.conf.Configuration configuration = new org.apache.hadoop.conf.Configuration();
+//        configuration.set("hbase.zookeeper.quorum", env.getProperty("hbase.zookeeper.quorum"));
+//        configuration.set("hbase.zookeeper.property.clientPort", env.getProperty("hbase.zookeeper.property.clientPort"));
+//        configuration.set("hbase.client.retries.number", env.getProperty("hbase.client.retries.number", "1"));
+//        return configuration;
+//    }

     @Bean
     @ConditionalOnMissingBean
......
@@ -7,6 +7,7 @@ import io.dataease.datasource.dto.MysqlConfigrationDTO;
 import io.dataease.datasource.dto.SqlServerConfigration;
 import io.dataease.datasource.dto.TableFiled;
 import io.dataease.datasource.request.DatasourceRequest;
+import org.apache.arrow.util.VisibleForTesting;
 import org.apache.commons.lang3.StringUtils;
 import org.springframework.stereotype.Service;
@@ -39,6 +40,23 @@ public class JdbcProvider extends DatasourceProvider {
         return list;
     }

+    @VisibleForTesting
+    public void exec(DatasourceRequest datasourceRequest) throws Exception {
+        Connection connection = null;
+        try {
+            connection = getConnectionFromPool(datasourceRequest);
+            Statement stat = connection.createStatement();
+            stat.execute(datasourceRequest.getQuery());
+        } catch (SQLException e) {
+            throw new Exception("ERROR:" + e.getMessage(), e);
+        } catch (Exception e) {
+            throw new Exception("ERROR:" + e.getMessage(), e);
+        } finally {
+            returnSource(connection, datasourceRequest.getDatasource().getId());
+        }
+    }
+
     @Override
     public ResultSet getDataResultSet(DatasourceRequest datasourceRequest) throws Exception {
         ResultSet rs;
@@ -47,7 +65,6 @@ public class JdbcProvider extends DatasourceProvider {
             connection = getConnectionFromPool(datasourceRequest);
             Statement stat = connection.createStatement();
             rs = stat.executeQuery(datasourceRequest.getQuery());
-            returnSource(connection, datasourceRequest.getDatasource().getId());
         } catch (SQLException e) {
             throw new Exception("ERROR:" + e.getMessage(), e);
         } catch (Exception e) {
@@ -66,7 +83,6 @@ public class JdbcProvider extends DatasourceProvider {
             connection = getConnectionFromPool(datasourceRequest);
             Statement stat = connection.createStatement();
             ResultSet rs = stat.executeQuery(datasourceRequest.getQuery() + MessageFormat.format(" LIMIT {0}, {1}", (datasourceRequest.getStartPage() - 1) * datasourceRequest.getPageSize(), datasourceRequest.getPageSize()));
-            returnSource(connection, datasourceRequest.getDatasource().getId());
             list = fetchResult(rs);
         } catch (SQLException e) {
             throw new Exception("ERROR:" + e.getMessage(), e);
@@ -174,8 +190,6 @@ public class JdbcProvider extends DatasourceProvider {
         return list;
     }
-    ;
     @Override
     public void test(DatasourceRequest datasourceRequest) throws Exception {
         String queryStr = getTablesSql(datasourceRequest);
......
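Note on the JdbcProvider changes above: the new exec(...) helper always returns the pooled connection in its finally block, and the returnSource(...) calls inside the try bodies of the two query methods are dropped (presumably the connection is handed back elsewhere, outside the lines shown here). A hypothetical usage sketch of the new helper, not part of this commit; it assumes DatasourceRequest exposes Lombok-style setters matching the getQuery()/getDatasource() accessors used in the diff, and the JdbcProvider package name is assumed:

import io.dataease.datasource.provider.JdbcProvider; // package assumed for illustration
import io.dataease.datasource.request.DatasourceRequest;

// Hypothetical caller of the new JdbcProvider.exec(...) helper (illustration only).
public class ExecUsageSketch {

    public static void createScratchTable(JdbcProvider jdbcProvider, DatasourceRequest request) throws Exception {
        // Any statement that returns no result set; the table name here is made up.
        request.setQuery("CREATE TABLE IF NOT EXISTS perf_test (id BIGINT)");
        // exec() takes a connection from the pool, runs the statement, and always
        // returns the connection in its finally block, even when execution fails.
        jdbcProvider.exec(request);
    }
}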
@@ -33,20 +33,20 @@ public class AppStartReadHBaseListener implements ApplicationListener<ApplicationReadyEvent> {
     @Override
     public void onApplicationEvent(ApplicationReadyEvent applicationReadyEvent) {
-        System.out.println("================= Read HBase start =================");
-        // On startup, find the tables scheduled for extraction among the datasets and load them from HBase into the cache
-        DatasetTableExample datasetTableExample = new DatasetTableExample();
-        datasetTableExample.createCriteria().andModeEqualTo(1);
-        List<DatasetTable> datasetTables = datasetTableMapper.selectByExampleWithBLOBs(datasetTableExample);
-        for (DatasetTable table : datasetTables) {
-            // commonThreadPool.addTask(() -> {
-            try {
-                List<DatasetTableField> fields = dataSetTableFieldsService.getFieldsByTableId(table.getId());
-                sparkCalc.getHBaseDataAndCache(table.getId(), fields);
-            } catch (Exception e) {
-                e.printStackTrace();
-            }
-            // });
-        }
+//        System.out.println("================= Read HBase start =================");
+//        // On startup, find the tables scheduled for extraction among the datasets and load them from HBase into the cache
+//        DatasetTableExample datasetTableExample = new DatasetTableExample();
+//        datasetTableExample.createCriteria().andModeEqualTo(1);
+//        List<DatasetTable> datasetTables = datasetTableMapper.selectByExampleWithBLOBs(datasetTableExample);
+//        for (DatasetTable table : datasetTables) {
+//            // commonThreadPool.addTask(() -> {
+//            try {
+//                List<DatasetTableField> fields = dataSetTableFieldsService.getFieldsByTableId(table.getId());
+//                sparkCalc.getHBaseDataAndCache(table.getId(), fields);
+//            } catch (Exception e) {
+//                e.printStackTrace();
+//            }
+//            // });
+//        }
     }
 }
@@ -56,6 +56,9 @@ import org.pentaho.di.trans.TransHopMeta;
 import org.pentaho.di.trans.TransMeta;
 import org.pentaho.di.trans.step.StepMeta;
 import org.pentaho.di.trans.steps.tableinput.TableInputMeta;
+import org.pentaho.di.trans.steps.textfileoutput.TextFileField;
+import org.pentaho.di.trans.steps.textfileoutput.TextFileOutput;
+import org.pentaho.di.trans.steps.textfileoutput.TextFileOutputMeta;
 import org.pentaho.di.trans.steps.userdefinedjavaclass.InfoStepDefinition;
 import org.pentaho.di.trans.steps.userdefinedjavaclass.UserDefinedJavaClassDef;
 import org.pentaho.di.trans.steps.userdefinedjavaclass.UserDefinedJavaClassMeta;
@@ -105,6 +108,7 @@ public class ExtractDataService {
     private static String currentUpdateTime = "${__current_update_time__}";
     private static String dataease_column_family = "dataease";
     private static String root_path = "/opt/dataease/data/kettle/";
+    private static String data_path = "/opt/dataease/data/db/";
     private static String hbase_conf_file = "/opt/dataease/conf/hbase-site.xml";
     private static String pentaho_mappings = "pentaho_mappings";
@@ -129,7 +133,7 @@
         DatasetTableTaskLog datasetTableTaskLog = new DatasetTableTaskLog();
         UpdateType updateType = UpdateType.valueOf(type);
         try {
-            Admin admin = getConnection().getAdmin();
+//            Admin admin = getConnection().getAdmin();
             DatasetTable datasetTable = dataSetTableService.get(datasetTableId);
             Datasource datasource = datasourceMapper.selectByPrimaryKey(datasetTable.getDataSourceId());
             List<DatasetTableField> datasetTableFields = dataSetTableFieldsService.list(DatasetTableField.builder().tableId(datasetTable.getId()).build());
@@ -141,10 +145,10 @@
                 writeDatasetTableTaskLog(datasetTableTaskLog, datasetTableId, taskId);

                 //check pentaho_mappings table
-                TableName pentaho_mappings = TableName.valueOf(this.pentaho_mappings);
-                if (!admin.tableExists(pentaho_mappings)) {
-                    creatHaseTable(pentaho_mappings, admin, Arrays.asList("columns", "key"));
-                }
+//                TableName pentaho_mappings = TableName.valueOf(this.pentaho_mappings);
+//                if (!admin.tableExists(pentaho_mappings)) {
+//                    creatHaseTable(pentaho_mappings, admin, Arrays.asList("columns", "key"));
+//                }

                 //check pentaho files
                 if (!isExitFile("job_" + datasetTableId + ".kjb") || !isExitFile("trans_" + datasetTableId + ".ktr")) {
@@ -152,25 +156,25 @@
                     generateJobFile("all_scope", datasetTable);
                 }

-                if (!admin.tableExists(hbaseTable)) {
-                    creatHaseTable(hbaseTable, admin, Arrays.asList(dataease_column_family));
-                }
-                admin.disableTable(hbaseTable);
-                admin.truncateTable(hbaseTable, true);
+//                if (!admin.tableExists(hbaseTable)) {
+//                    creatHaseTable(hbaseTable, admin, Arrays.asList(dataease_column_family));
+//                }
+//                admin.disableTable(hbaseTable);
+//                admin.truncateTable(hbaseTable, true);

                 extractData(datasetTable, "all_scope");
                 // after sync completes, read data into the cache from HBase
-                sparkCalc.getHBaseDataAndCache(datasetTableId, dataSetTableFieldsService.getFieldsByTableId(datasetTableId));
+//                sparkCalc.getHBaseDataAndCache(datasetTableId, dataSetTableFieldsService.getFieldsByTableId(datasetTableId));
                 datasetTableTaskLog.setStatus(JobStatus.Completed.name());
                 datasetTableTaskLog.setEndTime(System.currentTimeMillis());
                 dataSetTableTaskLogService.save(datasetTableTaskLog);
                 break;
             case add_scope:
                 // incremental update
-                if (!admin.tableExists(hbaseTable)) {
-                    LogUtil.error("TableName error, dataaset: " + datasetTableId);
-                    return;
-                }
+//                if (!admin.tableExists(hbaseTable)) {
+//                    LogUtil.error("TableName error, dataaset: " + datasetTableId);
+//                    return;
+//                }
                 DatasetTableIncrementalConfig datasetTableIncrementalConfig = dataSetTableService.incrementalConfig(datasetTableId);
                 if (datasetTableIncrementalConfig == null || StringUtils.isEmpty(datasetTableIncrementalConfig.getTableId())) {
                     return;
@@ -209,7 +213,7 @@
                     extractData(datasetTable, "incremental_delete");
                 }
                 // after sync completes, read data into the cache from HBase
-                sparkCalc.getHBaseDataAndCache(datasetTableId, dataSetTableFieldsService.getFieldsByTableId(datasetTableId));
+//                sparkCalc.getHBaseDataAndCache(datasetTableId, dataSetTableFieldsService.getFieldsByTableId(datasetTableId));
                 datasetTableTaskLog.setStatus(JobStatus.Completed.name());
                 datasetTableTaskLog.setEndTime(System.currentTimeMillis());
                 dataSetTableTaskLogService.save(datasetTableTaskLog);
@@ -239,17 +243,17 @@
         dataSetTableTaskLogService.save(datasetTableTaskLog);
     }

-    private void creatHaseTable(TableName tableName, Admin admin, List<String> columnFamily) throws Exception {
-        TableDescriptorBuilder descBuilder = TableDescriptorBuilder.newBuilder(tableName);
-        Collection<ColumnFamilyDescriptor> families = new ArrayList<>();
-        for (String s : columnFamily) {
-            ColumnFamilyDescriptor hcd = ColumnFamilyDescriptorBuilder.of(s);
-            families.add(hcd);
-        }
-        descBuilder.setColumnFamilies(families);
-        TableDescriptor desc = descBuilder.build();
-        admin.createTable(desc);
-    }
+//    private void creatHaseTable(TableName tableName, Admin admin, List<String> columnFamily) throws Exception {
+//        TableDescriptorBuilder descBuilder = TableDescriptorBuilder.newBuilder(tableName);
+//        Collection<ColumnFamilyDescriptor> families = new ArrayList<>();
+//        for (String s : columnFamily) {
+//            ColumnFamilyDescriptor hcd = ColumnFamilyDescriptorBuilder.of(s);
+//            families.add(hcd);
+//        }
+//        descBuilder.setColumnFamilies(families);
+//        TableDescriptor desc = descBuilder.build();
+//        admin.createTable(desc);
+//    }

     private void extractData(DatasetTable datasetTable, String extractType) throws Exception {
         KettleFileRepository repository = CommonBeanFactory.getBean(KettleFileRepository.class);
@@ -285,13 +289,13 @@
         }
     }

-    private synchronized Connection getConnection() throws Exception {
-        if (connection == null || connection.isClosed()) {
-            Configuration cfg = CommonBeanFactory.getBean(Configuration.class);
-            connection = ConnectionFactory.createConnection(cfg, pool);
-        }
-        return connection;
-    }
+//    private synchronized Connection getConnection() throws Exception {
+//        if (connection == null || connection.isClosed()) {
+//            Configuration cfg = CommonBeanFactory.getBean(Configuration.class);
+//            connection = ConnectionFactory.createConnection(cfg, pool);
+//        }
+//        return connection;
+//    }

     private boolean isExitFile(String fileName) {
         File file = new File(root_path + fileName);
@@ -380,6 +384,15 @@
         switch (extractType) {
             case "all_scope":
                 transName = "trans_" + datasetTable.getId();
+                datasetTableFields.sort((o1, o2) -> {
+                    if (o1.getOriginName() == null) {
+                        return -1;
+                    }
+                    if (o2.getOriginName() == null) {
+                        return 1;
+                    }
+                    return o1.getOriginName().compareTo(o2.getOriginName());
+                });
                 selectSQL = dataSetTableService.createQuerySQL(datasource.getType(), table, datasetTableFields.stream().map(DatasetTableField::getOriginName).toArray(String[]::new));
                 break;
             case "incremental_add":
@@ -422,70 +435,90 @@
         fromStep.setLocation(100, 100);
         transMeta.addStep(fromStep);

-        // Second step (User defined Java class)
-        UserDefinedJavaClassMeta userDefinedJavaClassMeta = new UserDefinedJavaClassMeta();
-        List<UserDefinedJavaClassMeta.FieldInfo> fields = new ArrayList<>();
-        UserDefinedJavaClassMeta.FieldInfo fieldInfo = new UserDefinedJavaClassMeta.FieldInfo("uuid", ValueMetaInterface.TYPE_STRING, -1, -1);
-        fields.add(fieldInfo);
-        userDefinedJavaClassMeta.setFieldInfo(fields);
-        List<UserDefinedJavaClassDef> definitions = new ArrayList<UserDefinedJavaClassDef>();
-        UserDefinedJavaClassDef userDefinedJavaClassDef = new UserDefinedJavaClassDef(UserDefinedJavaClassDef.ClassType.TRANSFORM_CLASS, "Processor", code);
-        userDefinedJavaClassDef.setActive(true);
-        definitions.add(userDefinedJavaClassDef);
-        userDefinedJavaClassMeta.replaceDefinitions(definitions);
-        StepMeta userDefinedJavaClassStep = new StepMeta("UserDefinedJavaClass", "UserDefinedJavaClass", userDefinedJavaClassMeta);
-        userDefinedJavaClassStep.setLocation(300, 100);
-        userDefinedJavaClassStep.setDraw(true);
-        transMeta.addStep(userDefinedJavaClassStep);
-        // Third step (HBaseOutputMeta)
-        NamedClusterService namedClusterService = new NamedClusterManager();
-        NamedCluster clusterTemplate = new NamedClusterImpl();
-        clusterTemplate.setName("hadoop");
-        clusterTemplate.setZooKeeperHost(zkHost);
-        clusterTemplate.setZooKeeperPort(zkPort);
-        clusterTemplate.setStorageScheme("HDFS");
-        namedClusterService.setClusterTemplate(clusterTemplate);
-        List<ClusterInitializerProvider> providers = new ArrayList<>();
-        ClusterInitializer clusterInitializer = new ClusterInitializerImpl(providers);
-        NamedClusterServiceLocator namedClusterServiceLocator = new NamedClusterServiceLocatorImpl(clusterInitializer);
-        List<RuntimeTestActionHandler> runtimeTestActionHandlers = new ArrayList<>();
-        RuntimeTestActionHandler defaultHandler = null;
-        RuntimeTestActionService runtimeTestActionService = new RuntimeTestActionServiceImpl(runtimeTestActionHandlers, defaultHandler);
-        RuntimeTester runtimeTester = new RuntimeTesterImpl(new ArrayList<>(Arrays.asList(mock(RuntimeTest.class))), mock(ExecutorService.class), "modules");
-        Put put = new Put((datasetTable.getId() + "," + "target_mapping").getBytes());
-        for (DatasetTableField datasetTableField : datasetTableFields) {
-            put.addColumn("columns".getBytes(), (dataease_column_family + "," + datasetTableField.getOriginName() + "," + datasetTableField.getOriginName()).getBytes(), transToColumnType(datasetTableField.getDeType()).getBytes());
-        }
-        put.addColumn("key".getBytes(), "uuid".getBytes(), "String".getBytes());
-        TableName pentaho_mappings = TableName.valueOf(this.pentaho_mappings);
-        Table tab = getConnection().getTable(pentaho_mappings);
-        tab.put(put);
-        HBaseOutputMeta hBaseOutputMeta = new HBaseOutputMeta(namedClusterService, namedClusterServiceLocator, runtimeTestActionService, runtimeTester);
-        hBaseOutputMeta.setTargetTableName(datasetTable.getId());
-        hBaseOutputMeta.setTargetMappingName("target_mapping");
-        hBaseOutputMeta.setNamedCluster(clusterTemplate);
-        hBaseOutputMeta.setCoreConfigURL(hbase_conf_file);
-        hBaseOutputMeta.setDisableWriteToWAL(true);
-        hBaseOutputMeta.setWriteBufferSize("31457280"); //30M
-        if (extractType.equalsIgnoreCase("incremental_delete")) {
-            hBaseOutputMeta.setDeleteRowKey(true);
-        }
-        StepMeta tostep = new StepMeta("HBaseOutput", "HBaseOutput", hBaseOutputMeta);
+        // Second step (TextFileOutput)
+        TextFileOutputMeta textFileOutputMeta = new TextFileOutputMeta();
+        textFileOutputMeta.setFilename(data_path + datasetTable.getId());
+        textFileOutputMeta.setExtension("txt");
+        textFileOutputMeta.setSeparator(";");
+        textFileOutputMeta.setFileCompression("None");
+        textFileOutputMeta.setEnclosure("\"");
+        textFileOutputMeta.setEncoding("UTF-8");
+        TextFileField[] outputFields = new TextFileField[1];
+        outputFields[0] = new TextFileField();
+        textFileOutputMeta.setOutputFields(outputFields);
+        StepMeta tostep = new StepMeta("TextFileOutput", "TextFileOutput", textFileOutputMeta);
         tostep.setLocation(600, 100);
         tostep.setDraw(true);
         transMeta.addStep(tostep);
-        TransHopMeta hi1 = new TransHopMeta(fromStep, userDefinedJavaClassStep);
-        TransHopMeta hi2 = new TransHopMeta(userDefinedJavaClassStep, tostep);
+        TransHopMeta hi1 = new TransHopMeta(fromStep, tostep);
         transMeta.addTransHop(hi1);
-        transMeta.addTransHop(hi2);
+
+//        // Second step (User defined Java class)
+//        UserDefinedJavaClassMeta userDefinedJavaClassMeta = new UserDefinedJavaClassMeta();
+//        List<UserDefinedJavaClassMeta.FieldInfo> fields = new ArrayList<>();
+//        UserDefinedJavaClassMeta.FieldInfo fieldInfo = new UserDefinedJavaClassMeta.FieldInfo("uuid", ValueMetaInterface.TYPE_STRING, -1, -1);
+//        fields.add(fieldInfo);
+//        userDefinedJavaClassMeta.setFieldInfo(fields);
+//        List<UserDefinedJavaClassDef> definitions = new ArrayList<UserDefinedJavaClassDef>();
+//        UserDefinedJavaClassDef userDefinedJavaClassDef = new UserDefinedJavaClassDef(UserDefinedJavaClassDef.ClassType.TRANSFORM_CLASS, "Processor", code);
+//        userDefinedJavaClassDef.setActive(true);
+//        definitions.add(userDefinedJavaClassDef);
+//        userDefinedJavaClassMeta.replaceDefinitions(definitions);
+//
+//        StepMeta userDefinedJavaClassStep = new StepMeta("UserDefinedJavaClass", "UserDefinedJavaClass", userDefinedJavaClassMeta);
+//        userDefinedJavaClassStep.setLocation(300, 100);
+//        userDefinedJavaClassStep.setDraw(true);
+//        transMeta.addStep(userDefinedJavaClassStep);
+//
+//        // Third step (HBaseOutputMeta)
+//        NamedClusterService namedClusterService = new NamedClusterManager();
+//        NamedCluster clusterTemplate = new NamedClusterImpl();
+//        clusterTemplate.setName("hadoop");
+//        clusterTemplate.setZooKeeperHost(zkHost);
+//        clusterTemplate.setZooKeeperPort(zkPort);
+//        clusterTemplate.setStorageScheme("HDFS");
+//        namedClusterService.setClusterTemplate(clusterTemplate);
+//
+//        List<ClusterInitializerProvider> providers = new ArrayList<>();
+//        ClusterInitializer clusterInitializer = new ClusterInitializerImpl(providers);
+//        NamedClusterServiceLocator namedClusterServiceLocator = new NamedClusterServiceLocatorImpl(clusterInitializer);
+//
+//        List<RuntimeTestActionHandler> runtimeTestActionHandlers = new ArrayList<>();
+//        RuntimeTestActionHandler defaultHandler = null;
+//
+//        RuntimeTestActionService runtimeTestActionService = new RuntimeTestActionServiceImpl(runtimeTestActionHandlers, defaultHandler);
+//        RuntimeTester runtimeTester = new RuntimeTesterImpl(new ArrayList<>(Arrays.asList(mock(RuntimeTest.class))), mock(ExecutorService.class), "modules");
+//
+//        Put put = new Put((datasetTable.getId() + "," + "target_mapping").getBytes());
+//        for (DatasetTableField datasetTableField : datasetTableFields) {
+//            put.addColumn("columns".getBytes(), (dataease_column_family + "," + datasetTableField.getOriginName() + "," + datasetTableField.getOriginName()).getBytes(), transToColumnType(datasetTableField.getDeType()).getBytes());
+//        }
+//        put.addColumn("key".getBytes(), "uuid".getBytes(), "String".getBytes());
+//        TableName pentaho_mappings = TableName.valueOf(this.pentaho_mappings);
+//        Table tab = getConnection().getTable(pentaho_mappings);
+//        tab.put(put);
+//
+//        HBaseOutputMeta hBaseOutputMeta = new HBaseOutputMeta(namedClusterService, namedClusterServiceLocator, runtimeTestActionService, runtimeTester);
+//        hBaseOutputMeta.setTargetTableName(datasetTable.getId());
+//        hBaseOutputMeta.setTargetMappingName("target_mapping");
+//        hBaseOutputMeta.setNamedCluster(clusterTemplate);
+//        hBaseOutputMeta.setCoreConfigURL(hbase_conf_file);
+//        hBaseOutputMeta.setDisableWriteToWAL(true);
+//        hBaseOutputMeta.setWriteBufferSize("31457280"); //30M
+//        if (extractType.equalsIgnoreCase("incremental_delete")) {
+//            hBaseOutputMeta.setDeleteRowKey(true);
+//        }
+//        StepMeta tostep = new StepMeta("HBaseOutput", "HBaseOutput", hBaseOutputMeta);
+//        tostep.setLocation(600, 100);
+//
+//        tostep.setDraw(true);
+//        transMeta.addStep(tostep);
+//        TransHopMeta hi1 = new TransHopMeta(fromStep, userDefinedJavaClassStep);
+//        TransHopMeta hi2 = new TransHopMeta(userDefinedJavaClassStep, tostep);
+//        transMeta.addTransHop(hi1);
+//        transMeta.addTransHop(hi2);

         String transXml = transMeta.getXML();
         File file = new File(root_path + transName + ".ktr");
......
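Note on the ExtractDataService changes above: the Kettle transformation now ends in a TextFileOutput step that writes /opt/dataease/data/db/<datasetTableId>.txt as a semicolon-separated, double-quote-enclosed, UTF-8 text file, and datasetTableFields is sorted by originName, matching the sort SparkCalc.getData applies before building its schema. A rough, self-contained sketch (not project code) of what one exported record looks like and how it is split back apart with the same ";" separator used in SparkCalc.getData below; the field names and values are made up:

// Rough illustration of the round trip between the TextFileOutput settings above
// (separator ";", encoding UTF-8) and the line splitting done later in SparkCalc.getData.
public class TextExportSketch {
    public static void main(String[] args) {
        // One exported row for hypothetical columns sorted by originName: age;city;name
        String line = "28;Beijing;Alice";
        String[] items = line.split(";");      // same split(";") as SparkCalc.getData
        long age = Long.parseLong(items[0]);   // a deType 2 column becomes a LongType value
        String city = items[1];                // deType 0/1 columns stay StringType
        String name = items[2];
        System.out.println(name + " from " + city + ", age " + age);
    }
}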
@@ -41,6 +41,7 @@ import java.util.List;
 @Service
 public class SparkCalc {
     private static String column_family = "dataease";
+    private static String data_path = "/opt/dataease/data/db/";

     @Resource
     private Environment env; // holds the configuration file properties
@@ -54,12 +55,13 @@ public class SparkCalc {
         sqlContext.setConf("spark.sql.shuffle.partitions", env.getProperty("spark.sql.shuffle.partitions", "1"));
         sqlContext.setConf("spark.default.parallelism", env.getProperty("spark.default.parallelism", "1"));

-        Dataset<Row> dataFrame = CacheUtil.getInstance().getCacheData(hTable);
-        if (ObjectUtils.isEmpty(dataFrame)) {
-            dataFrame = getHBaseDataAndCache(sparkContext, sqlContext, hTable, fields);
-        }
+        Dataset<Row> dataFrame = getData(sparkContext, sqlContext, hTable, fields);
+//        Dataset<Row> dataFrame = CacheUtil.getInstance().getCacheData(hTable);
+//        if (ObjectUtils.isEmpty(dataFrame)) {
+//            dataFrame = getData(sparkContext, sqlContext, hTable, fields);
+//        }

         dataFrame.createOrReplaceTempView(tmpTable);
         Dataset<Row> sql = sqlContext.sql(getSQL(xAxis, yAxis, tmpTable, requestList));
         // transform
         List<String[]> data = new ArrayList<>();
@@ -86,6 +88,69 @@
         return getHBaseDataAndCache(sparkContext, sqlContext, hTable, fields);
     }

+    public Dataset<Row> getData(JavaSparkContext sparkContext, SQLContext sqlContext, String tableId, List<DatasetTableField> fields) throws Exception {
+        fields.sort((o1, o2) -> {
+            if (o1.getOriginName() == null) {
+                return -1;
+            }
+            if (o2.getOriginName() == null) {
+                return 1;
+            }
+            return o1.getOriginName().compareTo(o2.getOriginName());
+        });
+
+        JavaRDD<String> pairRDD = sparkContext.textFile(data_path + tableId + ".txt");
+
+        JavaRDD<Row> rdd = pairRDD.mapPartitions((FlatMapFunction<java.util.Iterator<String>, Row>) tuple2Iterator -> {
+            List<Row> iterator = new ArrayList<>();
+            while (tuple2Iterator.hasNext()) {
+                String[] items = tuple2Iterator.next().split(";");
+                List<Object> list = new ArrayList<>();
+                for (int i = 0; i < items.length; i++) {
+                    String l = items[i];
+                    DatasetTableField x = fields.get(i);
+                    if (x.getDeType() == 0 || x.getDeType() == 1) {
+                        list.add(l);
+                    } else if (x.getDeType() == 2) {
+                        if (StringUtils.isEmpty(l)) {
+                            l = "0";
+                        }
+                        if (StringUtils.equalsIgnoreCase(l, "Y")) {
+                            l = "1";
+                        }
+                        if (StringUtils.equalsIgnoreCase(l, "N")) {
+                            l = "0";
+                        }
+                        list.add(Long.valueOf(l));
+                    } else if (x.getDeType() == 3) {
+                        if (StringUtils.isEmpty(l)) {
+                            l = "0.0";
+                        }
+                        list.add(Double.valueOf(l));
+                    }
+                }
+                iterator.add(RowFactory.create(list.toArray()));
+            }
+            return iterator.iterator();
+        });
+
+        List<StructField> structFields = new ArrayList<>();
+        // the struct field order must match the column order in the RDD
+        fields.forEach(x -> {
+            if (x.getDeType() == 0 || x.getDeType() == 1) {
+                structFields.add(DataTypes.createStructField(x.getOriginName(), DataTypes.StringType, true));
+            } else if (x.getDeType() == 2) {
+                structFields.add(DataTypes.createStructField(x.getOriginName(), DataTypes.LongType, true));
+            } else if (x.getDeType() == 3) {
+                structFields.add(DataTypes.createStructField(x.getOriginName(), DataTypes.DoubleType, true));
+            }
+        });
+        StructType structType = DataTypes.createStructType(structFields);
+
+        Dataset<Row> dataFrame = sqlContext.createDataFrame(rdd, structType);
+        return dataFrame;
+    }
+
     public Dataset<Row> getHBaseDataAndCache(JavaSparkContext sparkContext, SQLContext sqlContext, String hTable, List<DatasetTableField> fields) throws Exception {
         Scan scan = new Scan();
         scan.addFamily(Bytes.toBytes(column_family));
@@ -145,7 +210,7 @@ public class SparkCalc {
         StructType structType = DataTypes.createStructType(structFields);

         Dataset<Row> dataFrame = sqlContext.createDataFrame(rdd, structType).persist(StorageLevel.MEMORY_AND_DISK_SER());
-        CacheUtil.getInstance().addCacheData(hTable, dataFrame);
+//        CacheUtil.getInstance().addCacheData(hTable, dataFrame);
         dataFrame.count();
         return dataFrame;
     }
......
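Note on SparkCalc.getData above: before rows are handed to Spark, each cell is coerced according to the field's deType — deType 0/1 columns stay strings (StringType), deType 2 maps to LongType with empty cells defaulting to 0 and the literals Y/N coerced to 1/0, and deType 3 maps to DoubleType with empty cells defaulting to 0.0. A standalone sketch of that per-cell rule, runnable on its own with a few hypothetical inputs:

import org.apache.commons.lang3.StringUtils;

// Standalone copy of the per-cell coercion rules used in SparkCalc.getData,
// shown for a few made-up raw values; deType 2 -> long, deType 3 -> double.
public class CellCoercionSketch {
    static Object coerce(String raw, int deType) {
        if (deType == 0 || deType == 1) {
            return raw;                                        // kept as a string column
        } else if (deType == 2) {
            String l = raw;
            if (StringUtils.isEmpty(l)) l = "0";               // empty -> 0
            if (StringUtils.equalsIgnoreCase(l, "Y")) l = "1"; // Y -> 1
            if (StringUtils.equalsIgnoreCase(l, "N")) l = "0"; // N -> 0
            return Long.valueOf(l);
        } else if (deType == 3) {
            String l = StringUtils.isEmpty(raw) ? "0.0" : raw; // empty -> 0.0
            return Double.valueOf(l);
        }
        return raw;
    }

    public static void main(String[] args) {
        System.out.println(coerce("Y", 2));   // 1
        System.out.println(coerce("", 3));    // 0.0
        System.out.println(coerce("abc", 1)); // abc
    }
}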