def prepare_topic():
    """Cache a MySQL data source and return the ``topic_x`` fixture bound to it.

    The data source is put into the data source cache before the topic is
    built. The returned topic is a distinct business topic with two factors,
    both assigned to index group ``u-1``.
    """
    mysql_source = DataSource(
        dataSourceId='1',
        dataSourceCode='ds1',
        dataSourceType=DataSourceType.MYSQL,
        host='localhost',
        port='3306',
        username='******',
        password='******',
        name='watchmen',
        tenantId='1',
    )
    CacheService.data_source().put(mysql_source)

    id_factor = Factor(factorId='1', name='topic1_id', type=FactorType.SEQUENCE, indexGroup='u-1')
    text_factor = Factor(
        factorId='2', name='topic1_text', type=FactorType.TEXT, precision='64', indexGroup='u-1')

    return Topic(
        topicId='1',
        name='topic_x',
        type=TopicType.DISTINCT,
        kind=TopicKind.BUSINESS,
        factors=[id_factor, text_factor],
        dataSourceId=mysql_source.dataSourceId,
        tenantId='1',
    )
def prepare_data(self):
    """Create the data source, topic, space and connected space fixtures.

    Each entity is created through its own service inside its own
    transaction. The data source and the topic are additionally put into
    their caches; the space and the connected space are only persisted.
    """

    def persist(service, entity):
        # one begin/create/commit cycle per entity, same as doing it inline
        service.begin_transaction()
        service.create(entity)
        service.commit_transaction()

    mysql_source = DataSource(
        dataSourceId='1',
        dataSourceCode='test',
        dataSourceType=DataSourceType.MYSQL,
        host='localhost',
        port='3306',
        username='******',
        password='******',
        name='watchmen',
        tenantId='1',
    )
    persist(get_data_source_service(create_fake_principal_service()), mysql_source)
    CacheService.data_source().put(mysql_source)

    topic1 = Topic(
        topicId='1',
        name='topic1',
        type=TopicType.DISTINCT,
        kind=TopicKind.BUSINESS,
        factors=[
            Factor(factorId='1', name='topic1_id', type=FactorType.SEQUENCE),
            Factor(factorId='2', name='topic1_enabled', type=FactorType.BOOLEAN),
        ],
        dataSourceId=mysql_source.dataSourceId,
        tenantId='1',
    )
    persist(get_topic_service(create_fake_principal_service()), topic1)
    CacheService.topic().put(topic1)

    space = Space(spaceId='1', name='Space1', topicIds=[topic1.topicId], tenantId='1')
    persist(get_space_service(create_fake_principal_service()), space)

    connected_space = ConnectedSpace(
        connectId='1',
        name='ConnectedSpace1',
        spaceId=space.spaceId,
        isTemplate=False,
        userId='1',
        tenantId='1',
    )
    persist(get_connected_space_service(create_fake_principal_service()), connected_space)
def build_target_topic_factors(source_topic: Topic) -> List[Factor]:
    """Build the factor list of the snapshot target topic.

    The source topic's factors are passed through ``redress_factor_id`` and
    a text ``snapshottag`` factor is appended after them, with an id one
    past the number of source factors.

    :param source_topic: topic whose factors are the base of the target topic
    :return: a new list holding the redressed factors plus the snapshot tag factor
    """
    redressed = ArrayHelper(source_topic.factors).map_with_index(redress_factor_id).to_list()
    snapshot_tag = Factor(
        factorId=f'ss-{len(source_topic.factors) + 1}',
        type=FactorType.TEXT,
        name='snapshottag',
        label='Snapshot Tag',
        indexGroup=FactorIndexGroup.INDEX_1,
        precision='10',
    )
    return [*redressed, snapshot_tag]
def prepare_new_topic():
    """Return the changed ``topic_x`` fixture with three factors.

    The topic is a distinct business topic on data source ``'1'``: a
    sequence factor without index group, plus two text factors (precision
    64 and 32) that share index group ``u-1``.
    """
    return Topic(
        topicId='1',
        name='topic_x',
        type=TopicType.DISTINCT,
        kind=TopicKind.BUSINESS,
        factors=[
            Factor(factorId='1', name='topic1_id', type=FactorType.SEQUENCE),
            Factor(
                factorId='2', name='topic1_text', type=FactorType.TEXT,
                precision='64', indexGroup='u-1'),
            # was factorId='2' as well, which collided with topic1_text;
            # each factor gets its own id within the topic
            Factor(
                factorId='3', name='topic1_text2', type=FactorType.TEXT,
                precision='32', indexGroup='u-1'),
        ],
        dataSourceId='1',
        tenantId='1',
    )
def build_task_topic_factors(source_topic: Topic) -> List[Factor]:
    """Build the factor list of the snapshot task topic.

    Layout of the returned list: an ``originaldataid`` factor with id
    ``ss-0``, then the source topic's factors passed through
    ``redress_factor_id``, then five flattened text factors (status,
    snapshot tag, target topic name, job id, scheduler id) whose ids
    continue after the number of source factors.

    :param source_topic: topic whose factors are embedded into the task topic
    :return: a new list of factors in the order described above
    """
    original_data_id = Factor(
        factorId='ss-0',
        type=FactorType.TEXT,
        name='originaldataid',
        flatten=True,
        label='Original Data Id',
        precision='50',
    )
    redressed = ArrayHelper(source_topic.factors).map_with_index(redress_factor_id).to_list()
    source_count = len(source_topic.factors)

    def trailing(offset: int, name: str, label: str, index_group, precision: str) -> Factor:
        # task bookkeeping factor, numbered after the source factors
        return Factor(
            factorId=f'ss-{source_count + offset}',
            type=FactorType.TEXT,
            name=name,
            label=label,
            flatten=True,
            indexGroup=index_group,
            precision=precision,
        )

    return [
        original_data_id,
        *redressed,
        trailing(1, 'status', 'Status of task', FactorIndexGroup.INDEX_1, '20'),
        trailing(2, 'snapshottag', 'Snapshot Tag', FactorIndexGroup.INDEX_2, '10'),
        trailing(3, 'targettopicname', 'Target topic name', FactorIndexGroup.INDEX_3, '50'),
        trailing(4, 'jobid', 'Job Id', FactorIndexGroup.INDEX_4, '50'),
        trailing(5, 'schedulerid', 'Job Scheduler Id', FactorIndexGroup.INDEX_5, '50'),
    ]
def test_criteria(self):
    """Parse and run a constant-vs-template equality condition, printing the result.

    The condition is an AND joint with a single expression comparing the
    constant ``ABC`` to the constant template
    ``{policy.productCode}-{policy.policyNo}``. It is parsed against a
    ``policy`` topic schema and run with a ``policy`` variable; the outcome
    is printed rather than asserted.
    """
    literal = ConstantParameter(kind=ParameterKind.CONSTANT, value="ABC")
    template = ConstantParameter(
        kind=ParameterKind.CONSTANT, value="{policy.productCode}-{policy.policyNo}")
    equals = ParameterExpression(
        left=literal, operator=ParameterExpressionOperator.EQUALS, right=template)
    joint = ParameterJoint(jointType=ParameterJointType.AND, filters=[equals])

    policy_topic = Topic(
        name='policy',
        factors=[
            Factor(name='productCode', type=FactorType.TEXT),
            Factor(name='policyNo', type=FactorType.TEXT),
        ],
    )
    policy_schema = TopicSchema(policy_topic)
    parsed = parse_condition_for_storage(
        joint, [policy_schema], create_fake_principal_service(), True)

    variables = PipelineVariables(None, {}, None)
    variables.put('policy', {'productCode': 'A001', 'policyNo': 'P001'})

    outcome = parsed.run(variables, create_fake_principal_service())
    print(outcome.to_dict())
def fill_factor_id(factor: Factor) -> None:
    """Give the factor a newly generated id and remember the old-to-new mapping.

    NOTE(review): ``factor_service`` and ``factor_id_map`` are not defined in
    this function; presumably they come from an enclosing scope — confirm.

    :param factor: factor whose ``factorId`` is replaced in place
    """
    generated_id = factor_service.generate_factor_id()
    # key by the id the factor carried on entry, before it is overwritten
    factor_id_map[factor.factorId] = generated_id
    factor.factorId = generated_id
def redress_factor_id(factor: Factor, index: int) -> Factor:
    """Strip the index group and renumber the factor as ``ss-<index + 1>``.

    The given factor object is modified in place and returned.

    :param factor: factor to rewrite
    :param index: zero-based position of the factor
    :return: the same factor instance that was passed in
    """
    ordinal = index + 1
    # remove index
    factor.indexGroup = None
    factor.factorId = 'ss-' + str(ordinal)
    return factor
def ask_pipeline_monitor_topics() -> List[Topic]:
    """Return the system topics used for pipeline monitoring.

    Currently a single raw topic, ``raw_pipeline_monitor_log``, whose factor
    ids are ``rpml-f-1`` to ``rpml-f-47``.

    :return: list holding the pipeline monitor log raw topic
    """

    # TODO define all pipeline monitor topics

    def log_factor(seq: int, name: str, factor_type: FactorType, **extra) -> Factor:
        # extra keyword arguments are forwarded untouched, so a factor only
        # receives the settings listed for it below
        return Factor(factorId=f'rpml-f-{seq}', name=name, type=factor_type, **extra)

    factors = [
        # top level of the monitor log
        log_factor(1, 'uid', FactorType.TEXT),
        log_factor(
            2, 'traceId', FactorType.TEXT,
            flatten=True, indexGroup=FactorIndexGroup.INDEX_1, precision='50'),
        log_factor(
            3, 'pipelineId', FactorType.TEXT,
            flatten=True, indexGroup=FactorIndexGroup.INDEX_2, precision='50'),
        log_factor(
            4, 'topicId', FactorType.TEXT,
            flatten=True, indexGroup=FactorIndexGroup.INDEX_3, precision='50'),
        log_factor(5, 'prerequisite', FactorType.BOOLEAN),
        log_factor(6, 'prerequisiteDefinedAs', FactorType.OBJECT),
        log_factor(7, 'status', FactorType.TEXT, flatten=True),
        log_factor(8, 'startTime', FactorType.FULL_DATETIME, flatten=True),
        log_factor(9, 'spentInMills', FactorType.UNSIGNED, flatten=True),
        log_factor(10, 'error', FactorType.TEXT),
        log_factor(11, 'oldValue', FactorType.OBJECT),
        log_factor(12, 'newValue', FactorType.OBJECT),
        # stages
        log_factor(13, 'stages', FactorType.ARRAY),
        log_factor(14, 'stages.stageId', FactorType.TEXT),
        log_factor(15, 'stages.name', FactorType.TEXT),
        log_factor(16, 'stages.prerequisite', FactorType.BOOLEAN),
        log_factor(17, 'stages.prerequisiteDefinedAs', FactorType.OBJECT),
        log_factor(18, 'stages.status', FactorType.TEXT),
        log_factor(19, 'stages.startTime', FactorType.FULL_DATETIME),
        log_factor(20, 'stages.spentInMills', FactorType.UNSIGNED),
        log_factor(21, 'stages.error', FactorType.TEXT),
        # units of a stage
        log_factor(22, 'stages.units', FactorType.ARRAY),
        log_factor(23, 'stages.units.unitId', FactorType.TEXT),
        log_factor(24, 'stages.units.name', FactorType.TEXT),
        log_factor(25, 'stages.units.prerequisite', FactorType.BOOLEAN),
        log_factor(26, 'stages.units.prerequisiteDefinedAs', FactorType.OBJECT),
        log_factor(27, 'stages.units.loopVariableName', FactorType.TEXT),
        log_factor(28, 'stages.units.loopVariableValue', FactorType.ARRAY),
        log_factor(29, 'stages.units.status', FactorType.TEXT),
        log_factor(30, 'stages.units.startTime', FactorType.FULL_DATETIME),
        log_factor(31, 'stages.units.spentInMills', FactorType.UNSIGNED),
        log_factor(32, 'stages.units.error', FactorType.TEXT),
        # actions of a unit
        log_factor(33, 'stages.units.actions', FactorType.ARRAY),
        log_factor(34, 'stages.units.actions.uid', FactorType.TEXT),
        log_factor(35, 'stages.units.actions.actionId', FactorType.TEXT),
        log_factor(36, 'stages.units.actions.type', FactorType.TEXT),
        log_factor(37, 'stages.units.actions.insertCount', FactorType.UNSIGNED),
        log_factor(38, 'stages.units.actions.updateCount', FactorType.UNSIGNED),
        log_factor(39, 'stages.units.actions.deleteCount', FactorType.UNSIGNED),
        log_factor(40, 'stages.units.actions.definedAs', FactorType.OBJECT),
        log_factor(41, 'stages.units.actions.findBy', FactorType.OBJECT),
        log_factor(42, 'stages.units.actions.touched', FactorType.OBJECT),
        log_factor(43, 'stages.units.actions.status', FactorType.TEXT),
        log_factor(44, 'stages.units.actions.startTime', FactorType.FULL_DATETIME),
        log_factor(45, 'stages.units.actions.spentInMills', FactorType.UNSIGNED),
        log_factor(46, 'stages.units.actions.error', FactorType.TEXT),
        # back at top level
        log_factor(
            47, 'dataId', FactorType.NUMBER,
            flatten=True, indexGroup=FactorIndexGroup.INDEX_4, precision='20'),
    ]

    monitor_log = Topic(
        name='raw_pipeline_monitor_log',
        kind=TopicKind.SYSTEM,
        type=TopicType.RAW,
        factors=factors,
        description='Pipeline monitor log raw topic.',
    )
    return [monitor_log]
def ask_dqc_topics() -> List[Topic]:
    """Return the system topics used by data quality checking.

    Two topics are defined: the raw ``dqc_raw_rule_result`` topic and the
    distinct ``dqc_rule_daily`` topic. Both number their factors
    ``dra-f-1`` to ``dra-f-8``.

    :return: list of the raw topic followed by the daily distinct topic
    """
    # TODO define all dqc topics
    raw_rule_result = Topic(
        name='dqc_raw_rule_result',
        kind=TopicKind.SYSTEM,
        type=TopicType.RAW,
        factors=[
            Factor(
                factorId='dra-f-1', name='ruleCode', type=FactorType.TEXT,
                flatten=True, indexGroup=FactorIndexGroup.INDEX_1, precision='50'),
            Factor(
                factorId='dra-f-2', name='topicId', type=FactorType.TEXT,
                flatten=True, indexGroup=FactorIndexGroup.INDEX_2, precision='50'),
            Factor(factorId='dra-f-3', name='topicName', type=FactorType.TEXT),
            Factor(
                factorId='dra-f-4', name='factorId', type=FactorType.TEXT,
                flatten=True, indexGroup=FactorIndexGroup.INDEX_3, precision='50'),
            Factor(factorId='dra-f-5', name='factorName', type=FactorType.TEXT),
            Factor(factorId='dra-f-6', name='detected', type=FactorType.BOOLEAN, flatten=True),
            Factor(factorId='dra-f-7', name='severity', type=FactorType.TEXT),
            # the start day of date range
            # sunday of weekly; 1st of monthly.
            Factor(
                factorId='dra-f-8', name='processDate', type=FactorType.DATE,
                flatten=True, indexGroup=FactorIndexGroup.INDEX_4),
        ],
        description='Topic data monitor by rules, raw topic.',
    )

    rule_daily = Topic(
        name='dqc_rule_daily',
        kind=TopicKind.SYSTEM,
        type=TopicType.DISTINCT,
        factors=[
            Factor(
                factorId='dra-f-1', name='ruleCode', type=FactorType.TEXT,
                indexGroup=FactorIndexGroup.INDEX_1),
            Factor(
                factorId='dra-f-2', name='topicId', type=FactorType.TEXT,
                indexGroup=FactorIndexGroup.INDEX_2),
            Factor(
                factorId='dra-f-3', name='factorId', type=FactorType.TEXT,
                indexGroup=FactorIndexGroup.INDEX_3),
            Factor(
                factorId='dra-f-4', name='year', type=FactorType.YEAR,
                indexGroup=FactorIndexGroup.INDEX_4),
            Factor(
                factorId='dra-f-5', name='month', type=FactorType.MONTH,
                indexGroup=FactorIndexGroup.INDEX_5),
            Factor(
                factorId='dra-f-6', name='day', type=FactorType.DAY_OF_MONTH,
                indexGroup=FactorIndexGroup.INDEX_6),
            Factor(
                factorId='dra-f-7', name='processDate', type=FactorType.DATE,
                indexGroup=FactorIndexGroup.INDEX_7),
            Factor(factorId='dra-f-8', name='count', type=FactorType.UNSIGNED, precision='10'),
        ],
        description='Topic data monitor by rules, distinct topic.',
    )

    return [raw_rule_result, rule_daily]