Example #1
def find_last(self, data_id: int, topic_id: TopicId,
              tenant_id: TenantId) -> Optional[PipelineMonitorLog]:
    schema = self.get_topic_schema()
    storage = self.ask_storages().ask_topic_storage(schema)
    data_service = ask_topic_data_service(schema, storage,
                                          self.principalService)
    # noinspection SpellCheckingInspection
    page = data_service.page(
        data_service.get_data_entity_helper().get_entity_pager(
            criteria=[
                EntityCriteriaExpression(left=ColumnNameLiteral(
                    columnName=TopicDataColumnNames.TENANT_ID.value),
                                         right=tenant_id),
                EntityCriteriaExpression(
                    left=ColumnNameLiteral(columnName='topicid'),
                    right=topic_id),
                EntityCriteriaExpression(
                    left=ColumnNameLiteral(columnName='dataid'),
                    right=data_id)
            ],
            sort=[
                EntitySortColumn(
                    name=TopicDataColumnNames.INSERT_TIME.value,
                    method=EntitySortMethod.DESC,
                )
            ],
            pageable=Pageable(pageNumber=1, pageSize=1)))
    if page.itemCount == 0:
        return None
    else:
        return PipelineMonitorLog(**page.data[0])
Example #2
async def patch_topic_data(
    topic_name: Optional[str] = None,
    patch_type: Optional[PipelineTriggerType] = PipelineTriggerType.MERGE,
    tenant_id: Optional[TenantId] = None,
    data=Body(...),
    principal_service: PrincipalService = Depends(get_any_admin_principal)
) -> None:
    """
	data patch will not trigger any pipeline
	"""
    if is_blank(topic_name):
        raise_400('Topic name is required.')
    if patch_type is None:
        patch_type = PipelineTriggerType.MERGE
    if patch_type == PipelineTriggerType.INSERT_OR_MERGE:
        raise_400('Patch type must be one of insert/merge/delete.')
    tenant_id = validate_tenant_id(tenant_id, principal_service)
    principal_service = fake_to_tenant(principal_service, tenant_id)

    schema = get_topic_schema(topic_name, tenant_id, principal_service)
    storage = ask_topic_storage(schema, principal_service)
    service = ask_topic_data_service(schema, storage, principal_service)
    if patch_type == PipelineTriggerType.INSERT:
        service.trigger_by_insert(data)
    elif patch_type == PipelineTriggerType.MERGE:
        service.trigger_by_merge(data)
    elif patch_type == PipelineTriggerType.DELETE:
        service.trigger_by_delete(data)
    else:
        raise DataKernelException(
            f'Patch type [{patch_type}] is not supported.')
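For orientation, a minimal client-side sketch of calling this handler; the snippet above omits the router decorator, so the URL path, query-parameter binding, and auth header below are all assumptions, not the service's actual contract:

import requests

# Hypothetical call: the '/topic/data/patch' path, parameter names and the
# bearer token are assumed; only topic_name/patch_type/tenant_id/data mirror
# the handler signature above.
response = requests.post(
    'http://localhost:8000/topic/data/patch',
    params={'topic_name': 'orders', 'patch_type': 'merge', 'tenant_id': '1'},
    json=[{'order_id': '1', 'amount': 100}],
    headers={'Authorization': 'Bearer <token>'})
response.raise_for_status()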
Example #3
async def fetch_topic_data_ids(
    topic_id: Optional[TopicId] = None,
    tenant_id: Optional[TenantId] = None,
    criteria: Optional[ParameterJoint] = None,
    principal_service: PrincipalService = Depends(get_any_admin_principal)
) -> List[str]:
    if is_blank(topic_id):
        raise_400('Topic id is required.')
    tenant_id = validate_tenant_id(tenant_id, principal_service)
    principal_service = fake_to_tenant(principal_service, tenant_id)

    schema = get_topic_service(principal_service).find_schema_by_id(
        topic_id, tenant_id)
    storage = ask_topic_storage(schema, principal_service)
    service = ask_topic_data_service(schema, storage, principal_service)

    if criteria is None:
        rows = service.find_distinct_values(None,
                                            [TopicDataColumnNames.ID.value],
                                            False)
    else:
        parsed_criteria = parse_condition_for_storage(criteria, [schema],
                                                      principal_service, False)
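        # an empty pipeline context: the parsed criteria is evaluated outside
        # any pipeline run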
        empty_variables = PipelineVariables(None, None, None)
        rows = service.find_distinct_values(
            [parsed_criteria.run(empty_variables, principal_service)],
            [TopicDataColumnNames.ID.value], False)

    return ArrayHelper(rows).map(
        lambda x: str(x.get(TopicDataColumnNames.ID.value))).to_list()
Example #4
def find_topic_data_service(
    topic_id: TopicId, tenant_id: TenantId, principal_service: PrincipalService
) -> Tuple[TopicSchema, TopicDataService]:
    topic_schema = get_topic_service(principal_service).find_schema_by_id(
        topic_id, tenant_id)
    topic_storage = ask_topic_storage(topic_schema, principal_service)
    topic_service = ask_topic_data_service(topic_schema, topic_storage,
                                           principal_service)
    return topic_schema, topic_service
Example #5
	def find(self, topic_id: TopicId, start_time: datetime, end_time: datetime) -> Optional[TopicProfile]:
		schema = get_topic_schema(topic_id, self.principalService)
		if is_raw_topic(schema.get_topic()):
			raise DqcException(f'Raw topic[name={schema.get_topic().name}] is not supported for profiling.')
		storage = ask_topic_storage(schema, self.principalService)
		service = ask_topic_data_service(schema, storage, self.principalService)
		criteria = [
			EntityCriteriaExpression(
				left=ColumnNameLiteral(columnName=TopicDataColumnNames.TENANT_ID.value),
				right=self.principalService.get_tenant_id()),
			EntityCriteriaExpression(
				left=ColumnNameLiteral(columnName=TopicDataColumnNames.UPDATE_TIME.value),
				operator=EntityCriteriaOperator.GREATER_THAN_OR_EQUALS,
				right=start_time),
			EntityCriteriaExpression(
				left=ColumnNameLiteral(columnName=TopicDataColumnNames.UPDATE_TIME.value),
				operator=EntityCriteriaOperator.LESS_THAN_OR_EQUALS,
				right=end_time)
		]
		data = service.find(criteria)

		columns = [
			TopicDataColumnNames.ID.value,
			*ArrayHelper(schema.get_topic().factors).map(lambda x: x.name).to_list(),
			TopicDataColumnNames.TENANT_ID.value,
			TopicDataColumnNames.INSERT_TIME.value,
			TopicDataColumnNames.UPDATE_TIME.value
		]

		def row_to_list(row: Dict[str, Any]) -> List[Any]:
			return ArrayHelper(columns).map(lambda x: row.get(x)).to_list()

		data_frame = build_data_frame(ArrayHelper(data).map(row_to_list).to_list(), columns)
		data_frame = convert_data_frame_type_by_topic(data_frame, schema.get_topic())
		data_frame.drop([
			TopicDataColumnNames.TENANT_ID.value,
			TopicDataColumnNames.UPDATE_TIME.value,
			TopicDataColumnNames.INSERT_TIME.value,
			TopicDataColumnNames.AGGREGATE_ASSIST.value,
			TopicDataColumnNames.ID.value,
			TopicDataColumnNames.VERSION.value
		], axis=1, inplace=True, errors='ignore')

		if data_frame.empty or len(data_frame.index) == 1:
			return None
		else:
			logger.info(f'memory_usage {data_frame.memory_usage(deep=True).sum()} bytes')
			profile = ProfileReport(data_frame, title=f'{schema.get_topic().name} data profile report', minimal=True)
			json_data = profile.to_json()
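			# the profile JSON may contain bare NaN/Infinity constants; keep the
			# infinities as floats and coerce NaN to None so the result stays JSON-safe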
			json_constants_map = {
				'-Infinity': float('-Infinity'),
				'Infinity': float('Infinity'),
				'NaN': None,
			}
			return loads(json_data, parse_constant=lambda x: json_constants_map[x])
Example #6
def exchange_topic_data_service(data_service: TopicDataService, topic_id: TopicId) -> TopicDataService:
	principal_service = data_service.get_principal_service()
	topic_service = get_topic_service(principal_service)
	topic = topic_service.find_by_id(topic_id)
	if topic is None:
		raise DqcException(f'Topic[id={topic_id}] not found.')
	schema = topic_service.find_schema_by_name(topic.name, principal_service.get_tenant_id())
	if schema is None:
		raise DqcException(f'Topic[name={topic.name}] not found.')
	storage = ask_topic_storage(schema, principal_service)
	return ask_topic_data_service(schema, storage, principal_service)
Example #7
async def fetch_topic_data(
    topic_name: Optional[str] = None,
    topic_id: Optional[TopicId] = None,
    tenant_id: Optional[TenantId] = None,
    criteria: Optional[TopicPageable] = None,
    principal_service: PrincipalService = Depends(get_any_admin_principal)
) -> DataPage:
    if is_blank(topic_name) and is_blank(topic_id):
        raise_400('Topic id or name is required.')
    tenant_id = validate_tenant_id(tenant_id, principal_service)
    principal_service = fake_to_tenant(principal_service, tenant_id)

    if is_not_blank(topic_id):
        schema = get_topic_service(principal_service).find_schema_by_id(
            topic_id, tenant_id)
    else:
        schema = get_topic_schema(topic_name, tenant_id, principal_service)

    storage = ask_topic_storage(schema, principal_service)
    service = ask_topic_data_service(schema, storage, principal_service)

    pageable = Pageable(
        pageNumber=1 if criteria is None or criteria.pageNumber is None
        or criteria.pageNumber <= 0 else criteria.pageNumber,
        pageSize=100 if criteria is None or criteria.pageSize is None
        or criteria.pageSize <= 0 else criteria.pageSize)
    if criteria is None or is_blank(
            criteria.jointType) or criteria.filters is None:
        page = service.page_and_unwrap(None, pageable)
    else:
        parsed_criteria = parse_condition_for_storage(criteria, [schema],
                                                      principal_service, False)
        empty_variables = PipelineVariables(None, None, None)
        page = service.page_and_unwrap(
            [parsed_criteria.run(empty_variables, principal_service)],
            pageable)

    def id_to_str(row: Dict[str, Any]) -> Dict[str, Any]:
        if TopicDataColumnNames.ID.value in row:
            copy = row.copy()
            copy[TopicDataColumnNames.ID.value] = str(
                row[TopicDataColumnNames.ID.value])
            return copy
        else:
            return row

    page.data = ArrayHelper(page.data).map(id_to_str).to_list()
    return page
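For reference, a sketch of a criteria payload this endpoint would accept; jointType, filters, pageNumber and pageSize are the fields read above, while the filter entry is a placeholder whose real shape is defined by ParameterJoint and is not shown in the snippet:

# Hedged example payload; only the top-level field names are taken from the
# handler above.
criteria = {
    'jointType': 'and',
    'filters': [
        # one or more ParameterJoint conditions (shape not shown here)
    ],
    'pageNumber': 1,
    'pageSize': 100
}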
Example #8
async def truncate_topic_data(
    topic_name: Optional[str] = None,
    tenant_id: Optional[TenantId] = None,
    principal_service: PrincipalService = Depends(get_any_admin_principal)
) -> None:
    if not ask_truncate_topic_data():
        raise_404('Not Found')
    if is_blank(topic_name):
        raise_400('Topic name is required.')
    tenant_id = validate_tenant_id(tenant_id, principal_service)
    principal_service = fake_to_tenant(principal_service, tenant_id)

    schema = get_topic_schema(topic_name, tenant_id, principal_service)
    storage = ask_topic_storage(schema, principal_service)
    service = ask_topic_data_service(schema, storage, principal_service)
    service.truncate()
Example #9
def get_topic_data_service(
        self, topic_id: TopicId,
        rules_count: int) -> Tuple[bool, Optional[TopicDataService]]:
    topic_service = get_topic_service(self.principalService)
    topic = topic_service.find_by_id(topic_id)
    if topic is None:
        # ignore and log
        logger.error(
            f'Topic[id={topic_id}] not found, ignored {rules_count} monitor rule(s).'
        )
        return False, None
    schema = topic_service.find_schema_by_name(
        topic.name, self.principalService.get_tenant_id())
    if schema is None:
        # ignore and log
        logger.error(
            f'Topic[name={topic.name}] not found, ignored {rules_count} monitor rule(s).'
        )
        return False, None
    storage = ask_topic_storage(schema, self.principalService)
    data_service = ask_topic_data_service(schema, storage,
                                          self.principalService)
    return True, data_service
Example #10
async def fetch_topic_data_count(
    topic_id: Optional[TopicId] = None,
    tenant_id: Optional[TenantId] = None,
    criteria: Optional[ParameterJoint] = None,
    principal_service: PrincipalService = Depends(get_any_admin_principal)
) -> int:
    if is_blank(topic_id):
        raise_400('Topic id is required.')
    tenant_id = validate_tenant_id(tenant_id, principal_service)
    principal_service = fake_to_tenant(principal_service, tenant_id)

    schema = get_topic_service(principal_service).find_schema_by_id(
        topic_id, tenant_id)
    storage = ask_topic_storage(schema, principal_service)
    service = ask_topic_data_service(schema, storage, principal_service)

    if criteria is None:
        return service.count()
    else:
        parsed_criteria = parse_condition_for_storage(criteria, [schema],
                                                      principal_service, False)
        empty_variables = PipelineVariables(None, None, None)
        return service.count_by_criteria(
            [parsed_criteria.run(empty_variables, principal_service)])
Example #11
    def page(self, criteria: PipelineMonitorLogCriteria) -> DataPage:
        schema = self.get_topic_schema()
        storage = self.ask_storages().ask_topic_storage(schema)
        data_service = ask_topic_data_service(schema, storage,
                                              self.principalService)

        entity_criteria = [
            EntityCriteriaExpression(left=ColumnNameLiteral(
                columnName=TopicDataColumnNames.TENANT_ID.value),
                                     right=criteria.tenantId)
        ]
        if is_not_blank(criteria.traceId):
            # noinspection SpellCheckingInspection
            entity_criteria.append(
                EntityCriteriaExpression(
                    left=ColumnNameLiteral(columnName='traceid'),
                    right=criteria.traceId))
        if is_not_blank(criteria.topicId):
            # noinspection SpellCheckingInspection
            entity_criteria.append(
                EntityCriteriaExpression(
                    left=ColumnNameLiteral(columnName='topicid'),
                    right=criteria.topicId))
        if is_not_blank(criteria.pipelineId):
            # noinspection SpellCheckingInspection
            entity_criteria.append(
                EntityCriteriaExpression(
                    left=ColumnNameLiteral(columnName='pipelineid'),
                    right=criteria.pipelineId))
        if is_not_blank(criteria.status):
            entity_criteria.append(
                EntityCriteriaExpression(
                    left=ColumnNameLiteral(columnName='status'),
                    right=criteria.status))
        start_date_parsed, start_date = is_date(criteria.startDate,
                                                ask_datetime_formats())
        end_date_parsed, end_date = is_date(criteria.endDate,
                                            ask_datetime_formats())
        if start_date_parsed:
            entity_criteria.append(
                EntityCriteriaExpression(
                    left=ColumnNameLiteral(
                        columnName=TopicDataColumnNames.INSERT_TIME.value),
                    operator=EntityCriteriaOperator.GREATER_THAN_OR_EQUALS,
                    right=start_date))
        if end_date_parsed:
            entity_criteria.append(
                EntityCriteriaExpression(
                    left=ColumnNameLiteral(
                        columnName=TopicDataColumnNames.INSERT_TIME.value),
                    operator=EntityCriteriaOperator.LESS_THAN_OR_EQUALS,
                    right=end_date))

        page = data_service.page(
            data_service.get_data_entity_helper().get_entity_pager(
                criteria=entity_criteria,
                pageable=Pageable(pageNumber=criteria.pageNumber,
                                  pageSize=criteria.pageSize)))

        page.data = ArrayHelper(page.data) \
            .map(lambda x: x.get(TopicDataColumnNames.RAW_TOPIC_DATA.value)) \
            .filter(lambda x: x is not None) \
            .map(lambda x: PipelineMonitorLog(**x)) \
            .to_list()
        return page
Example #12
	def ask_topic_data_service(self, schema: TopicSchema) -> TopicDataService:
		"""
		ask topic data service
		"""
		storage = self.storages.ask_topic_storage(schema)
		return ask_topic_data_service(schema, storage, self.principalService)
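Every example above acquires the data service through the same three steps, distilled here as a minimal sketch; all identifiers appear verbatim in the snippets, so nothing new is assumed:

# Shared pattern: resolve the topic schema, ask for its storage, then build
# the data service bound to the caller's principal.
schema = get_topic_schema(topic_name, tenant_id, principal_service)
storage = ask_topic_storage(schema, principal_service)
service = ask_topic_data_service(schema, storage, principal_service)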