def ask_topic_storage(
        topic_or_schema: Union[Topic, TopicSchema],
        principal_service: PrincipalService) -> TopicDataStorageSPI:
    """
    Build the data storage of the given topic, or of the topic held by the given schema.

    A storage builder cached under the data source id of the topic is used when there is one.
    Otherwise the data source is loaded through the data source service, a builder is created
    from it, put into the cache, and invoked.

    :raises DataKernelException: when the topic has a blank data source id,
        or no data source is found by that id
    """
    if isinstance(topic_or_schema, Topic):
        topic = topic_or_schema
    else:
        topic = topic_or_schema.get_topic()

    data_source_id = topic.dataSourceId
    if is_blank(data_source_id):
        raise DataKernelException(
            f'Data source is not defined for topic[id={topic.topicId}, name={topic.name}]')

    # fast path, builder is already cached
    cached_builder = CacheService.data_source().get_builder(data_source_id)
    if cached_builder is not None:
        return cached_builder()

    data_source = get_data_source_service(principal_service).find_by_id(data_source_id)
    if data_source is None:
        raise DataKernelException(
            f'Data source not declared for topic'
            f'[id={topic.topicId}, name={topic.name}, dataSourceId={data_source_id}]')

    # create the builder and remember it for the next ask
    created_builder = build_topic_data_storage(data_source)
    CacheService.data_source().put_builder(data_source_id, created_builder)
    return created_builder()
def find_topic(
        self, topic_id: Optional[TopicId], available_schemas: List[TopicSchema],
        principal_service: PrincipalService,
        allow_in_memory_variables: bool) -> Tuple[TopicSchema, bool]:
    """
    Find the schema of the topic identified by the given id.

    The given available schemas are searched first. When none of them matches and in-memory
    variables are allowed, the topic and its schema are loaded through the topic service.

    The second element of the returned tuple is true when the schema was found in the given
    available list, and false when it was loaded through the topic service.

    :raises DataKernelException: when the topic id is blank, or the topic or its schema cannot be found
    """
    if is_blank(topic_id):
        raise DataKernelException(f'Topic not declared.')

    from_available = ArrayHelper(available_schemas).find(lambda x: x.get_topic().topicId == topic_id)
    if from_available is not None:
        return from_available, True

    if not allow_in_memory_variables:
        raise DataKernelException(f'Topic[id={topic_id}] not found.')

    # in pipeline, it might be from trigger data
    topic_service = get_topic_service(principal_service)
    topic: Optional[Topic] = topic_service.find_by_id(topic_id)
    if topic is None:
        raise DataKernelException(f'Topic[id={topic_id}] not found.')

    from_definition: Optional[TopicSchema] = topic_service.find_schema_by_name(
        topic.name, principal_service.get_tenant_id())
    if from_definition is None:
        raise DataKernelException(f'Topic schema[id={topic_id}] not found.')
    return from_definition, False
def find_factor(self, factor_id: Optional[FactorId], topic: Topic) -> Factor:
    """
    Find the factor with the given id among the factors of the given topic.

    :raises DataKernelException: when the factor id is blank, or no factor of the topic has this id
    """
    if is_blank(factor_id):
        raise DataKernelException(f'Factor not declared.')

    matched: Optional[Factor] = ArrayHelper(topic.factors).find(lambda x: x.factorId == factor_id)
    if matched is not None:
        return matched

    raise DataKernelException(
        f'Factor[id={factor_id}] in topic[id={topic.topicId}, name={topic.name}] not found.')
def get_topic_schema(
        topic_id: TopicId, principal_service: PrincipalService) -> TopicSchema:
    """
    Load the schema of the topic identified by the given id.

    The topic is found by id through the topic service, then its schema is found
    by the topic name and the tenant id of the given principal.

    :raises DataKernelException: when the topic, or the schema of the topic, is not found
    """
    topic_service = get_topic_service(principal_service)
    # use the service asked above for both lookups, instead of asking for a second one
    topic = topic_service.find_by_id(topic_id)
    if topic is None:
        raise DataKernelException(f'Topic[id={topic_id}] not found.')

    schema = topic_service.find_schema_by_name(topic.name, principal_service.get_tenant_id())
    if schema is None:
        raise DataKernelException(f'Topic[name={topic.name}] not found.')
    return schema
def create_particular(self, params: Dict[str, Any]) -> Encryptor:
    """
    Create an AES encryptor from the ``key`` and ``iv`` entries of the given parameters.

    The key must be not blank and of length 32, the iv must be not blank and of length 16.
    The key value is deliberately not included in the error message,
    so that secret key material does not end up in logs or responses.

    :raises DataKernelException: when the key or the iv does not satisfy the above
    """
    key = params.get('key')
    if is_blank(key) or len(key) != 32:
        # never echo the (possibly almost correct) secret key
        raise DataKernelException('Parameter key should be 32 digits.')

    iv = params.get('iv')
    if is_blank(iv) or len(iv) != 16:
        raise DataKernelException(
            f'Parameter iv[{iv}] should be 16 digits.')

    return AESEncryptor(key, iv)
def __init__(
        self, schema: TopicSchema, mapping: Optional[List[MappingFactor]],
        principal_service: PrincipalService):
    """
    Keep the given mapping, and parse each of its mapping factors against the given schema.

    :raises DataKernelException: when the mapping is none or has no element
    """
    self.mapping = mapping
    if mapping is None:
        raise DataKernelException('Mapping cannot be none.')
    elif len(mapping) == 0:
        raise DataKernelException('At least one mapping is required.')

    def parse_factor(mapping_factor: MappingFactor) -> ParsedStorageMappingFactor:
        return ParsedStorageMappingFactor(schema, mapping_factor, principal_service)

    self.parsedMappingFactors = ArrayHelper(mapping).map(parse_factor).to_list()
def parse_condition_for_storage(
        condition: Optional[ParameterCondition], available_schemas: List[TopicSchema],
        principal_service: PrincipalService,
        allow_in_memory_variables: bool) -> ParsedStorageCondition:
    """
    Parse the given condition to its storage counterpart: a joint is parsed to a
    ParsedStorageJoint, an expression to a ParsedStorageExpression.

    :raises DataKernelException: when the condition is none, or is neither a joint nor an expression
    """
    if condition is None:
        raise DataKernelException('Condition cannot be none.')

    # joint is tested before expression, same order as the type hierarchy is checked everywhere else
    if isinstance(condition, ParameterJoint):
        parsed_type = ParsedStorageJoint
    elif isinstance(condition, ParameterExpression):
        parsed_type = ParsedStorageExpression
    else:
        raise DataKernelException(f'Condition[{condition.dict()}] is not supported.')

    return parsed_type(condition, available_schemas, principal_service, allow_in_memory_variables)
def parse_condition_in_memory(
        condition: Optional[ParameterCondition],
        principal_service: PrincipalService) -> ParsedMemoryCondition:
    """
    Parse the given condition to its in-memory counterpart: a joint is parsed to a
    ParsedMemoryJoint, an expression to a ParsedMemoryExpression.

    :raises DataKernelException: when the condition is none, or is neither a joint nor an expression
    """
    if condition is None:
        raise DataKernelException('Condition cannot be none.')

    if isinstance(condition, ParameterJoint):
        parsed_type = ParsedMemoryJoint
    elif isinstance(condition, ParameterExpression):
        parsed_type = ParsedMemoryExpression
    else:
        raise DataKernelException(
            f'Condition[{condition.dict()}] is not supported.')

    return parsed_type(condition, principal_service)
def run(self, variables: PipelineVariables, principal_service: PrincipalService) -> bool:
    """
    Evaluate this expression against the given variables.

    The left value is always asked. For the empty/not-empty operators the result is decided
    from the left value only, and the right value is not asked at all.
    For any other operator the right value is asked and both values are passed to the
    comparison method of the operator.

    :raises DataKernelException: when the operator is not one of the handled operators
    """
    operator = self.operator
    left_value = self.left.value(variables, principal_service)

    # operators on left value only
    if operator == ParameterExpressionOperator.EMPTY:
        return is_empty(left_value)
    if operator == ParameterExpressionOperator.NOT_EMPTY:
        return is_not_empty(left_value)

    right_value = self.right.value(variables, principal_service)

    # operators on both values, tested one by one with equality, in declared order
    comparisons = (
        (ParameterExpressionOperator.EQUALS, self.equals),
        (ParameterExpressionOperator.NOT_EQUALS, self.not_equals),
        (ParameterExpressionOperator.LESS, self.less_than),
        (ParameterExpressionOperator.LESS_EQUALS, self.less_than_or_equals),
        (ParameterExpressionOperator.MORE, self.greater_than),
        (ParameterExpressionOperator.MORE_EQUALS, self.greater_than_or_equals),
        (ParameterExpressionOperator.IN, self.exists),
        (ParameterExpressionOperator.NOT_IN, self.not_exists),
    )
    for candidate, compare in comparisons:
        if operator == candidate:
            return compare(left_value, right_value)

    raise DataKernelException(
        f'Operator[{self.operator}] is not supported, found from expression[{self.condition.dict()}].'
    )
async def patch_topic_data(
        topic_name: Optional[str] = None,
        patch_type: Optional[PipelineTriggerType] = PipelineTriggerType.MERGE,
        tenant_id: Optional[TenantId] = None,
        data=Body(...),
        principal_service: PrincipalService = Depends(get_any_admin_principal)
) -> None:
    """
    Patch data of the topic with the given name, by insert, merge (the default) or delete.
    data patch will not trigger any pipeline
    """
    if is_blank(topic_name):
        raise_400('Topic name is required.')

    # none is treated as merge
    patch_type = PipelineTriggerType.MERGE if patch_type is None else patch_type
    if patch_type == PipelineTriggerType.INSERT_OR_MERGE:
        raise_400('Patch type can be one of insert/merge/delete.')

    tenant_id = validate_tenant_id(tenant_id, principal_service)
    principal_service = fake_to_tenant(principal_service, tenant_id)

    schema = get_topic_schema(topic_name, tenant_id, principal_service)
    storage = ask_topic_storage(schema, principal_service)
    service = ask_topic_data_service(schema, storage, principal_service)

    if patch_type == PipelineTriggerType.INSERT:
        service.trigger_by_insert(data)
        return
    if patch_type == PipelineTriggerType.MERGE:
        service.trigger_by_merge(data)
        return
    if patch_type == PipelineTriggerType.DELETE:
        service.trigger_by_delete(data)
        return

    raise DataKernelException(
        f'Patch type [{patch_type}] is not supported.')
def action(variables: PipelineVariables, principal_service: PrincipalService) -> Any:
    """
    Compute the difference between the end date and the start date.

    A date already parsed outside of this function is used as is, otherwise the variable is
    resolved by parse_variable_to_value. When both dates are resolved, the difference is
    computed by compute_date_diff and returned, prefixed when the prefix is not empty.
    Otherwise the computed literal operator matching the function is picked and the
    evaluation is delegated to the function built by create_ask_value_for_computed.

    :raises DataKernelException: when the function is none of year/month/day diff
    """
    if end_parsed:
        e_parsed = True
        e_date = end_date
    else:
        e_parsed, e_date = parse_variable_to_value(
            end_variable_name, variables, available_schemas, allow_in_memory_variables,
            get_date_from_variables, principal_service)

    if start_parsed:
        # a plain boolean, a trailing comma here would turn the flag into a tuple
        s_parsed = True
        s_date = start_date
    else:
        s_parsed, s_date = parse_variable_to_value(
            start_variable_name, variables, available_schemas, allow_in_memory_variables,
            get_date_from_variables, principal_service)

    if e_parsed and s_parsed:
        diff = compute_date_diff(function, e_date, s_date, variable_name)
        return diff if len(prefix) == 0 else f'{prefix}{diff}'
    else:
        if function == VariablePredefineFunctions.YEAR_DIFF:
            operator = ComputedLiteralOperator.YEAR_DIFF
        elif function == VariablePredefineFunctions.MONTH_DIFF:
            operator = ComputedLiteralOperator.MONTH_DIFF
        elif function == VariablePredefineFunctions.DAY_DIFF:
            operator = ComputedLiteralOperator.DAY_DIFF
        else:
            raise DataKernelException(f'Variable name[{variable_name}] is not supported.')
        func = create_ask_value_for_computed(operator, [e_date, s_date])
        return func(variables, principal_service)
def create_date_format(
        prefix: str, variable_name: str
) -> Callable[[PipelineVariables, PrincipalService], Any]:
    """
    Create a function which formats a date by the date format declared in the given variable name.

    The variable name is parsed to two parameters: the date, or the name of the variable
    holding it, and the date format. When the first one can be tested as a date right now,
    the created function formats this constant date; otherwise the date is retrieved from
    variables on each call. The formatted value is prefixed when the prefix is not blank.

    :raises DataKernelException: when the date format is blank
    """
    parsed_params = parse_function_in_variable(
        variable_name, VariablePredefineFunctions.DATE_FORMAT.value, 2)
    variable_name, date_format = parsed_params[0], parsed_params[1]
    if is_blank(date_format):
        raise DataKernelException(
            f'Date format[{date_format}] cannot be recognized.')
    date_format = translate_date_format_to_memory(date_format)
    parsed, parsed_date = test_date(variable_name)

    # noinspection PyUnusedLocal
    def format_constant(variables: PipelineVariables, principal_service: PrincipalService) -> Any:
        # date is known already, variables are not needed
        return parsed_date.strftime(date_format)

    def format_from_variables(variables: PipelineVariables, principal_service: PrincipalService) -> Any:
        date_parsed, value, a_date = get_date_from_variables(
            variables, principal_service, variable_name)
        if date_parsed:
            return a_date.strftime(date_format)
        raise DataKernelException(
            f'Value[{value}] cannot be parsed to date or datetime.')

    action = format_constant if parsed else format_from_variables

    def run(variables: PipelineVariables, principal_service: PrincipalService) -> Any:
        value = action(variables, principal_service)
        if is_blank(prefix):
            return value
        return f'{prefix}{value}'

    return run
def __init__(self, schema: TopicSchema, mapping_factor: MappingFactor, principal_service: PrincipalService):
    """
    Parse the given mapping factor against the topic of the given schema.

    The arithmetic falls back to AggregateArithmetic.NONE when it is not declared.
    The target factor is found from the topic by the factor id of the mapping factor,
    and the source is parsed as an in-memory parameter.

    :raises DataKernelException: when the factor id is blank, the factor is not found in the topic,
        or the source is not declared
    """
    self.mappingFactor = mapping_factor

    declared_arithmetic = mapping_factor.arithmetic
    if declared_arithmetic is None:
        self.arithmetic = AggregateArithmetic.NONE
    else:
        self.arithmetic = declared_arithmetic

    factor_id = mapping_factor.factorId
    if is_blank(factor_id):
        raise DataKernelException('Factor not declared on mapping.')

    topic = schema.get_topic()
    found: Optional[Factor] = ArrayHelper(topic.factors).find(lambda x: x.factorId == factor_id)
    if found is None:
        raise DataKernelException(
            f'Factor[id={factor_id}] in topic[id={topic.topicId}, name={topic.name}] not found.')
    self.factor = found

    source = mapping_factor.source
    if source is None:
        raise DataKernelException('Source of mapping factor not declared.')
    # parameter in mapping factor always retrieve value from variables
    self.parsedParameter = parse_parameter_in_memory(source, principal_service)
def parse_parameter_for_storage(
        parameter: Optional[Parameter], available_schemas: List[TopicSchema],
        principal_service: PrincipalService, allow_in_memory_variables: bool
) -> ParsedStorageParameter:
    """
    Parse the given parameter to its storage counterpart, according to its type:
    topic factor, constant or computed, tested in this order.

    :raises DataKernelException: when the parameter is none, or is none of the three types
    """
    if parameter is None:
        raise DataKernelException('Parameter cannot be none.')

    if isinstance(parameter, TopicFactorParameter):
        parsed_type = ParsedStorageTopicFactorParameter
    elif isinstance(parameter, ConstantParameter):
        parsed_type = ParsedStorageConstantParameter
    elif isinstance(parameter, ComputedParameter):
        parsed_type = ParsedStorageComputedParameter
    else:
        raise DataKernelException(f'Parameter[{parameter.dict()}] is not supported.')

    return parsed_type(parameter, available_schemas, principal_service, allow_in_memory_variables)
def action(variables: PipelineVariables, principal_service: PrincipalService) -> Any:
    """
    Retrieve the date from variables by the variable name, and format it by the date format.

    :raises DataKernelException: when the retrieved value is not parsed to a date
    """
    date_parsed, value, a_date = get_date_from_variables(
        variables, principal_service, variable_name)
    if date_parsed:
        return a_date.strftime(date_format)

    raise DataKernelException(
        f'Value[{value}] cannot be parsed to date or datetime.')
def __init__(self, digits: int):
    """
    Keep the number of digits to mask, and pick the matching last-mask encrypt method.

    :raises DataKernelException: when digits is neither 3 nor 6
    """
    self.digits = digits

    if digits == 3:
        method = FactorEncryptMethod.MASK_LAST_3
    elif digits == 6:
        method = FactorEncryptMethod.MASK_LAST_6
    else:
        raise DataKernelException(f'Only 3 or 6 digits last mask is supported, current is [{digits}].')

    self.method = method
def __init__(self, month: bool, day: bool, formats: List[str]):
    """
    Keep which parts of date to mask and the date formats, and pick the matching encrypt method.

    :raises DataKernelException: when neither month nor day is masked,
        or the formats is none or has no element
    """
    self.maskMonth = month
    self.maskDay = day

    if not month and not day:
        raise DataKernelException(
            'At least one of month or day should be masked, current is none.'
        )
    if month and day:
        self.method = FactorEncryptMethod.MASK_MONTH_DAY
    elif month:
        self.method = FactorEncryptMethod.MASK_MONTH
    else:
        # month is not masked, so day must be
        self.method = FactorEncryptMethod.MASK_DAY

    self.formats = formats
    if formats is None or len(formats) == 0:
        raise DataKernelException(
            'At least one date format should be provided, current is none.'
        )
def to_decimal(value: Any) -> Decimal:
    """
    Convert the given value to a decimal, none is converted to decimal zero.

    :raises DataKernelException: when the value is not none and try_to_decimal gives none for it
    """
    if value is None:
        return Decimal(0)

    converted = try_to_decimal(value)
    if converted is not None:
        return converted

    raise DataKernelException(
        f'Cannot retrieve[key={name}, current={current_name}] from [{data}].'
    )
def __init__(self, digits: int):
    """
    Keep the number of digits to mask, and pick the matching center-mask encrypt method.

    :raises DataKernelException: when digits is neither 3 nor 5
    """
    self.digits = digits

    if digits == 3:
        method = FactorEncryptMethod.MASK_CENTER_3
    elif digits == 5:
        method = FactorEncryptMethod.MASK_CENTER_5
    else:
        raise DataKernelException(
            f'Only 3 or 5 digits center mask is supported, current is [{digits}].'
        )

    self.method = method
def build_id_criteria(self, data: Dict[str, Any]) -> EntityCriteria:
    """
    Build a criteria which contains only the id criteria, built by data entity helper from the given data.

    :raises DataKernelException: when the data entity helper gives none as id criteria
    """
    by_id = self.get_data_entity_helper().build_id_criteria(data)
    if by_id is None:
        raise DataKernelException(f'Id not found from given data[{data}].')

    # a new list on each call, caller might append more criteria
    return [by_id]
def compute_date_diff(
        function: VariablePredefineFunctions, end_date: date, start_date: date,
        variable_name: str) -> int:
    """
    Compute the difference from the start date to the end date, in years, months or days
    according to the given function. For days, time of both dates is truncated first.

    :raises DataKernelException: when the function is none of year/month/day diff
    """
    if function == VariablePredefineFunctions.YEAR_DIFF:
        return year_diff(end_date, start_date)
    if function == VariablePredefineFunctions.MONTH_DIFF:
        return month_diff(end_date, start_date)
    if function == VariablePredefineFunctions.DAY_DIFF:
        delta = truncate_time(end_date) - truncate_time(start_date)
        return delta.days

    raise DataKernelException(
        f'Constant[{variable_name}] is not supported.')
def create_run_constant_segment(
        variable: MightAVariable
) -> Callable[[PipelineVariables, PrincipalService], Any]:
    """
    Create the function which asks the value of the given constant segment.

    The variable part is tested against the predefined functions in this order:
    next sequence, now, year/month/day diff, date format and previous trigger data.
    Anything else is retrieved from variables, with the text part as prefix.

    :raises DataKernelException: when previous trigger data itself is asked with a not blank prefix,
        or the part following previous trigger data is not a dot with at least one more character
    """
    prefix = variable.text
    variable_name = variable.variable

    if variable_name == VariablePredefineFunctions.NEXT_SEQ.value:
        return create_snowflake_generator(prefix)
    if variable_name == VariablePredefineFunctions.NOW.value:
        return lambda variables, principal_service: get_current_time_in_seconds()

    diff_functions = (
        VariablePredefineFunctions.YEAR_DIFF,
        VariablePredefineFunctions.MONTH_DIFF,
        VariablePredefineFunctions.DAY_DIFF,
    )
    for diff_function in diff_functions:
        if variable_name.startswith(diff_function.value):
            return create_date_diff(prefix, variable_name, diff_function)

    if variable_name.startswith(VariablePredefineFunctions.DATE_FORMAT.value):
        return create_date_format(prefix, variable_name)

    previous_data_name = VariablePredefineFunctions.FROM_PREVIOUS_TRIGGER_DATA.value
    if not variable_name.startswith(previous_data_name):
        return create_get_from_variables_with_prefix(prefix, variable_name)

    if variable_name == previous_data_name:
        # the whole previous trigger data is asked
        if is_blank(prefix):
            return create_previous_trigger_data()
        raise DataKernelException(
            f'Previous trigger data is a dict, cannot prefix by a string[{prefix}].'
        )

    # a property of previous trigger data is asked, must be a dot and at least one character
    length = len(previous_data_name)
    if len(variable_name) < length + 2 or variable_name[length:length + 1] != '.':
        raise DataKernelException(
            f'Constant[{variable_name}] is not supported.')
    return create_from_previous_trigger_data(prefix, variable_name[length + 1:])
def __init__(
        self,
        parameter: ParsedStorageConstantParameter,
        available_schemas: List[TopicSchema],
        principal_service: PrincipalService,
        allow_in_memory_variables: bool,
        try_to_type: Callable[[Any], Any]):
    """
    Wrap the given parsed constant parameter together with a type cast function.

    :raises DataKernelException: when the type cast function is none
    """
    # simply pass to super constructor
    original_parameter = parameter.parameter
    super().__init__(original_parameter, available_schemas, principal_service, allow_in_memory_variables)

    self.parsedParameter = parameter
    if try_to_type is None:
        raise DataKernelException('Type cast function cannot be none.')
    else:
        self.tryToType = try_to_type
def parse(self, parameter: TopicFactorParameter, principal_service: PrincipalService) -> None:
    """
    Find the topic and the factor declared by the given parameter, keep them,
    and create the function which asks the factor value.

    :raises DataKernelException: when the topic id or the factor id is blank,
        or the topic or the factor cannot be found
    """
    topic_id = parameter.topicId
    if is_blank(topic_id):
        raise DataKernelException(f'Topic not declared.')

    found_topic: Optional[Topic] = get_topic_service(principal_service).find_by_id(parameter.topicId)
    if found_topic is None:
        raise DataKernelException(
            f'Topic[id={parameter.topicId}] not found.')
    topic = found_topic
    self.topic = topic

    if is_blank(parameter.factorId):
        raise DataKernelException(f'Factor not declared.')

    found_factor: Optional[Factor] = ArrayHelper(topic.factors).find(
        lambda x: x.factorId == parameter.factorId)
    if found_factor is None:
        raise DataKernelException(
            f'Factor[id={parameter.factorId}] in topic[id={topic.topicId}, name={topic.name}] not found.'
        )
    self.factor = found_factor

    self.askValue = create_ask_factor_value(topic, found_factor)
def build_id_version_criteria(self, data: Dict[str, Any]) -> EntityCriteria:
    """
    Build the id criteria from the given data, and append the version criteria
    when the data entity helper says it is versioned.

    :raises DataKernelException: when it is versioned, but the helper gives none as version criteria
    """
    helper = self.get_data_entity_helper()
    criteria: EntityCriteria = self.build_id_criteria(data)

    if not helper.is_versioned():
        return criteria

    by_version = helper.build_version_criteria(data)
    if by_version is None:
        raise DataKernelException(
            f'Version not found from given data[{data}].')
    criteria.append(by_version)
    return criteria
def action(variables: PipelineVariables, principal_service: PrincipalService) -> Any:
    """
    Compute the difference between the end date and the start date.

    A date already parsed outside of this function is used as is,
    otherwise it is retrieved from variables by its variable name, end date first.

    :raises DataKernelException: when a value retrieved from variables is not parsed to a date
    """
    if end_parsed:
        e_date = end_date
    else:
        e_parsed, e_value, e_date = get_date_from_variables(
            variables, principal_service, end_variable_name)
        if not e_parsed:
            raise DataKernelException(
                f'Value[{e_value}] cannot be parsed to date or datetime.'
            )

    if start_parsed:
        s_date = start_date
    else:
        s_parsed, s_value, s_date = get_date_from_variables(
            variables, principal_service, start_variable_name)
        if not s_parsed:
            raise DataKernelException(
                f'Value[{s_value}] cannot be parsed to date or datetime.'
            )

    return compute_date_diff(function, e_date, s_date, variable_name)
def do_encrypt(self, value: Any) -> Any:
    """
    Mask month and/or day of the given value to 1, according to the encrypt method.

    A date is masked directly. A string is parsed to a date by the declared formats first,
    the date is masked, formatted back to a string by the matched format, and returned when
    it has the same length as the given string. Otherwise the decimal characters of the given
    string are replaced, one by one, by the characters of the formatted string,
    and all other characters of the given string are kept.

    :raises DataKernelException: when the encrypt method is not one of the month/day masks,
        the value is neither a date nor a string, or the string cannot be parsed by the formats
    """
    if isinstance(value, date):
        method = self.method
        if method == FactorEncryptMethod.MASK_MONTH_DAY:
            return value.replace(month=1, day=1)
        if method == FactorEncryptMethod.MASK_MONTH:
            return value.replace(month=1)
        if method == FactorEncryptMethod.MASK_DAY:
            return value.replace(day=1)
        raise DataKernelException(
            f'Encrypt method[{self.method}] is not supported.')

    if not isinstance(value, str):
        raise DataKernelException(
            f'Given value[{value}] is not a string, cannot be masked.')

    parsed, date_value, date_format = is_date_plus_format(value, self.formats)
    if not parsed:
        raise DataKernelException(
            f'Given value[{value}] cannot be parsed to date or datetime by formats[{self.formats}].'
        )

    # call myself to encrypt date, then format to string again
    # NOTE(review): original comment says only decimal is included now, it depends on the matched format
    masked_text = self.do_encrypt(date_value).strftime(date_format)

    # return directly when length is same
    if len(masked_text) == len(value):
        return masked_text

    # gather decimal character from masked text, other character from original
    merged = []
    cursor = 0
    for ch in value:
        if ch.isdecimal():
            merged.append(masked_text[cursor])
            cursor += 1
        else:
            merged.append(ch)
    return ''.join(merged)
def reduce(variables: PipelineVariables, principal_service: PrincipalService) -> Decimal:
    """
    Ask the value of each parameter in order, parse it to decimal,
    and fold the decimals from left to right by the reduce function.
    The first parameter and the rest parameters use their own fallback on parsing.

    :raises DataKernelException: when a value is parsed to none
    """

    def ask_decimal(parameter: Any, fallback: Any) -> Decimal:
        # ask the value and parse it, none after parsing is not acceptable
        asked_value = parameter.value(variables, principal_service)
        decimal_value = parse_to_decimal(asked_value, fallback)
        if decimal_value is None:
            raise DataKernelException(
                f'{numeric_name} [value={asked_value}, type={type(asked_value)}] is not supported.'
            )
        return decimal_value

    reduced = ask_decimal(parameters[0], fallback_first)
    for parameter in parameters[1:]:
        reduced = reduce_func(reduced, ask_decimal(parameter, fallback_rest))
    return reduced
def exists(self, one: Any, another: Any) -> bool:
    """
    Check if the first value equals any element of the second one.

    The second one is a list, or a string which is split by comma to elements.
    None and blank string contain nothing, false is returned for them.

    :raises DataKernelException: when the second one is not none, and is neither a list nor a string
    """
    if another is None:
        return False

    if isinstance(another, list):
        candidates = another
    elif isinstance(another, str):
        if is_blank(another):
            return False
        candidates = another.split(',')
    else:
        raise DataKernelException(
            f'Comparison of [none|int|float|decimal|date|time|datetime] in [list|comma joined string] are supported, '
            f'current are [one={one}, another={another}].')

    return ArrayHelper(candidates).some(lambda x: self.equals(x, one))
def assert_parameter_count(
        func_name: str, parameters: Optional[List[Parameter]],
        min_count: int = 1, max_count: int = 9999,
        allow_undefined: bool = False) -> None:
    """
    Assert that the count of the given parameters is between min count and max count,
    both inclusive, and no parameter is none unless undefined is allowed.

    :raises DataKernelException: when the parameters is none, the count is out of range,
        or a none parameter is found while it is not allowed
    """
    if parameters is None:
        raise DataKernelException(
            f'Parameter not found on computation[{func_name}].')

    count = len(parameters)
    if count < min_count:
        raise DataKernelException(
            f'At least {min_count} parameter(s) on computation[{func_name}], current is [{parameters}].'
        )
    if count > max_count:
        raise DataKernelException(
            f'At most {max_count} parameter(s) on computation[{func_name}], current is [{parameters}].'
        )

    if allow_undefined:
        return

    has_none = ArrayHelper(parameters).some(lambda x: x is None)
    if has_none:
        raise DataKernelException(
            f'None parameter is not allowed on computation[{func_name}], current is [{parameters}].'
        )