def import_topic_to_db(topic: Topic) -> Topic:
    result = storage_template.insert_one(topic, Topic, TOPICS)
    cacheman[TOPIC_BY_NAME].delete(topic.name)
    cacheman[TOPIC_DICT_BY_NAME].delete(topic.name)
    cacheman[TOPIC_BY_ID].delete(topic.topicId)
    cacheman[COLUMNS_BY_TABLE_NAME].delete(build_collection_name(topic.name))
    return result

def find_and_modify_topic_data(topic_name, query, update_data):
    collection_name = build_collection_name(topic_name)
    codec_options = build_code_options()
    collection = db.get_collection(collection_name, codec_options=codec_options)
    old_value = collection.find_one_and_update(filter=query, update=update_data, upsert=True)
    trigger_pipeline(topic_name,
                     {pipeline_constants.NEW: update_data, pipeline_constants.OLD: old_value},
                     TriggerType.update)

def topic_data_find_(self, where, topic_name):
    table_name = build_collection_name(topic_name)
    table = self.get_topic_table_by_name(table_name)
    stmt = select(table).where(self.build_oracle_where_expression(table, where))
    with self.engine.connect() as conn:
        cursor = conn.execute(stmt).cursor
        columns = [col[0] for col in cursor.description]
        cursor.rowfactory = lambda *args: dict(zip(columns, args))
        rows = cursor.fetchall()
    if rows is None:
        return None
    if isinstance(rows, list):
        results = []
        for row in rows:
            result = {}
            for name in columns:
                if isinstance(table.c[name.lower()].type, CLOB):
                    result[name] = json.loads(row[name]) if row[name] is not None else None
                else:
                    result[name] = row[name]
            results.append(self._convert_dict_key(result, topic_name))
        return results
    else:
        # single-row result: decode CLOB columns the same way as the list branch
        result = {}
        for index, name in enumerate(columns):
            if isinstance(table.c[name.lower()].type, CLOB):
                result[name] = json.loads(rows[index]) if rows[index] is not None else None
            else:
                result[name] = rows[index]
        return result

async def query_log_by_critical(query: MonitorLogQuery):
    query_dict = {}
    query_list = []
    if query.criteria.topicId is not None:
        query_list.append({"topicId": query.criteria.topicId})
    if query.criteria.pipelineId is not None:
        query_list.append({"pipelineId": query.criteria.pipelineId})
    if query.criteria.startDate is not None and query.criteria.endDate is not None:
        query_list.append({"sys_insertTime": {
            "between": (
                datetime.strptime(query.criteria.startDate, DATE_FORMAT),
                datetime.strptime(query.criteria.endDate, DATE_FORMAT)
            )
        }})
    if query.criteria.status is not None:
        query_list.append({"status": query.criteria.status.upper()})
    if len(query_list) > 1:
        query_dict['and'] = query_list
    elif len(query_list) == 1:
        query_dict = query_list[0]
    return query_pipeline_monitor(build_collection_name("raw_pipeline_monitor"), query_dict, query.pagination)

def topic_data_page_(self, where, sort, pageable, model, name) -> DataPage:
    topic_collection_name = build_collection_name(name)
    codec_options = build_code_options()
    collection = self.client.get_collection(topic_collection_name, codec_options=codec_options)
    mongo_where = self.build_mongo_where_expression(where)
    total = collection.count_documents(mongo_where)
    skips = pageable.pageSize * (pageable.pageNumber - 1)
    if sort is not None:
        cursor = collection.find(mongo_where).skip(skips).limit(pageable.pageSize).sort(
            self.build_mongo_order(sort))
    else:
        cursor = collection.find(mongo_where).skip(skips).limit(pageable.pageSize)
    if model is not None:
        return build_data_pages(pageable, [model.parse_obj(result) for result in cursor], total)
    else:
        results = []
        if self.storage_template.check_topic_type(name) == RAW:
            for doc in cursor:
                results.append(doc['data_'])
        else:
            for doc in cursor:
                del doc['_id']
                results.append(doc)
        return build_data_pages(pageable, results, total)

def topic_data_list_all(self, topic_name) -> list:
    table_name = build_collection_name(topic_name)
    table = self.get_topic_table_by_name(table_name)
    stmt = select(table)
    with self.engine.connect() as conn:
        cursor = conn.execute(stmt).cursor
        columns = [col[0] for col in cursor.description]
        cursor.rowfactory = lambda *args: dict(zip(columns, args))
        rows = cursor.fetchall()
    if rows is None:
        return None
    is_raw = self.storage_template.check_topic_type(topic_name) == "raw"
    results = []
    for row in rows:
        result = {}
        for name in columns:
            if isinstance(table.c[name.lower()].type, CLOB):
                result[name] = json.loads(row[name]) if row[name] is not None else None
            else:
                result[name] = row[name]
        if is_raw:
            results.append(result['DATA_'])
        else:
            results.append(result)
    if is_raw:
        return results
    else:
        return self._convert_list_elements_key(results, topic_name)

def topic_data_find_with_aggregate(self, where, topic_name, aggregate):
    codec_options = build_code_options()
    topic_data_col = self.client.get_collection(build_collection_name(topic_name), codec_options=codec_options)
    for key, value in aggregate.items():
        aggregate_ = {}
        if value == "sum":
            aggregate_ = {"$group": {"_id": "null", "value": {"$sum": f'${key}'}}}
        elif value == "count":
            return topic_data_col.count_documents(self.build_mongo_where_expression(where))
        elif value == "avg":
            aggregate_ = {"$group": {"_id": "null", "value": {"$avg": f'${key}'}}}
        pipeline = [{"$match": self.build_mongo_where_expression(where)}, aggregate_]
        cursor = topic_data_col.aggregate(pipeline)
        for doc in cursor:
            return doc["value"]

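# Usage sketch (hypothetical, for illustration only): summing a numeric factor over filtered
# topic data via topic_data_find_with_aggregate above. "order" and "order_amount" are assumed
# topic/factor names; the where-clause shape simply follows build_mongo_where_expression.
def example_total_order_amount(storage, where):
    # "sum" triggers the $group stage built above and yields a single scalar value
    return storage.topic_data_find_with_aggregate(where, "order", {"order_amount": "sum"})
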
def update_topic(topic_id: str, topic: Topic) -> Topic:
    result = storage_template.update_one(topic, Topic, TOPICS)
    cacheman[TOPIC_BY_NAME].delete(topic.name)
    cacheman[TOPIC_DICT_BY_NAME].delete(topic.name)
    cacheman[TOPIC_BY_ID].delete(topic_id)
    cacheman[COLUMNS_BY_TABLE_NAME].delete(build_collection_name(topic.name))
    return result

def topic_data_page_(self, where, sort, pageable, model, name) -> DataPage:
    table_name = build_collection_name(name)
    count = self.count_topic_data_table(table_name)
    table = self.get_topic_table_by_name(table_name)
    stmt = select(table).where(self.build_oracle_where_expression(table, where))
    orders = self.build_oracle_order(table, sort)
    for order in orders:
        stmt = stmt.order_by(order)
    offset = pageable.pageSize * (pageable.pageNumber - 1)
    stmt = text(str(stmt.compile(compile_kwargs={"literal_binds": True})) +
                " OFFSET :offset ROWS FETCH NEXT :maxnumrows ROWS ONLY")
    result = []
    with self.engine.connect() as conn:
        cursor = conn.execute(stmt, {"offset": offset, "maxnumrows": pageable.pageSize}).cursor
        columns = [col[0] for col in cursor.description]
        cursor.rowfactory = lambda *args: dict(zip(columns, args))
        res = cursor.fetchall()
    if self.storage_template.check_topic_type(name) == "raw":
        for row in res:
            result.append(json.loads(row['DATA_']))
    else:
        for row in res:
            if model is not None:
                result.append(parse_obj(model, row, table))
            else:
                result.append(row)
    return build_data_pages(pageable, result, count)

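# Usage sketch (hypothetical): fetching the first page of an Oracle-backed topic with
# topic_data_page_ above. Pageable is assumed to be the model carrying pageSize/pageNumber
# used by that method; the empty filter and None sort/model are illustrative only.
def example_first_page(storage, topic_name):
    pageable = Pageable(pageSize=50, pageNumber=1)
    return storage.topic_data_page_({}, None, pageable, None, topic_name)
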
def parse_parameter(parameter_: Parameter):
    if parameter_.kind == "topic":
        topic = get_topic_by_id(parameter_.topicId)
        topic_name = build_collection_name(topic.name)
        factor = get_factor(parameter_.factorId, topic)
        return factor.name.upper()
    elif parameter_.kind == 'constant':
        return parameter_.value
    elif parameter_.kind == 'computed':
        # all arithmetic operators fold the parameters left to right into one expression string
        arithmetic_symbols = {
            Operator.add: '+',
            Operator.subtract: '-',
            Operator.multiply: '*',
            Operator.divide: '/',
            Operator.modulus: '%'
        }
        if parameter_.type in arithmetic_symbols:
            symbol = arithmetic_symbols[parameter_.type]
            result = None
            for item in parameter_.parameters:
                if result:
                    result = f'{result}{symbol}{parse_parameter(item)}'
                else:
                    result = parse_parameter(item)
            return result
        elif parameter_.type == "case-then":
            return parse_oracle_case_then(parameter_.parameters)
        else:
            raise Exception("operator is not supported")

def drop_topic_data_table(self, topic_name):
    try:
        table_name = build_collection_name(topic_name)
        table = self.get_topic_table_by_name(table_name)
        table.drop(self.engine)
        self.clear_metadata()
    except NoSuchTableError:
        log.info("NoSuchTableError: {0}".format(table_name))

def build_table_by_topic_id(topic_id) -> Table:
    topic = get_topic_by_id(topic_id)
    topic_col_name = build_collection_name(topic.name)
    datasource: DataSource = load_data_source_by_id(topic.dataSourceId)
    catalog_name = datasource.dataSourceCode
    schema_name = datasource.name
    schema = Schema(schema_name, LiteralValue(catalog_name))
    return Table(topic_col_name, schema)

def topic_find_one_and_update(where: dict, updates: dict, name: str):
    codec_options = build_code_options()
    collection = client.get_collection(build_collection_name(name), codec_options=codec_options)
    return collection.find_one_and_update(
        filter=build_mongo_where_expression(where), update=updates, upsert=True)

def create_or_update__presto_schema_fields(topic: Topic):
    topic_name = build_collection_name(topic.name)
    presto_schema = collection.find_one({"table": topic_name})
    new_schema = {"table": topic_name, "fields": __build_presto_fields(topic.factors)}
    if presto_schema is None:
        collection.insert_one(new_schema)
    else:
        collection.delete_one({"table": topic_name})
        collection.insert_one(new_schema)

def insert_topic_data(topic_name, mapping_result, pipeline_uid):
    collection_name = build_collection_name(topic_name)
    codec_options = build_code_options()
    collection = db.get_collection(collection_name, codec_options=codec_options)
    add_audit_columns(mapping_result, INSERT)
    add_trace_columns(mapping_result, "insert_row", pipeline_uid)
    collection.insert_one(mapping_result)
    trigger_pipeline(topic_name,
                     {pipeline_constants.NEW: mapping_result, pipeline_constants.OLD: None},
                     TriggerType.insert)

def topic_data_update_one_with_version(self, id_, version_, one, topic_name):
    codec_options = build_code_options()
    topic_data_col = self.client.get_collection(build_collection_name(topic_name), codec_options=codec_options)
    self.encode_dict(one)
    result = topic_data_col.update_one(
        self.build_mongo_where_expression({"_id": ObjectId(id_), "version_": version_}),
        self.build_mongo_updates_expression_for_update(one))
    if result.modified_count == 0:
        raise OptimisticLockError("Optimistic lock error")

def find_and_modify_topic_data(topic_name, query, update_data, target_data):
    collection_name = build_collection_name(topic_name)
    collection = db.get_collection(collection_name)
    old_data = find_topic_data_by_id(collection, target_data["_id"])
    collection.find_one_and_update(filter=query, update=update_data)
    trigger_pipeline(topic_name,
                     {pipeline_constants.NEW: update_data, pipeline_constants.OLD: old_data},
                     TriggerType.update)

def update_topic_data(topic_name, mapping_result, target_data, pipeline_uid):
    collection_name = build_collection_name(topic_name)
    codec_options = build_code_options()
    collection = db.get_collection(collection_name, codec_options=codec_options)
    old_data = find_topic_data_by_id(collection, target_data["_id"])
    add_audit_columns(mapping_result, UPDATE)
    add_trace_columns(mapping_result, "update_row", pipeline_uid)
    collection.update_one({"_id": target_data["_id"]}, {"$set": mapping_result})
    data = {**target_data, **mapping_result}
    trigger_pipeline(topic_name,
                     {pipeline_constants.NEW: data, pipeline_constants.OLD: old_data},
                     TriggerType.update)

def topic_data_update_one(self, id_: str, one: any, topic_name: str):
    table_name = build_collection_name(topic_name)
    table = self.get_topic_table_by_name(table_name)
    stmt = update(table).where(eq(table.c['id_'], id_))
    one_dict = capital_to_lower(convert_to_dict(one))
    value = self.build_oracle_updates_expression(table, one_dict, "update")
    stmt = stmt.values(value)
    with self.engine.begin() as conn:
        result = conn.execute(stmt)
    return result.rowcount

def topic_data_delete_(self, where, topic_name):
    table_name = build_collection_name(topic_name)
    table = self.get_topic_table_by_name(table_name)
    if where is None:
        stmt = delete(table)
    else:
        stmt = delete(table).where(self.build_oracle_where_expression(table, where))
    with self.engine.connect() as conn:
        conn.execute(stmt)

def topic_data_insert_one(self, one, topic_name):
    codec_options = build_code_options()
    topic_data_col = self.client.get_collection(build_collection_name(topic_name), codec_options=codec_options)
    self.encode_dict(one)
    try:
        result = topic_data_col.insert_one(self.build_mongo_updates_expression_for_insert(one))
    except WriteError as we:
        if we.code == 11000:  # E11000 duplicate key error
            raise InsertConflictError("InsertConflict")
        raise
    return result.inserted_id

def parse_parameter(parameter: Parameter, factor=None): if parameter.kind == "topic": topic = get_topic_by_id(parameter.topicId) topic_col_name = build_collection_name(topic.name) factor = get_factor(parameter.factorId, topic) return Table(topic_col_name)[factor.name] elif parameter.kind == 'constant': # if factor.type =="text": # return "\'"+parameter.value+"\'" # else: return parameter.value elif parameter.kind == 'computed': if parameter.type == Operator.add: result = None for item in parameter.parameters: if result: result = operator.add(result, parse_parameter(item)) else: result = parse_parameter(item) return result elif parameter.type == Operator.subtract: result = None for item in parameter.parameters: if result: result = operator.sub(result, parse_parameter(item)) else: result = parse_parameter(item) return result elif parameter.type == Operator.multiply: result = None for item in parameter.parameters: if result: result = operator.mul(result, parse_parameter(item)) else: result = parse_parameter(item) return result elif parameter.type == Operator.divide: result = None for item in parameter.parameters: if result: result = operator.truediv(result, parse_parameter(item)) else: result = parse_parameter(item) return result elif parameter.type == Operator.modulus: result = None for item in parameter.parameters: if result: result = operator.mod(result, parse_parameter(item)) else: result = parse_parameter(item) return result else: # TODO more operator support raise Exception("operator is not supported")
def topic_data_insert_(self, data, topic_name):
    table_name = build_collection_name(topic_name)
    table = self.get_topic_table_by_name(table_name)
    values = []
    for instance in data:
        one_dict: dict = capital_to_lower(convert_to_dict(instance))
        value = self.build_oracle_updates_expression(table, one_dict, "insert")
        values.append(value)
    stmt = insert(table)
    with self.engine.connect() as conn:
        result = conn.execute(stmt, values)

def topic_data_update_one_with_version(self, id_: str, version_: int, one: any, topic_name: str):
    table_name = build_collection_name(topic_name)
    table = self.get_topic_table_by_name(table_name)
    stmt = update(table).where(
        and_(eq(table.c['id_'], id_), eq(table.c['version_'], version_)))
    one_dict = capital_to_lower(convert_to_dict(one))
    value = self.build_oracle_updates_expression(table, one_dict, "update")
    stmt = stmt.values(value)
    with self.engine.begin() as conn:
        result = conn.execute(stmt)
    if result.rowcount == 0:
        raise OptimisticLockError("Optimistic lock error")

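# Usage sketch (hypothetical): retry loop around the optimistic-lock update above.
# load_current and apply_changes are placeholder callables supplied by the caller; only
# topic_data_update_one_with_version and OptimisticLockError come from the code in this section.
def example_update_with_retry(storage, id_, topic_name, load_current, apply_changes, max_retries=3):
    for _ in range(max_retries):
        current = load_current(id_)  # reload the latest row, including its version_
        try:
            storage.topic_data_update_one_with_version(
                id_, current["version_"], apply_changes(current), topic_name)
            return
        except OptimisticLockError:
            continue  # a concurrent writer bumped version_; reload and try again
    raise OptimisticLockError("Optimistic lock error")
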
def topic_data_insert_one(self, one, topic_name):
    table_name = build_collection_name(topic_name)
    table = self.get_topic_table_by_name(table_name)
    one_dict: dict = capital_to_lower(convert_to_dict(one))
    value = self.build_oracle_updates_expression(table, one_dict, "insert")
    stmt = insert(table)
    with self.engine.connect() as conn:
        with conn.begin():
            try:
                result = conn.execute(stmt, value)
            except IntegrityError:
                raise InsertConflictError("InsertConflict")
    return result.rowcount

def topic_data_update_(self, query_dict, instances: list, topic_name):
    table_name = build_collection_name(topic_name)
    table = self.get_topic_table_by_name(table_name)
    stmt = update(table).where(self.build_oracle_where_expression(table, query_dict))
    values = []
    for instance in instances:
        one_dict = capital_to_lower(convert_to_dict(instance))
        value = self.build_oracle_updates_expression(table, one_dict, "update")
        values.append(value)
    stmt = stmt.values(values)
    with self.engine.begin() as conn:
        result = conn.execute(stmt)

def create_or_update_presto_schema_fields(topic: Topic): if topic.type == RAW: log.info("raw topic ignore presto update") else: topic_name = build_collection_name(topic.name) presto_schema = collection.find_one({"table": topic_name}) new_schema = { "table": topic_name, "fields": __build_presto_fields(topic.factors) } if presto_schema is None: collection.insert(new_schema) else: collection.delete_one({"table": topic_name}) collection.insert(new_schema)
def _join(q: QueryBuilder, join: Join) -> QueryBuilder:
    # left side of the join
    topic = get_topic_by_id(join.topicId)
    topic_col_name = build_collection_name(topic.name)
    factor = get_factor(join.factorId, topic)
    left_table = Table(topic_col_name).as_(topic.name)
    # right side of the join
    sec_topic = get_topic_by_id(join.secondaryTopicId)
    sec_topic_col_name = build_collection_name(sec_topic.name)
    sec_factor = get_factor(join.secondaryFactorId, sec_topic)
    right_table = Table(sec_topic_col_name).as_(sec_topic.name)
    if join.type == JoinType.inner:
        return q.join(right_table, JoinType.inner).on(
            operator.eq(left_table[factor.name], right_table[sec_factor.name]))
    if join.type == JoinType.left:
        return q.join(right_table, JoinType.left).on(
            operator.eq(left_table[factor.name], right_table[sec_factor.name]))
    if join.type == JoinType.right:
        return q.join(right_table, JoinType.right).on(
            operator.eq(left_table[factor.name], right_table[sec_factor.name]))

def create_or_update_presto_schema_fields_for_mongo(topic: Topic):
    if topic.type == RAW:
        log.info("raw topic ignore presto update")
    else:
        topic_name = build_collection_name(topic.name)
        presto_schema = find_one({"table": topic_name}, Schema, "_schema")
        new_schema = {
            "table": topic_name,
            "fields": __build_presto_fields(topic.factors)
        }
        if presto_schema is None:
            insert_one(new_schema, Schema, "_schema")
        else:
            delete_one({"table": topic_name}, "_schema")
            insert_one(new_schema, Schema, "_schema")

def parse_parameter(parameter: Parameter, factor=None): if parameter.kind == "topic": topic = get_topic_by_id(parameter.topicId) topic_col_name = build_collection_name(topic.name) factor = get_factor(parameter.factorId, topic) result = { 'type': factor.type, 'value': Table(topic_col_name).as_(topic.name)[factor.name] } return result elif parameter.kind == 'constant': if parameter.value.strip().startswith("{&monthDiff"): value_ = parameter.value.strip() args_str = value_.replace("{&monthDiff(", "").replace(")}", "") expr = _date_diff("month", args_str) result = {"type": "number", "value": expr} return result elif parameter.value.strip().startswith("{&dayDiff"): value_ = parameter.value.strip() args_str = value_.replace("{&dayDiff(", "").replace(")}", "") expr = _date_diff("day", args_str) result = {"type": "number", "value": expr} return result elif parameter.value.strip().startswith("{&yearDiff"): value_ = parameter.value.strip() args_str = value_.replace("{&yearDiff(", "").replace(")}", "") expr = _date_diff("year", args_str) result = {"type": "number", "value": expr} return result else: result = {'type': "text", 'value': parameter.value} return result elif parameter.kind == 'computed': result = None left = None for item in parameter.parameters: if left: right = parse_parameter(item) return _arithmetic_process(parameter.type, left, right) else: left = parse_parameter(item) return result