def sync_to_db(cls, name):
    """Merge the datasource *name* and its column metadata into the database.

    Inserts the datasource row if missing, then upserts one ``Column`` row
    per column reported by ``cls.latest_metadata(name)``.  Columns whose
    reported type is ``"STRING"`` are flagged groupable and filterable.
    Commits the session at the end.

    NOTE(review): when ``latest_metadata`` returns nothing we return early
    without committing, so a freshly added datasource row stays pending.
    """
    if not db.session.query(cls).filter_by(datasource_name=name).first():
        db.session.add(cls(datasource_name=name))
    cols = cls.latest_metadata(name)
    if not cols:
        return
    for column_name in cols:
        col_obj = (
            db.session.query(Column)
            .filter_by(datasource_name=name, column_name=column_name)
            .first()
        )
        if col_obj is None:
            col_obj = Column(datasource_name=name, column_name=column_name)
            db.session.add(col_obj)
        # String columns are the natural dimensions for group-by/filtering.
        if cols[column_name]['type'] == "STRING":
            col_obj.groupby = True
            col_obj.filterable = True
        # col_obj is always set here (fetched or newly created above).
        col_obj.type = cols[column_name]['type']
        col_obj.generate_metrics()
    db.session.commit()
def sync_to_db(cls, name, cluster):
    """Merge datasource *name* on *cluster* and its columns into the DB.

    Creates the datasource row if absent, binds it to *cluster*, then
    upserts one ``Column`` row per column in ``datasource.latest_metadata()``,
    flagging ``"STRING"`` columns as groupable/filterable and regenerating
    each column's metrics.

    NOTE(review): this variant does not commit — the caller appears to own
    the transaction boundary of ``get_session()``.
    """
    session = get_session()
    datasource = session.query(cls).filter_by(datasource_name=name).first()
    if datasource is None:
        datasource = cls(datasource_name=name)
        session.add(datasource)
    datasource.cluster = cluster
    cols = datasource.latest_metadata()
    if not cols:
        return
    for column_name, column_meta in cols.items():
        col_obj = (
            session.query(Column)
            .filter_by(datasource_name=name, column_name=column_name)
            .first()
        )
        if col_obj is None:
            col_obj = Column(datasource_name=name, column_name=column_name)
            session.add(col_obj)
        # Strings become the groupable/filterable dimensions.
        if column_meta['type'] == "STRING":
            col_obj.groupby = True
            col_obj.filterable = True
        # col_obj is guaranteed non-None here (fetched or created above).
        col_obj.type = column_meta['type']
        col_obj.datasource = datasource
        col_obj.generate_metrics()
def ge_suite_to_sqla_columns(suite: dict) -> list:
    """Convert a Great Expectations suite into a list of SQLAlchemy Columns.

    Fix: the original annotations were wrong — *suite* was annotated ``str``
    yet is subscripted like a mapping, and the return was annotated ``dict``
    yet a list is returned.  Also hoists the per-expectation-type filters out
    of the column loop (they were recomputed for every column).

    Args:
        suite: A GE expectation-suite mapping with ``"expectations"`` and
            ``"expectation_suite_name"`` keys.  The suite name's first
            dot-separated segment is taken as the table name.

    Returns:
        A list of ``Column`` objects.  ``id``/``*_id`` columns are marked
        primary keys; ``<other>_id`` columns get a ``ForeignKey`` to
        ``<other>.id``.  ``str`` columns get a length from a matching
        ``expect_column_value_lengths_to_be_between`` expectation
        (defaulting to 100).

    Raises:
        IndexError: if a column has no
            ``expect_column_values_to_be_of_type`` expectation
            (same behavior as the original).
    """
    expectations = suite["expectations"]
    table_name = suite["expectation_suite_name"].split(".")[0]
    # Hoisted loop invariants: partition expectations by type once.
    type_expectations = [
        e for e in expectations
        if e["expectation_type"] == "expect_column_values_to_be_of_type"
    ]
    length_expectations = [
        e for e in expectations
        if e["expectation_type"] == "expect_column_value_lengths_to_be_between"
    ]
    sqla_columns = []
    for column_name in get_column_names(expectations):
        column = Column(name=column_name)
        # IndexError here if the column has no type expectation (unchanged).
        ge_type = [
            e for e in type_expectations
            if e["kwargs"]["column"] == column_name
        ][0]["kwargs"]["type_"]
        kwargs = {}
        if ge_type == "str":
            matches = [
                e for e in length_expectations
                if e["kwargs"]["column"] == column_name
            ]
            # Use the declared max length only when exactly one expectation
            # matches; otherwise fall back to the default of 100.
            if len(matches) == 1:
                length = matches[0]["kwargs"]["max_value"]
            else:
                length = 100
            kwargs = {"length": length}
        column.type = ge_to_sqla_types(ge_type, **kwargs)
        if column_name == "id" or column_name.endswith("_id"):
            column.primary_key = True
        # "<other>_id" (but not "<table_name>_id") references <other>.id.
        if column_name.endswith("_id") and column_name.split("_")[0] != table_name:
            foreign_table = column_name.split("_")[0]
            column.foreign_keys = [ForeignKey(f"{foreign_table}.id")]
        sqla_columns.append(column)
    return sqla_columns
def sync_to_db(cls, name):
    """Create or refresh the datasource *name* and its column metadata.

    Adds the datasource row when it does not yet exist, then walks the
    columns reported by ``cls.latest_metadata(name)``: each is fetched or
    created, ``"STRING"`` columns are marked groupable/filterable, the type
    is refreshed, metrics are regenerated, and the session is committed.

    NOTE(review): the early return on empty metadata skips the commit, so a
    newly added datasource row is left pending in that case.
    """
    existing = db.session.query(cls).filter_by(datasource_name=name).first()
    if not existing:
        db.session.add(cls(datasource_name=name))
    metadata = cls.latest_metadata(name)
    if not metadata:
        return
    for column_name, column_info in metadata.items():
        column = db.session.query(Column).filter_by(
            datasource_name=name, column_name=column_name).first()
        if not column:
            column = Column(datasource_name=name, column_name=column_name)
            db.session.add(column)
        # STRING columns double as group-by/filter dimensions.
        if column_info['type'] == "STRING":
            column.groupby = True
            column.filterable = True
        # `column` is always bound by this point (found or created).
        column.type = column_info['type']
        column.generate_metrics()
    db.session.commit()