def test_upsert_qualitycheck(conn: Connector):
    """
    Check that `conn.upsert` inserts a new row and then updates it in place.

    A throwaway model `A` with a unique constraint on `name` is created, one
    instance is upserted, its `price` is changed and it is upserted again.
    After each upsert the table must contain exactly one row carrying the
    latest price.
    """
    from sqlalchemy import Column, DateTime, text, UniqueConstraint
    from sqlalchemy.dialects.postgresql import TEXT, INTEGER, BIGINT

    class A(DQBase):
        __tablename__ = "my_table"
        __table_args__ = (UniqueConstraint("name", name="unique_constraint_test"),)

        id = Column(BIGINT, primary_key=True)
        name = Column(TEXT, nullable=False)
        price = Column(INTEGER)
        created_at = Column(
            DateTime(timezone=True),
            server_default=text("NOW()"),
            nullable=False,
            index=True,
        )

    conn.ensure_table(A.__table__)

    instance = A(name="hello", price=13)
    conn.upsert(objs=[instance])

    s = conn.make_session()
    # try/finally so the session is released even when an assertion fails;
    # otherwise a failing run would leave the connection open.
    try:
        # check if inserted
        row = s.query(A.__table__).all()
        s.expunge_all()
        s.commit()
        assert len(row) == 1
        assert row[0].price == 13

        # change data and insert again - should upsert
        instance.price = 42
        conn.upsert(objs=[instance])

        row = s.query(A.__table__).all()
        s.expunge_all()
        s.commit()
        assert len(row) == 1
        assert row[0].price == 42
    finally:
        s.close()
def set_medians(self, conn: Connector, days=30):
    """
    Calculate the medians of passed/failed quality checks from the last `days` days.

    Rows of this object's class whose `task_ts` lies between `days` days ago
    and today are queried, and the medians of their `failed` and `passed`
    values are stored on `self.median_30_day_failed` and
    `self.median_30_day_passed`. Each attribute is set to None when the query
    returns no rows.

    :param conn: Connector used to open the session for the query
    :param days: size of the look-back window in days (defaults to 30)
    """
    now = datetime.today().date()
    past = now - timedelta(days=days)
    cls = self.__class__

    session = conn.make_session()
    # try/finally so the session is closed even if the query or commit raises.
    try:
        # NOTE(review): both bounds are date strings (no time part); whether
        # checks made later today fall inside the window depends on how the
        # database compares them with `task_ts` - confirm this is intended.
        checks = (
            session.query(cls.failed, cls.passed)
            .filter(and_(cls.task_ts <= str(now), cls.task_ts >= str(past)))
            .all()
        )
        session.expunge_all()
        session.commit()
    finally:
        session.close()

    failed = [ch.failed for ch in checks]
    self.median_30_day_failed = median(failed) if failed else None

    passed = [ch.passed for ch in checks]
    self.median_30_day_passed = median(passed) if passed else None
class ContessaRunner:
    """
    Run quality-check rules against a table and store the results.

    `run` is the entry point: it normalizes the raw rule definitions, builds
    Rule objects from them, executes every rule and inserts one result object
    per rule into the result table.

    :param conn_uri_or_engine: passed as-is to `Connector`
    :param special_qc_map: optional mapping of result table fullname to the
        quality check class that should be used for that table
    """

    def __init__(self, conn_uri_or_engine, special_qc_map=None):
        self.conn_uri_or_engine = conn_uri_or_engine
        self.conn = Connector(conn_uri_or_engine)  # todo - allow cfg
        self.special_qc_map = special_qc_map or {}

    def run(
        self,
        raw_rules: List[Dict[str, str]],
        check_table: Dict,
        result_table: Dict,  # todo - docs for quality name, maybe defaults..
        context: Optional[Dict] = None,
    ):
        """
        Execute all rules for `check_table` and insert the results.

        :param raw_rules: user rule definitions, normalized before use
        :param check_table: keyword arguments for `Table` (the checked table)
        :param result_table: keyword arguments for `ResultTable`
        :param context: optional values overriding the default context
        """
        check_table = Table(**check_table)
        result_table = ResultTable(**result_table)
        context = self.get_context(check_table, context)

        normalized_rules = self.normalize_rules(raw_rules)
        refresh_executors(check_table, self.conn, context)

        quality_check_class = self.get_quality_check_class(result_table)
        self.ensure_table(quality_check_class)

        rules = self.build_rules(normalized_rules)
        objs = self.do_quality_checks(quality_check_class, rules, context)

        self.insert(objs)

    @staticmethod
    def get_context(check_table: Table, context: Optional[Dict] = None) -> Dict:
        """
        Construct context to pass to executors. User context overrides defaults.

        :param check_table: object whose `fullname` is put into the context
        :param context: optional user values; None means "defaults only"
        :return: new dict with the defaults merged with the user context
        """
        ctx_defaults = {
            "table_fullname": check_table.fullname,
            "task_ts": datetime.now(),  # todo - is now() ok ?
        }
        # `context` defaults to None here and in `run`; dict.update(None)
        # raises TypeError, so only merge when something was actually given.
        if context:
            ctx_defaults.update(context)
        return ctx_defaults

    def normalize_rules(self, raw_rules):
        """Return `raw_rules` normalized by `RuleNormalizer`."""
        return RuleNormalizer.normalize(raw_rules)

    def do_quality_checks(self, dq_cls, rules: List[Rule], context: Dict = None):
        """
        Run quality check for all rules. Use `dq_cls` to construct objects that
        will be inserted afterwards.

        :return: list with one `dq_cls` object per rule, in rule order
        """
        return [self.apply_rule(context, dq_cls, rule) for rule in rules]

    def apply_rule(self, context, dq_cls, rule):
        """
        Execute a single rule and wrap its results in a new `dq_cls` object.

        :return: `dq_cls` instance initialized through `init_row`
        """
        executor = get_executor(rule)
        logging.info(f"Executing rule `{rule}`.")
        results = executor.execute(rule)
        obj = dq_cls()
        obj.init_row(rule, results, self.conn, context)
        return obj

    def insert(self, objs):
        """
        Insert QualityCheck objects using sqlalchemy. If there is integrity
        error, skip it.
        """
        logging.info(f"Inserting {len(objs)} results.")
        session = self.conn.make_session()
        try:
            session.add_all(objs)
            session.commit()
        except sqlalchemy.exc.IntegrityError:
            ts = objs[0].task_ts
            logging.info(
                f"This quality check ({ts}) was already done. Skipping it this time."
            )
            session.rollback()
        finally:
            session.close()

    def ensure_table(self, qc_cls):
        """
        Create table for QualityCheck class if it doesn't exist.

        A ProgrammingError raised by the create call is treated as "table
        already exists" and only logged.
        """
        try:
            qc_cls.__table__.create(bind=self.conn.engine)
            logging.info(f"Created table {qc_cls.__tablename__}.")
        except sqlalchemy.exc.ProgrammingError:
            logging.info(
                f"Table {qc_cls.__tablename__} already exists. Skipping creation."
            )

    def build_rules(self, normalized_rules):
        """
        Construct rules classes from user definition that are dicts. Raises if
        there are bad arguments for a certain rule.

        :return: list of Rule objects
        """
        ret = []
        for rule_def in normalized_rules:
            rule_cls = self.pick_rule_cls(rule_def)
            try:
                r = rule_cls(**rule_def)
            except Exception as e:
                # An exception raised without arguments has empty `args`;
                # indexing it would raise IndexError and hide the real error.
                detail = e.args[0] if e.args else e
                logging.error(f"For rule `{rule_cls.__name__}`. {detail}")
                raise
            else:
                ret.append(r)
        return ret

    def pick_rule_cls(self, rule_def):
        """
        Get rule class based on its name that was input by user.

        :param rule_def: dict
        :return: Rule class
        """
        return get_rule_cls(rule_def["name"])

    def get_quality_check_class(self, result_table: ResultTable):
        """
        QualityCheck can be different, e.g. `special_table` has specific
        quality_check. Or kind of generic one that computes number of
        passed/failed objects etc. So determine if is special or not and
        return the class.

        :return: QualityCheck cls
        """
        if result_table.fullname in self.special_qc_map:
            quality_check_class = self.special_qc_map[result_table.fullname]
            logging.info(
                f"Using {quality_check_class.__name__} as quality check class."
            )
        else:
            quality_check_class = create_default_quality_check_class(result_table)
            logging.info("Using default QualityCheck class.")
        return quality_check_class