Пример #1
0
def get_number_of_rows(db_engine: Engine, t: Table, sampling: int = 0):
    sampling = int(sampling)
    if 100 > sampling > 0:
        query_total = select([func.count().label('num')]).select_from(
            t.tablesample(sampling, name='alias',
                          seed=text('{}'.format(SEED))))
    else:
        query_total = select([func.count().label('num')]).select_from(alias(t))
    res_t: ResultProxy = db_engine.execute(query_total)
    total_rows = res_t.first()['num']
    res_t.close()
    return total_rows
Пример #2
0
def check_uniqueness(db_engine: Engine,
                     table: Table,
                     comb,
                     total_rows: int = None,
                     sampling: int = 0):
    if comb.__len__() == 0:
        return False
    fields = [c for c in comb]
    if not total_rows:
        total_rows = get_number_of_rows(db_engine, table, sampling)

    sampling = int(sampling)
    if 100 > sampling > 0:
        sample_t = table.tablesample(sampling,
                                     name='alias',
                                     seed=text('{}'.format(SEED)))
        sample_fields = [sample_t.columns[fn.name] for fn in comb]
        query_unique = select([func.count().label('num')]).select_from(
            alias(select(sample_fields).distinct()))

        res_u: ResultProxy = db_engine.execute(query_unique)
    else:

        if db_supports_checksum(db_engine):
            checksum_method = get_checksum_function(db_engine)
            query_unique = select(
                [func.count(checksum_method(*fields).distinct()).label('num')])
        else:
            query_unique = select([func.count().label('num')]).select_from(
                alias(select(fields).distinct()))

        res_u: ResultProxy = db_engine.execute(query_unique)

    unique_len = res_u.first()['num']
    res_u.close()

    return total_rows == unique_len, total_rows, unique_len