def test_full_etl_postgres():
    """Round-trip full ETL for Postgres: insert one row, ETL it, read it back from ClickHouse."""
    database = get_postgres_database()
    get_reader(alias_postgres).execute("insert into test(id,amount) values(1,1)")
    etl_full(alias_postgres, database, {"test": "id"}, True)
    result = get_writer().execute(f"select * from {database}.test")
    assert result == [(1, Decimal("1"))]
def test_full_etl_mysql():
    """Round-trip full ETL for MySQL: insert one row, ETL it, read it back from ClickHouse."""
    database = get_mysql_database()
    get_reader(alias_mysql).execute(f"insert into {database}.test(amount) values(1.00)")
    etl_full(alias_mysql, database, {"test": "id"}, True)
    result = get_writer().execute(f"select * from {database}.test")
    assert result == [(1, Decimal("1"))]
def etl_full(
    alias: str,
    schema: str,
    tables_pk: Dict,
    renew=False,
):
    """
    Run a one-shot full ETL of the configured tables from the source
    database into ClickHouse.

    :param alias: source database alias, used to resolve reader and settings.
    :param schema: source schema/database name (may be remapped by settings).
    :param tables_pk: mapping of table name -> primary key; a tuple value
        means a composite key.
    :param renew: when True, drop and recreate each target table before loading.
    :raises SystemExit: when the target database is missing and auto_create=false.
    """
    reader = get_reader(alias)
    source_db_database = Settings.get_source_db_database(alias, schema)
    # Settings may remap the database name; from here on use the configured one.
    schema = source_db_database.get("database")
    writer = get_writer()
    if not writer.check_database_exists(schema):
        if source_db_database.get("auto_create") is not False:
            writer.create_database(schema)
        else:
            logger.warning(
                f"Can't etl since no database {schema} found in ClickHouse and auto_create=false"
            )
            # Fix: raise SystemExit directly instead of calling the bare exit()
            # builtin, which is a site-module interactive helper and not
            # guaranteed to exist in all runtime contexts. Behavior for
            # callers is identical (exit() just raises SystemExit).
            raise SystemExit(-1)
    for table in source_db_database.get("tables"):
        if table.get("auto_full_etl") is False:
            continue
        table_name = table.get("table")
        pk = tables_pk.get(table_name)
        # Each table may target a different ClickHouse engine.
        writer = get_writer(table.get("clickhouse_engine"))
        if not pk:
            logger.warning(f"No pk found in {schema}.{table_name}, skip")
            continue
        elif isinstance(pk, tuple):
            # Composite key: render as "(col1,col2,...)" for the DDL.
            pk = f"({','.join(pk)})"
        if renew:
            drop_sql = f"drop table if exists {schema}.{table_name}"
            writer.execute(drop_sql)
            logger.info(f"drop table success:{schema}.{table_name}")
        if not writer.check_table_exists(schema, table_name):
            sign_column = table.get("sign_column")
            version_column = table.get("version_column")
            writer.execute(
                writer.get_table_create_sql(
                    reader,
                    schema,
                    table_name,
                    pk,
                    table.get("partition_by"),
                    table.get("engine_settings"),
                    sign_column=sign_column,
                    version_column=version_column,
                )
            )
            # Some readers report imprecise column types (e.g. decimals);
            # reconcile them unless the table opts out via skip_decimal.
            if reader.fix_column_type and not table.get("skip_decimal"):
                writer.fix_table_column_type(reader, schema, table_name)
            full_insert_sql = writer.get_full_insert_sql(reader, schema, table_name, sign_column)
            writer.execute(full_insert_sql)
            logger.info(f"full data etl for {schema}.{table_name} success")
        else:
            logger.debug(
                f"{schema}.{table_name} exists, skip, or use --renew force etl with drop old tables"
            )
def produce(ctx: Context):
    """Start the change producer for the configured source and stream into the broker."""
    alias = ctx.obj["alias"]
    source_reader = get_reader(alias)
    target_broker = get_broker(alias)
    logger.info(
        f"start producer success at {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
    )
    source_reader.start_sync(target_broker)
def create_mysql_table(initialize_tests):
    """Fixture: ensure the MySQL test database and its `test` table exist."""
    database = get_mysql_database()
    ddl = f"""create database if not exists {database};use {database};create table if not exists `test` ( `id` int not null auto_increment, `amount` decimal(10,2) not null, primary key (`id`) ) engine=innodb auto_increment=10 default charset=utf8mb4 collate=utf8mb4_general_ci"""
    get_reader(alias_mysql).execute(ddl)
def etl(ctx: Context, schema: str, renew: bool, table: List[str]):
    """
    CLI entry: run a full ETL for one schema.

    :param ctx: click context carrying the source alias in ctx.obj["alias"].
    :param schema: source schema/database name.
    :param renew: drop and recreate target tables before loading.
    :param table: explicit table names; when empty, all configured tables
        for the schema are used.
    """
    alias = ctx.obj["alias"]
    tables = table
    if not tables:
        tables = Settings.get_source_db_database_tables_name(alias, schema)
    reader = get_reader(alias)
    # Fix: the original reused the `table` parameter as the loop variable,
    # shadowing it; build the pk map with a comprehension instead.
    tables_pk = {table_name: reader.get_primary_key(schema, table_name) for table_name in tables}
    etl_full(alias, schema, tables_pk, renew)
def truncate_postgres_table(request):
    """Fixture: empty the Postgres `test` table before the test; on teardown,
    truncate it again and clear the ClickHouse copy as well."""
    postgres = get_postgres_database()
    truncate_sql = f"truncate table {postgres}.public.test restart identity cascade"
    reader = get_reader(alias_postgres)
    reader.execute(truncate_sql)

    def finalizer():
        reader.execute(truncate_sql)
        get_writer().execute(f"truncate table if exists {postgres}.test")

    request.addfinalizer(finalizer)
def truncate_mysql_table(request):
    """Fixture: empty the MySQL `test` table before the test; on teardown,
    truncate it again and clear the ClickHouse copy as well."""
    database = get_mysql_database()
    truncate_sql = f"truncate table {database}.test"
    reader = get_reader(alias_mysql)
    reader.execute(truncate_sql)

    def finalizer():
        reader.execute(truncate_sql)
        get_writer().execute(f"truncate table if exists {database}.test")

    request.addfinalizer(finalizer)
def check(ctx: Context, schema: str):
    """Compare per-table row counts between the source database and ClickHouse,
    logging a warning for any mismatch."""
    alias = ctx.obj["alias"]
    reader = get_reader(alias)
    writer = get_writer()
    for table in Settings.get_source_db_database_tables_name(alias, schema):
        source_table_count = reader.get_count(schema, table)
        target_table_count = writer.get_count(schema, table)
        if source_table_count != target_table_count:
            logger.warning(
                f"{schema}.{table} is not equal, source_table_count={source_table_count}, target_table_count={target_table_count}"
            )
        else:
            logger.info(f"{schema}.{table} is equal, count={source_table_count}")
def consume(ctx: Context, schema: str, skip_error: bool, last_msg_id: str):
    """Run an initial full ETL (no renew), then start continuous replication
    from the broker for the given schema."""
    alias = ctx.obj["alias"]
    reader = get_reader(alias)
    configured_tables = Settings.get_source_db_database_tables_name(alias, schema)
    tables_pk = {name: reader.get_primary_key(schema, name) for name in configured_tables}
    # try etl full
    etl_full(alias, schema, tables_pk)
    table_dict = Settings.get_source_db_database_tables_dict(alias, schema)
    continuous_etl(alias, schema, tables_pk, table_dict, last_msg_id, skip_error)
def create_postgres_table(initialize_tests):
    """Fixture: create the Postgres test database and `test` table if missing."""
    database = get_postgres_database()
    reader = get_reader(alias_postgres)
    try:
        reader.execute(f"create database {database}")
    except psycopg2.errors.DuplicateDatabase:
        # Database already present from a previous run — fine.
        pass
    ddl = """create table if not exists test ( id int not null primary key, amount decimal(10, 2) default null )"""
    try:
        reader.execute(ddl)
    except psycopg2.ProgrammingError as e:
        # DDL produces no result set; only that specific error is tolerated.
        assert str(e) == "no results to fetch"  # nosec: B101
def produce(ctx: Context):
    """Start streaming changes from the aliased source into its broker."""
    alias = ctx.obj["alias"]
    target_broker = get_broker(alias)
    logger.info(f"start producer for {alias} success")
    get_reader(alias).start_sync(target_broker)
def test_execute_sql():
    """Postgres reader: `select 1` yields [1] as the first row."""
    first_row = get_reader(alias_postgres).execute("select 1")[0]
    assert first_row == [1]
def test_get_pk():
    """MySQL reader reports `id` as the primary key of the test table."""
    pk = get_reader(alias_mysql).get_primary_key(get_mysql_database(), "test")
    assert pk == "id"
def test_execute_sql():
    """MySQL reader: `select 1` yields a dict row keyed by column label."""
    first_row = get_reader(alias_mysql).execute("select 1")[0]
    assert first_row == {"1": 1}
def test_get_binlog():
    """MySQL reader returns the current binlog position as a tuple."""
    pos = get_reader(alias_mysql).get_binlog_pos()
    assert isinstance(pos, tuple)