def build_stream_processor(
    catalog: Dict,
    json_column_name: str,
    default_schema: str,
    name_transformer: DestinationNameTransformer,
    destination_type: DestinationType,
    tables_registry: TableNameRegistry,
) -> List[StreamProcessor]:
    """Build one StreamProcessor per configured stream in the catalog.

    Reads the stream's namespace (falling back to ``default_schema``),
    normalizes schema/table names, resolves sync modes, and collects the
    cursor field / primary key required by dedup sync modes.

    Raises (via ``get_field``) when a required catalog field is missing.
    """
    processors: List[StreamProcessor] = []
    for configured_stream in get_field(catalog, "streams", "Invalid Catalog: 'streams' is not defined in Catalog"):
        stream_config = get_field(configured_stream, "stream", "Invalid Stream: 'stream' is not defined in Catalog streams")

        # The logic here matches the logic in JdbcBufferedConsumerFactory.java.
        # Any modifications need to be reflected there and vice versa.
        schema = stream_config["namespace"] if "namespace" in stream_config else default_schema

        schema_name = name_transformer.normalize_schema_name(schema, truncate=False)
        raw_schema_name = name_transformer.normalize_schema_name(f"_airbyte_{schema}", truncate=False)
        stream_name = get_field(stream_config, "name", f"Invalid Stream: 'name' is not defined in stream: {str(stream_config)}")
        raw_table_name = name_transformer.normalize_table_name(f"_airbyte_raw_{stream_name}", truncate=False)

        source_sync_mode = get_source_sync_mode(configured_stream, stream_name)
        destination_sync_mode = get_destination_sync_mode(configured_stream, stream_name)

        # Destination sync modes that deduplicate and therefore need keys.
        dedup_modes = [
            # DestinationSyncMode.upsert_dedup.value,
            DestinationSyncMode.append_dedup.value,
        ]
        cursor_field = []
        primary_key = []
        if source_sync_mode.value == SyncMode.incremental.value or destination_sync_mode.value in dedup_modes:
            cursor_field = get_field(configured_stream, "cursor_field", f"Undefined cursor field for stream {stream_name}")
        if destination_sync_mode.value in dedup_modes:
            primary_key = get_field(configured_stream, "primary_key", f"Undefined primary key for stream {stream_name}")

        message = f"'json_schema'.'properties' are not defined for stream {stream_name}"
        properties = get_field(get_field(stream_config, "json_schema", message), "properties", message)

        # Reference the raw table through a dbt source() jinja call.
        from_table = "source('{}', '{}')".format(schema_name, raw_table_name)
        processors.append(
            StreamProcessor.create(
                stream_name=stream_name,
                destination_type=destination_type,
                raw_schema=raw_schema_name,
                schema=schema_name,
                source_sync_mode=source_sync_mode,
                destination_sync_mode=destination_sync_mode,
                cursor_field=cursor_field,
                primary_key=primary_key,
                json_column_name=f"'{json_column_name}'",
                properties=properties,
                tables_registry=tables_registry,
                from_table=from_table,
            )
        )
    return processors
def build_stream_processor(
    catalog: Dict,
    json_column_name: str,
    target_schema: str,
    name_transformer: DestinationNameTransformer,
    destination_type: DestinationType,
    tables_registry: Set[str],
) -> List[StreamProcessor]:
    """Build one StreamProcessor per configured stream in the catalog.

    All streams land in ``target_schema``; raw tables are prefixed with
    ``_airbyte_raw_`` and live in an ``_airbyte_``-prefixed raw schema.

    Raises (via ``get_field``) on missing catalog fields, and EOFError
    when a stream's json_schema properties are empty.
    """
    processors: List[StreamProcessor] = []
    for configured_stream in get_field(catalog, "streams", "Invalid Catalog: 'streams' is not defined in Catalog"):
        stream_config = get_field(configured_stream, "stream", "Invalid Stream: 'stream' is not defined in Catalog streams")

        schema_name = name_transformer.normalize_schema_name(target_schema)
        raw_schema_name = name_transformer.normalize_schema_name(f"_airbyte_{target_schema}", truncate=False)
        stream_name = get_field(stream_config, "name", f"Invalid Stream: 'name' is not defined in stream: {str(stream_config)}")
        raw_table_name = name_transformer.normalize_table_name(f"_airbyte_raw_{stream_name}", truncate=False)

        message = f"'json_schema'.'properties' are not defined for stream {stream_name}"
        properties = get_field(get_field(stream_config, "json_schema", message), "properties", message)

        # Reference the raw table through a dbt source() jinja call.
        from_table = "source('{}', '{}')".format(schema_name, raw_table_name)

        # An empty properties dict would produce an empty model.
        # NOTE(review): EOFError is an unusual choice here — callers may
        # already depend on it, so it is kept as-is.
        if not properties:
            raise EOFError("Invalid Catalog: Unexpected empty properties in catalog")

        processors.append(
            StreamProcessor.create(
                stream_name=stream_name,
                destination_type=destination_type,
                raw_schema=raw_schema_name,
                schema=schema_name,
                json_column_name=f"'{json_column_name}'",
                properties=properties,
                tables_registry=tables_registry,
                from_table=from_table,
            )
        )
    return processors
def build_stream_processor(
    catalog: Dict,
    json_column_name: str,
    target_schema: str,
    name_transformer: DestinationNameTransformer,
    destination_type: DestinationType,
    tables_registry: Set[str],
) -> List[StreamProcessor]:
    """Build one StreamProcessor per configured stream in the catalog.

    All streams land in ``target_schema``. Resolves source/destination
    sync modes and collects the cursor field / primary key that dedup
    sync modes require.

    Raises (via ``get_field``) on missing catalog fields, and EOFError
    when a stream's json_schema properties are empty.
    """
    processors: List[StreamProcessor] = []
    for configured_stream in get_field(catalog, "streams", "Invalid Catalog: 'streams' is not defined in Catalog"):
        stream_config = get_field(configured_stream, "stream", "Invalid Stream: 'stream' is not defined in Catalog streams")

        schema_name = name_transformer.normalize_schema_name(target_schema)
        raw_schema_name = name_transformer.normalize_schema_name(f"_airbyte_{target_schema}", truncate=False)
        stream_name = get_field(stream_config, "name", f"Invalid Stream: 'name' is not defined in stream: {str(stream_config)}")
        raw_table_name = name_transformer.normalize_table_name(f"_airbyte_raw_{stream_name}", truncate=False)

        source_sync_mode = get_source_sync_mode(configured_stream, stream_name)
        destination_sync_mode = get_destination_sync_mode(configured_stream, stream_name)

        # Destination sync modes that deduplicate and therefore need keys.
        dedup_modes = [
            # DestinationSyncMode.upsert_dedup.value,
            DestinationSyncMode.append_dedup.value,
        ]
        cursor_field = []
        primary_key = []
        if source_sync_mode.value == SyncMode.incremental.value or destination_sync_mode.value in dedup_modes:
            cursor_field = get_field(configured_stream, "cursor_field", f"Undefined cursor field for stream {stream_name}")
        if destination_sync_mode.value in dedup_modes:
            primary_key = get_field(configured_stream, "primary_key", f"Undefined primary key for stream {stream_name}")

        message = f"'json_schema'.'properties' are not defined for stream {stream_name}"
        properties = get_field(get_field(stream_config, "json_schema", message), "properties", message)

        # Reference the raw table through a dbt source() jinja call.
        from_table = "source('{}', '{}')".format(schema_name, raw_table_name)

        # An empty properties dict would produce an empty model.
        # NOTE(review): EOFError is an unusual choice here — callers may
        # already depend on it, so it is kept as-is.
        if not properties:
            raise EOFError("Invalid Catalog: Unexpected empty properties in catalog")

        processors.append(
            StreamProcessor.create(
                stream_name=stream_name,
                destination_type=destination_type,
                raw_schema=raw_schema_name,
                schema=schema_name,
                source_sync_mode=source_sync_mode,
                destination_sync_mode=destination_sync_mode,
                cursor_field=cursor_field,
                primary_key=primary_key,
                json_column_name=f"'{json_column_name}'",
                properties=properties,
                tables_registry=tables_registry,
                from_table=from_table,
            )
        )
    return processors
def build_stream_processor(
    catalog: Dict,
    json_column_name: str,
    default_schema: str,
    name_transformer: DestinationNameTransformer,
    destination_type: DestinationType,
    tables_registry: TableNameRegistry,
) -> List[StreamProcessor]:
    """Build one StreamProcessor per configured stream in the catalog.

    Reads the stream's namespace (falling back to ``default_schema``),
    applies destination-specific normalization (Oracle json-column
    quoting, MySQL table-name truncation), resolves sync modes, and
    collects the cursor field / primary key required by dedup modes.

    Raises (via ``get_field``) when a required catalog field is missing.
    """
    processors: List[StreamProcessor] = []
    for configured_stream in get_field(catalog, "streams", "Invalid Catalog: 'streams' is not defined in Catalog"):
        stream_config = get_field(configured_stream, "stream", "Invalid Stream: 'stream' is not defined in Catalog streams")

        # The logic here matches the logic in JdbcBufferedConsumerFactory.java.
        # Any modifications need to be reflected there and vice versa.
        schema = stream_config["namespace"] if "namespace" in stream_config else default_schema

        schema_name = name_transformer.normalize_schema_name(schema, truncate=False)
        # NOTE: json_column_name is deliberately rebound on the parameter so
        # the quoted/normalized form carries over to later iterations; the
        # regex guards below prevent quoting it twice.
        if destination_type == DestinationType.ORACLE:
            already_quoted = re.compile(r"quote\((.*)\)")
            # Oracle keeps the raw tables in the stream schema itself.
            raw_schema_name = name_transformer.normalize_schema_name(schema, truncate=False)
            if not already_quoted.findall(json_column_name):
                json_column_name = name_transformer.normalize_column_name(json_column_name, in_jinja=True)
        else:
            already_single_quoted = re.compile(r"\'(.*)\'")
            raw_schema_name = name_transformer.normalize_schema_name(f"_airbyte_{schema}", truncate=False)
            if not already_single_quoted.findall(json_column_name):
                json_column_name = f"'{json_column_name}'"

        stream_name = get_field(stream_config, "name", f"Invalid Stream: 'name' is not defined in stream: {str(stream_config)}")
        # MySQL table names need to be manually truncated, because it does not do it automatically
        truncate = destination_type == DestinationType.MYSQL
        raw_table_name = name_transformer.normalize_table_name(f"_airbyte_raw_{stream_name}", truncate=truncate)

        source_sync_mode = get_source_sync_mode(configured_stream, stream_name)
        destination_sync_mode = get_destination_sync_mode(configured_stream, stream_name)

        # Destination sync modes that deduplicate and therefore need keys.
        dedup_modes = [
            # DestinationSyncMode.upsert_dedup.value,
            DestinationSyncMode.append_dedup.value,
        ]
        cursor_field = []
        primary_key = []
        if source_sync_mode.value == SyncMode.incremental.value or destination_sync_mode.value in dedup_modes:
            cursor_field = get_field(configured_stream, "cursor_field", f"Undefined cursor field for stream {stream_name}")
        if destination_sync_mode.value in dedup_modes:
            primary_key = get_field(configured_stream, "primary_key", f"Undefined primary key for stream {stream_name}")

        message = f"'json_schema'.'properties' are not defined for stream {stream_name}"
        properties = get_field(get_field(stream_config, "json_schema", message), "properties", message)

        # Reference the raw table through a dbt source() jinja macro object.
        from_table = dbt_macro.Source(schema_name, raw_table_name)
        processors.append(
            StreamProcessor.create(
                stream_name=stream_name,
                destination_type=destination_type,
                raw_schema=raw_schema_name,
                default_schema=default_schema,
                schema=schema_name,
                source_sync_mode=source_sync_mode,
                destination_sync_mode=destination_sync_mode,
                cursor_field=cursor_field,
                primary_key=primary_key,
                json_column_name=json_column_name,
                properties=properties,
                tables_registry=tables_registry,
                from_table=from_table,
            )
        )
    return processors