def test_safe_column_name_is_null(self):
    """
    Given a null word, we should get null back
    """
    input_name = None
    self.assertIsNone(utils.safe_column_name(input_name))
def test_safe_column_name_case_1(self):
    """
    Given an all lower case word, it should be wrapped in double quotes and capitalized
    """
    input_name = 'group'
    self.assertEqual('"GROUP"', utils.safe_column_name(input_name))

def test_safe_column_name_case_2(self):
    """
    Given an all lower case word, it should be wrapped in backticks and capitalized
    """
    input_name = 'group'
    self.assertEqual('`GROUP`', utils.safe_column_name(input_name, '`'))

def test_safe_column_name_case_3(self):
    """
    Given a mixed-case word, it should be wrapped in double quotes and capitalized
    """
    input_name = 'CA se'
    self.assertEqual('"CA SE"', utils.safe_column_name(input_name))

def test_safe_column_name_case_4(self):
    """
    Given a mixed-case word, it should be wrapped in backticks and capitalized
    """
    input_name = 'CA se'
    self.assertEqual('`CA SE`', utils.safe_column_name(input_name, '`'))
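# The tests above pin down the observable contract of utils.safe_column_name:
# None passes through unchanged, and any other name is upper-cased and wrapped
# in a quote character that defaults to double quotes. A minimal sketch that
# satisfies this contract is shown below; it is an illustration only, not the
# actual pipelinewise implementation, and the function name is made up.
def _safe_column_name_sketch(name, quote_character='"'):
    """Upper-case a column name and wrap it in the given quote character."""
    if name is None:
        return None
    return f'{quote_character}{name.upper()}{quote_character}'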
def generate_transformations(cls, tap: Dict) -> List[Dict]:
    """
    Generate the transformations data from the given tap config

    Args:
        tap: the tap config dictionary

    Returns:
        List of transformations
    """
    transformations = []

    for schema in tap.get('schemas', []):
        schema_name = schema.get('source_schema')
        for table in schema.get('tables', []):
            table_name = table.get('table_name')
            for trans in table.get('transformations', []):
                transformations.append({
                    'tap_stream_name': utils.get_tap_stream_name(
                        tap, tap['db_conn'].get('dbname'), schema_name, table_name),
                    'field_id': trans['column'],
                    # Make the column name safe by wrapping it in quotes. This is useful when
                    # a field_id is a reserved word and is used by target snowflake in fastsync
                    'safe_field_id': safe_column_name(trans['column']),
                    'field_paths': trans.get('field_paths'),
                    'type': trans['type'],
                    'when': trans.get('when'),
                })

    return transformations
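# A hedged usage sketch for generate_transformations. The tap dict below is a
# hypothetical minimal config (the 'id'/'type' values are made up, and the
# exact 'tap_stream_name' depends on utils.get_tap_stream_name, so it is only
# indicated schematically in the expected output).
example_tap = {
    'id': 'mysql_sample',
    'type': 'tap-mysql',
    'db_conn': {'dbname': 'my_db'},
    'schemas': [{
        'source_schema': 'public',
        'tables': [{
            'table_name': 'users',
            'transformations': [{'column': 'email', 'type': 'HASH'}],
        }],
    }],
}
# Expected shape of the single generated entry:
#   {'tap_stream_name': <per utils.get_tap_stream_name>,
#    'field_id': 'email', 'safe_field_id': '"EMAIL"',
#    'field_paths': None, 'type': 'HASH', 'when': None}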
def save_tap_jsons(self, target, tap, extra_config_keys=None):
    """
    Generate JSON config files for a singer tap connector:
        1. config.json             :(Singer spec):  Tap connection details
        2. properties.json         :(Singer spec):  Tap schema properties (generated)
        3. state.json              :(Singer spec):  Bookmark for incremental and log_based replications
        4. selection.json          :(Pipelinewise): List of streams/tables to replicate
        5. inheritable_config.json :(Pipelinewise): Extra config keys for the linked singer target
                                                    connector that pipelinewise will pass at run time
        6. transformation.json     :(Pipelinewise): Column transformations between the tap and target
    """
    if extra_config_keys is None:
        extra_config_keys = {}
    tap_dir = self.get_tap_dir(target.get('id'), tap.get('id'))
    self.logger.info('SAVING TAP JSONS to %s', tap_dir)

    # Define tap JSON file paths
    tap_config_path = os.path.join(tap_dir, 'config.json')
    tap_selection_path = os.path.join(tap_dir, 'selection.json')
    tap_transformation_path = os.path.join(tap_dir, 'transformation.json')
    tap_inheritable_config_path = os.path.join(tap_dir, 'inheritable_config.json')

    # Create tap dir if not exists
    if not os.path.exists(tap_dir):
        os.mkdir(tap_dir)

    # Generate tap config dict: a merged dictionary of db_connection and optional extra_keys
    tap_config = {**tap.get('db_conn'), **extra_config_keys}

    # Get additional properties that will be needed later to generate tap_stream_id
    tap_dbname = tap_config.get('dbname')

    # Generate tap selection
    selection = []
    for schema in tap.get('schemas', []):
        schema_name = schema.get('source_schema')
        for table in schema.get('tables', []):
            table_name = table.get('table_name')
            replication_method = table.get(
                'replication_method', utils.get_tap_default_replication_method(tap))
            selection.append(utils.delete_empty_keys({
                'tap_stream_id': utils.get_tap_stream_id(tap, tap_dbname, schema_name, table_name),
                'replication_method': replication_method,

                # Add replication_key only if replication_method is INCREMENTAL
                'replication_key': table.get('replication_key')
                                   if replication_method == 'INCREMENTAL' else None
            }))
    tap_selection = {'selection': selection}

    # Generate tap transformation
    transformations = []
    for schema in tap.get('schemas', []):
        schema_name = schema.get('source_schema')
        for table in schema.get('tables', []):
            table_name = table.get('table_name')
            for trans in table.get('transformations', []):
                transformations.append({
                    'tap_stream_name': utils.get_tap_stream_name(tap, tap_dbname, schema_name, table_name),
                    'field_id': trans['column'],
                    # Make the column name safe by wrapping it in quotes. This is useful when
                    # a field_id is a reserved word and is used by target snowflake in fastsync
                    'safe_field_id': safe_column_name(trans['column']),
                    'type': trans['type'],
                    'when': trans.get('when')
                })
    tap_transformation = {'transformations': transformations}

    # Generate stream to schema mapping
    schema_mapping = {}
    for schema in tap.get('schemas', []):
        source_schema = schema.get('source_schema')
        target_schema = schema.get('target_schema')
        target_schema_select_perms = schema.get('target_schema_select_permissions', [])

        schema_mapping[source_schema] = {
            'target_schema': target_schema,
            'target_schema_select_permissions': target_schema_select_perms
        }

        # Schema mapping can include a list of indices to create. Some target components
        # like target-postgres create indices automatically
        indices = {}
        for table in schema.get('tables', []):
            table_name = table.get('table_name')
            table_indices = table.get('indices')
            if table_indices:
                indices[table_name] = table_indices

        # Add indices map to schema mapping
        if indices:
            schema_mapping[source_schema]['indices'] = indices

    # Generate tap inheritable_config dict
    tap_inheritable_config = utils.delete_empty_keys({
        'temp_dir': self.get_temp_dir(),
        'batch_size_rows': tap.get('batch_size_rows', 20000),
        'parallelism': tap.get('parallelism', 0),
        'parallelism_max': tap.get('parallelism_max', 4),
        'hard_delete': tap.get('hard_delete', True),
        'flush_all_streams': tap.get('flush_all_streams', False),
        'primary_key_required': tap.get('primary_key_required', True),
        'default_target_schema': tap.get('default_target_schema'),
        'default_target_schema_select_permissions': tap.get('default_target_schema_select_permissions'),
        'schema_mapping': schema_mapping,

        # data_flattening_max_level
        # -------------------------
        #
        # 'data_flattening_max_level' is an optional parameter in some target connectors that
        # specifies how to load nested objects into the destination.
        #
        # We can load the original object represented as JSON or string (data flattening off)
        # or we can flatten the schema and data by creating columns automatically. When
        # 'data_flattening_max_level' is set to 0 then flattening functionality is turned off.
        #
        # The value can be set in multiple places and is evaluated in the following order:
        # ------------
        #   1: First we try to find it in the tap YAML
        #   2: Second we try to get the tap type specific default value
        #   3: Otherwise we set flattening level to 0 (disabled)
        'data_flattening_max_level': tap.get(
            'data_flattening_max_level',
            utils.get_tap_property(tap, 'default_data_flattening_max_level') or 0),
        'validate_records': tap.get('validate_records', False),
        'add_metadata_columns': tap.get('add_metadata_columns', False)
    })

    # Save the generated JSON files
    utils.save_json(tap_config, tap_config_path)
    utils.save_json(tap_inheritable_config, tap_inheritable_config_path)
    utils.save_json(tap_transformation, tap_transformation_path)
    utils.save_json(tap_selection, tap_selection_path)
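# A hedged sketch of what save_tap_jsons leaves on disk for a tap with a single
# schema. The directory comes from get_tap_dir(target_id, tap_id); the file
# names are the ones defined above; 'public', 'analytics' and 'updated_at' are
# made-up example values, and contents are abbreviated.
#
#   <tap_dir>/config.json             -> merged db_conn + extra_config_keys
#   <tap_dir>/selection.json          -> {'selection': [
#                                           {'tap_stream_id': ...,
#                                            'replication_method': 'INCREMENTAL',
#                                            'replication_key': 'updated_at'}, ...]}
#   <tap_dir>/transformation.json     -> {'transformations': [...]}
#   <tap_dir>/inheritable_config.json -> {'batch_size_rows': 20000, ...,
#                                          'schema_mapping': {
#                                            'public': {'target_schema': 'analytics',
#                                                       'indices': {...}}}}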