示例#1
0
def generate_catalog(client, report_config, standard_fields, custom_fields,
                     all_cubes, cubes_lookup, profile_ids):
    """
    Generate a catalog entry for each report specified in `report_config`.

    For every report the schema is built from the entries of
    `standard_fields` whose ``id`` appears in the report's ``metrics`` or
    ``dimensions``. The first 10 metrics and any ``default_dimensions`` are
    marked ``selected-by-default`` in the stream metadata.

    Side effect: each report's ``name`` is rewritten in place to a
    lower-case, underscore-separated form; that normalized name is used as
    both the stream name and the tap stream id.

    `client`, `custom_fields` and `profile_ids` are accepted for interface
    compatibility but are not used by this function.

    Returns a `Catalog` holding one `CatalogEntry` per report.
    """
    catalog_entries = []
    for report in report_config:
        # Change to a safe name for BigQuery. NOTE: this writes the
        # normalized name back into the caller's report dict.
        report['name'] = report['name'].replace(' ', '_').lower()

        metrics_dimensions = set(report['metrics'] + report['dimensions'])
        selected_by_default = {
            *report['metrics'][:10],  # Use first 10 metrics in definition
            *report.get('default_dimensions', [])
        }
        premade_fields = [
            field for field in standard_fields
            if field['id'] in metrics_dimensions
        ]
        schema, mdata = generate_premade_catalog_entry(premade_fields,
                                                       all_cubes, cubes_lookup)

        # Thread the metadata through each write, keeping whatever
        # `metadata.write` returns as the value for the next field.
        for field_name in selected_by_default:
            mdata = metadata.write(mdata, ("properties", field_name),
                                   "selected-by-default", True)

        catalog_entries.append(
            CatalogEntry(schema=Schema.from_dict(schema),
                         key_properties=['_sdc_record_hash'],
                         stream=report['name'],
                         tap_stream_id=report['name'],
                         metadata=metadata.to_list(mdata)))

    return Catalog(catalog_entries)
示例#2
0
def generate_catalog(client, standard_fields, custom_fields, all_cubes,
                     cubes_lookup, profile_id):
    """
    Build a catalog containing a single stream called ``report``.

    The schema and metadata come from `generate_catalog_entry`, which
    receives every argument of this function unchanged. The resulting entry
    uses ``_sdc_record_hash`` as its only key property.
    """
    stream_name = 'report'
    entry_schema, entry_mdata = generate_catalog_entry(
        client, standard_fields, custom_fields, all_cubes, cubes_lookup,
        profile_id)

    report_entry = CatalogEntry(
        schema=Schema.from_dict(entry_schema),
        key_properties=['_sdc_record_hash'],
        stream=stream_name,
        tap_stream_id=stream_name,
        metadata=metadata.to_list(entry_mdata),
    )
    return Catalog([report_entry])
    def test_binlog_stream_requires_historical_with_no_log_coordinates_returns_true(
            self):
        """An empty bookmark for the stream is expected to give a truthy result."""
        bookmarks = {
            'stream_1': {},
            'stream_2': {},
        }
        entry = CatalogEntry(tap_stream_id='stream_1', schema={})

        result = binlog_stream_requires_historical(entry,
                                                   {'bookmarks': bookmarks})

        self.assertTrue(result)
    def test_binlog_stream_requires_historical_with_gtid_returns_false(self):
        """A bookmark carrying only a ``gtid`` is expected to give a falsy result."""
        bookmarks = {
            'stream_1': {'gtid': '0-3834-222'},
            'stream_2': {},
        }
        entry = CatalogEntry(tap_stream_id='stream_1', schema={})

        result = binlog_stream_requires_historical(entry,
                                                   {'bookmarks': bookmarks})

        self.assertFalse(result)
    def test_binlog_stream_requires_historical_with_log_coordinates_returns_false(
            self):
        """A bookmark with ``log_file`` and ``log_pos`` is expected to give a falsy result."""
        stream_1_bookmark = {
            'log_file': 'binlog.0001',
            'log_pos': 1123,
        }
        bookmarks = {
            'stream_1': stream_1_bookmark,
            'stream_2': {},
        }
        entry = CatalogEntry(tap_stream_id='stream_1', schema={})

        result = binlog_stream_requires_historical(entry,
                                                   {'bookmarks': bookmarks})

        self.assertFalse(result)
示例#6
0
def generate_streams(conn, table_info):
    """
    Build a `Catalog` with one `CatalogEntry` per table in `table_info`.

    `conn` is a database connection whose ``cursor()`` works as a context
    manager. `table_info` maps schema name -> table name -> a dict providing
    ``'columns'`` (column name -> column info), ``'row_count'`` and
    ``'is_view'``.

    For each table the primary-key columns are read from
    INFORMATION_SCHEMA and recorded, together with the schema name, database
    name, row count and view flag, as top-level stream metadata. Column
    schemas are produced by `schema_for_column`.
    """
    # The connection's current database is the same for every table, so it
    # is looked up once instead of once per table.
    with conn.cursor() as cur:
        cur.execute("""SELECT db_name()""")
        database = cur.fetchone()[0]

    entries = []
    for schema_name, tables in table_info.items():
        for table, table_details in tables.items():

            with conn.cursor() as cur:
                # NOTE(review): schema/table names are interpolated directly
                # into the SQL text. They come from `table_info`; if that can
                # ever carry untrusted values, switch to the driver's
                # parameter binding.
                sql = f"""
SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE AS kcu
    INNER JOIN INFORMATION_SCHEMA.TABLE_CONSTRAINTS AS tc ON tc.CONSTRAINT_NAME = kcu.CONSTRAINT_NAME 
                                                                 AND tc.CONSTRAINT_TYPE = 'PRIMARY KEY'
WHERE kcu.TABLE_SCHEMA = '{schema_name}' AND kcu.TABLE_NAME = '{table}'"""
                cur.execute(sql)
                table_pks = [
                    col['COLUMN_NAME'] for col in convert_result_to_dict(cur)
                ]

            meta = {}
            columns = table_details['columns']

            metadata.write(meta, (), 'table-key-properties', table_pks)
            metadata.write(meta, (), 'schema-name', schema_name)
            metadata.write(meta, (), 'database-name', database)
            metadata.write(meta, (), 'row-count', table_details['row_count'])
            metadata.write(meta, (), 'is-view', table_details['is_view'])

            column_schemas = {
                col_name: schema_for_column(col_info, table_pks)
                for col_name, col_info in columns.items()
            }

            # The schema type must be the JSON-schema type name 'object',
            # not the Python builtin `object`.
            schema = Schema(type='object', properties=column_schemas)

            entry = CatalogEntry(table=table,
                                 stream=table,
                                 metadata=metadata.to_list(meta),
                                 tap_stream_id=get_tap_stream_id(
                                     database, schema_name, table),
                                 schema=schema)
            entries.append(entry)

    return Catalog(entries)
    def test_binlog_stream_requires_historical_with_log_coordinates_and_last_pk_value_returns_true(
            self):
        """Log coordinates plus ``last_pk_fetched`` are expected to give a truthy result."""
        stream_1_bookmark = {
            'log_file': 'binlog.0001',
            'log_pos': 1123,
            'last_pk_fetched': '111',
        }
        bookmarks = {
            'stream_1': stream_1_bookmark,
            'stream_2': {},
        }
        entry = CatalogEntry(tap_stream_id='stream_1', schema={})

        result = binlog_stream_requires_historical(entry,
                                                   {'bookmarks': bookmarks})

        self.assertTrue(result)
示例#8
0
 def get_catalog_entry(self, swagger: JsonResult) -> CatalogEntry:
     """
     Build the catalog entry for this stream from `swagger`.

     The schema is derived via `self._map_to_schema`. The stream id is used
     as both the tap stream id and the stream name. The replication key and
     replication method are passed on only when set; otherwise ``None`` is
     used for the corresponding values.
     """
     schema = self._map_to_schema(swagger)
     schema_dict = schema.to_dict()

     # Offer the replication key as the single valid key, or none at all.
     if self.replication_key:
         valid_keys = [self.replication_key]
     else:
         valid_keys = None

     stream_metadata = metadata.get_standard_metadata(
         schema=schema_dict,
         key_properties=self.key_properties,
         valid_replication_keys=valid_keys,
     )

     # The entry stores the name of the replication method, not the object.
     if self.replication_method:
         method_name = self.replication_method.name
     else:
         method_name = None

     return CatalogEntry(
         tap_stream_id=self.stream_id,
         stream=self.stream_id,
         schema=schema,
         key_properties=self.key_properties,
         metadata=stream_metadata,
         replication_key=self.replication_key,
         replication_method=method_name,
     )
示例#9
0
def generate_catalog(
    client,
    report_config,
    standard_fields,
    custom_fields,
    all_cubes,
    cubes_lookup,
    profile_ids,
):
    """
    Generate a catalog entry for each report specified in `report_config`.

    A schema and metadata are requested from `generate_catalog_entry` once
    per report. The report's first 10 metrics and all of its dimensions are
    then flagged ``selected-by-default`` in that metadata. The report's
    ``name`` serves as both the stream name and the tap stream id.
    """
    entries = []

    for report in report_config:
        default_fields = {
            *report['metrics'][:10], *report.get('dimensions', [])
        }
        schema, mdata = generate_catalog_entry(client, standard_fields,
                                               custom_fields, all_cubes,
                                               cubes_lookup, profile_ids)

        # Carry the metadata through each write, keeping the returned value.
        for field_name in default_fields:
            mdata = metadata.write(mdata, ("properties", field_name),
                                   "selected-by-default", True)

        entry_schema = Schema.from_dict(schema)
        stream_name = report['name']
        entry = CatalogEntry(
            schema=entry_schema,
            key_properties=['_sdc_record_hash'],
            stream=stream_name,
            tap_stream_id=stream_name,
            metadata=metadata.to_list(mdata),
        )
        entries.append(entry)

    return Catalog(entries)
示例#10
0
class TestValidateDependencies(unittest.TestCase):
    """Tests for `utils.is_selected` and `utils.validate_dependencies`."""

    # Catalog shared by every test: one entry per stream id, each with a
    # single top-level (empty breadcrumb) metadata record holding its
    # `selected` flag.
    catalog = Catalog([
        CatalogEntry(tap_stream_id=stream_id,
                     schema=Schema(),
                     metadata=[{
                         'metadata': {
                             'selected': selected
                         },
                         'breadcrumb': []
                     }])
        for stream_id, selected in (
            ('boards', False),
            ('issue_board', True),
            ('project_board', False),
            ('epics', False),
            ('sprints', True),
            ('issue_comments', True),
        )
    ])

    def test_is_selected(self):
        """`issue_board` is marked selected in the shared catalog."""
        selected = utils.is_selected(streams.IssueBoard, self.catalog)
        self.assertTrue(selected)

    def test_raises_substream_error(self):
        """Validation raises `DependencyException` for the shared catalog."""
        test_streams = {'boards': streams.STREAMS['boards']}
        # test recursive checking
        # NOTE: test_streams['boards'] is the same object as
        # streams.STREAMS['boards'], so this assignment also modifies
        # streams.STREAMS.
        test_streams['boards']['substreams']['issues'] = streams.STREAMS[
            'issues']
        self.assertRaises(utils.DependencyException,
                          utils.validate_dependencies, test_streams,
                          self.catalog)

    def test_raises_right_amount_of_substream_errors(self):
        """The raised `DependencyException` carries exactly three errors."""
        test_streams = {'boards': streams.STREAMS['boards']}
        # test recursive checking
        # NOTE: test_streams['boards'] is the same object as
        # streams.STREAMS['boards'], so this assignment also modifies
        # streams.STREAMS.
        test_streams['boards']['substreams']['issues'] = streams.STREAMS[
            'issues']
        with self.assertRaises(utils.DependencyException) as context:
            utils.validate_dependencies(test_streams, self.catalog)
        # Checked after the `with` block: a statement placed after the
        # raising call inside the block would never execute.
        self.assertEqual(len(context.exception.errors), 3)