Exemplo n.º 1
0
def generate_catalog(client, standard_fields, custom_fields, all_cubes,
                     cubes_lookup, profile_id):
    """Build a singer Catalog containing the single 'report' stream.

    Delegates schema/metadata construction to `generate_catalog_entry`,
    then wraps the result in a CatalogEntry keyed on the synthetic
    `_sdc_record_hash` column.
    """
    schema, mdata = generate_catalog_entry(client, standard_fields,
                                           custom_fields, all_cubes,
                                           cubes_lookup, profile_id)
    entry = CatalogEntry(schema=Schema.from_dict(schema),
                         key_properties=['_sdc_record_hash'],
                         stream='report',
                         tap_stream_id='report',
                         metadata=metadata.to_list(mdata))
    return Catalog([entry])
Exemplo n.º 2
0
def generate_catalog(client, report_config, standard_fields, custom_fields,
                     all_cubes, cubes_lookup, profile_ids):
    """
    Generate a catalog entry for each report specified in `report_config`.

    Returns a Catalog with one stream per report.  The first 10 metrics of
    each report definition (plus any `default_dimensions`) are marked
    selected-by-default.

    NOTE: `client`, `custom_fields` and `profile_ids` are currently unused
    (they fed a dead code path that has been removed) but are kept in the
    signature for caller compatibility.
    """
    catalog_entries = []
    for report in report_config:
        # Normalize the report name to a BigQuery-safe identifier.
        # NOTE(review): this mutates the caller's report_config in place.
        report['name'] = report['name'].replace(' ', '_').lower()

        metrics_dimensions = set(report['metrics'] + report['dimensions'])
        selected_by_default = {
            *report['metrics'][:10],  # Use first 10 metrics in definition
            *report.get('default_dimensions', [])
        }
        # Restrict the standard field list to those used by this report.
        premade_fields = [
            field for field in standard_fields
            if field['id'] in metrics_dimensions
        ]
        schema, mdata = generate_premade_catalog_entry(premade_fields,
                                                       all_cubes, cubes_lookup)

        # Flag each default field as selected-by-default in the metadata map.
        mdata = reduce(
            lambda mdata, field_name: metadata.write(mdata, (
                "properties", field_name), "selected-by-default", True),
            selected_by_default, mdata)

        catalog_entries.append(
            CatalogEntry(schema=Schema.from_dict(schema),
                         key_properties=['_sdc_record_hash'],
                         stream=report['name'],
                         tap_stream_id=report['name'],
                         metadata=metadata.to_list(mdata)))

    return Catalog(catalog_entries)
Exemplo n.º 3
0
def generate_streams(conn, table_info):
    """Build a Catalog with one entry per table described in `table_info`.

    `table_info` is a mapping of schema name -> table name -> info dict
    (with 'columns', 'row_count' and 'is_view' keys).  For each table the
    primary-key columns are looked up from INFORMATION_SCHEMA and recorded
    as table-key-properties metadata.
    """
    entries = []
    for schema_name in table_info.keys():
        for table in table_info[schema_name].keys():

            with conn.cursor() as cur:
                # NOTE(review): schema/table names originate from the
                # database's own INFORMATION_SCHEMA, but this is still
                # string-built SQL — parameterize once the driver's
                # paramstyle is confirmed.
                sql = f"""
SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE AS kcu
    INNER JOIN INFORMATION_SCHEMA.TABLE_CONSTRAINTS AS tc ON tc.CONSTRAINT_NAME = kcu.CONSTRAINT_NAME 
                                                                 AND tc.CONSTRAINT_TYPE = 'PRIMARY KEY'
WHERE kcu.TABLE_SCHEMA = '{schema_name}' AND kcu.TABLE_NAME = '{table}'"""
                cur.execute(sql)
                table_pks = [
                    col['COLUMN_NAME'] for col in convert_result_to_dict(cur)
                ]

                sql = """SELECT db_name()"""
                cur.execute(sql)
                database = cur.fetchone()[0]

            meta = {}
            columns = table_info[schema_name][table]['columns']

            metadata.write(meta, (), 'table-key-properties', table_pks)
            metadata.write(meta, (), 'schema-name', schema_name)
            metadata.write(meta, (), 'database-name', database)
            metadata.write(meta, (), 'row-count',
                           table_info[schema_name][table]['row_count'])
            metadata.write(meta, (), 'is-view',
                           table_info[schema_name][table]['is_view'])

            column_schemas = {
                col_name: schema_for_column(col_info, table_pks)
                for col_name, col_info in columns.items()
            }

            # BUG FIX: type must be the JSON-schema string 'object', not the
            # Python builtin `object` (matches usage elsewhere in this file).
            schema = Schema(type='object', properties=column_schemas)

            entry = CatalogEntry(table=table,
                                 stream=table,
                                 metadata=metadata.to_list(meta),
                                 tap_stream_id=get_tap_stream_id(
                                     database, schema_name, table),
                                 schema=schema)
            entries.append(entry)

    return Catalog(entries)
Exemplo n.º 4
0
def generate_catalog(
    client,
    report_config,
    standard_fields,
    custom_fields,
    all_cubes,
    cubes_lookup,
    profile_ids,
):
    """
    Generate a catalog entry for each report specified in `report_config`.

    The first 10 metrics of each report, along with its dimensions, are
    marked selected-by-default in the entry's metadata.
    """
    entries = []
    for report in report_config:
        default_fields = set(report['metrics'][:10])
        default_fields.update(report.get('dimensions', []))

        schema, mdata = generate_catalog_entry(client, standard_fields,
                                               custom_fields, all_cubes,
                                               cubes_lookup, profile_ids)

        # Each write targets a distinct breadcrumb, so iteration order over
        # the set does not affect the resulting metadata map.
        for field_name in default_fields:
            mdata = metadata.write(mdata, ("properties", field_name),
                                   "selected-by-default", True)

        entry = CatalogEntry(
            schema=Schema.from_dict(schema),
            key_properties=['_sdc_record_hash'],
            stream=report['name'],
            tap_stream_id=report['name'],
            metadata=metadata.to_list(mdata),
        )
        entries.append(entry)
    return Catalog(entries)
Exemplo n.º 5
0
def discover(conn, config):
    """Discover tables and columns via INFORMATION_SCHEMA and build a Catalog.

    Queries the connected database for its tables and columns, constructs a
    Schema per table, and attaches database-name / row-count / is-view /
    table-key-properties metadata.
    """
    with connect_with_backoff(conn) as open_conn:
        with open_conn.cursor() as cur:
            cur.execute("""
            SELECT TABLE_CATALOG, TABLE_SCHEMA, TABLE_NAME, TABLE_TYPE From INFORMATION_SCHEMA.TABLES
            """)

            # table_info is keyed db -> schema -> table -> info dict.
            table_info = {}

            schemas = cur.fetchall()
            for (db, schema, table, table_type) in schemas:
                if db not in table_info:
                    table_info[db] = {}
                if schema not in table_info[db]:
                    table_info[db][schema] = {}

                table_info[db][schema][table] = {
                    'is_view': table_type == 'VIEW'
                }

            cur.execute("""
            SELECT
       C.TABLE_SCHEMA, C.TABLE_NAME, C.COLUMN_NAME, C.DATA_TYPE, C.CHARACTER_MAXIMUM_LENGTH, C.NUMERIC_PRECISION,
       C.NUMERIC_PRECISION, TC.CONSTRAINT_TYPE
FROM INFORMATION_SCHEMA.COLUMNS C
    LEFT JOIN INFORMATION_SCHEMA.CONSTRAINT_COLUMN_USAGE CCU On C.COLUMN_NAME = CCU.COLUMN_NAME
    LEFT JOIN INFORMATION_SCHEMA.TABLE_CONSTRAINTS TC ON CCU.CONSTRAINT_NAME = Tc.CONSTRAINT_NAME
ORDER BY C.TABLE_SCHEMA, C.TABLE_NAME
            """)

            columns = []
            rec = cur.fetchone()
            while rec is not None:
                columns.append(Column(*rec))
                rec = cur.fetchone()

            entries = []
            # groupby requires the input sorted by the same key — the query's
            # ORDER BY TABLE_SCHEMA, TABLE_NAME guarantees that.
            for (k, cols) in itertools.groupby(
                    columns, lambda c: (c.table_schema, c.table_name)):
                cols = list(cols)
                (table_schema, table_name) = k
                schema = Schema(type='object',
                                properties={
                                    c.column_name: schema_for_column(c)
                                    for c in cols
                                })
                md = create_column_metadata(cols)
                md_map = metadata.to_map(md)

                md_map = metadata.write(md_map, (), 'database-name',
                                        table_schema)

                # `db` is the catalog name from the tables query above; a
                # single-database connection is assumed — TODO confirm.
                is_view = table_info[db][table_schema][table_name]['is_view']

                # BUG FIX: table_info is keyed db -> schema -> table; the
                # original indexed table_info[table_schema][...] directly,
                # so this branch never matched and the row-count / is-view
                # metadata was never written.
                if (table_schema in table_info[db]
                        and table_name in table_info[db][table_schema]):
                    row_count = table_info[db][table_schema][table_name].get(
                        'row_count')

                    if row_count is not None:
                        md_map = metadata.write(md_map, (), 'row-count',
                                                row_count)

                    md_map = metadata.write(md_map, (), 'is-view', is_view)

                # NOTE(review): 'PRI' is the MySQL SHOW COLUMNS key value;
                # SQL Server's TABLE_CONSTRAINTS uses 'PRIMARY KEY' — verify
                # against the target database.
                def column_is_key_prop(c, s):
                    return (c.constraint_type == 'PRI' and
                            s.properties[c.column_name].inclusion !=
                            'unsupported')

                key_properties = [
                    c.column_name for c in cols
                    if column_is_key_prop(c, schema)
                ]

                if not is_view:
                    md_map = metadata.write(md_map, (), 'table-key-properties',
                                            key_properties)

                entry = CatalogEntry(
                    table=table_name,
                    stream=table_name,
                    metadata=metadata.to_list(md_map),
                    tap_stream_id=common.generate_tap_stream_id(
                        table_schema, table_name),
                    schema=schema)

                entries.append(entry)

        return Catalog(entries)
Exemplo n.º 6
0
class TestValidateDependencies(unittest.TestCase):
    """Tests for substream dependency validation against a Catalog.

    The fixture catalog selects 'issue_board', 'sprints' and
    'issue_comments' while leaving their parent 'boards' (and 'epics',
    'project_board') unselected, which should trigger dependency errors.
    """

    catalog = Catalog([
        CatalogEntry(tap_stream_id='boards',
                     schema=Schema(),
                     metadata=[{
                         'metadata': {
                             'selected': False
                         },
                         'breadcrumb': []
                     }]),
        CatalogEntry(tap_stream_id='issue_board',
                     schema=Schema(),
                     metadata=[{
                         'metadata': {
                             'selected': True
                         },
                         'breadcrumb': []
                     }]),
        CatalogEntry(tap_stream_id='project_board',
                     schema=Schema(),
                     metadata=[{
                         'metadata': {
                             'selected': False
                         },
                         'breadcrumb': []
                     }]),
        CatalogEntry(tap_stream_id='epics',
                     schema=Schema(),
                     metadata=[{
                         'metadata': {
                             'selected': False
                         },
                         'breadcrumb': []
                     }]),
        CatalogEntry(tap_stream_id='sprints',
                     schema=Schema(),
                     metadata=[{
                         'metadata': {
                             'selected': True
                         },
                         'breadcrumb': []
                     }]),
        CatalogEntry(tap_stream_id='issue_comments',
                     schema=Schema(),
                     metadata=[{
                         'metadata': {
                             'selected': True
                         },
                         'breadcrumb': []
                     }]),
    ])

    def test_is_selected(self):
        selected = utils.is_selected(streams.IssueBoard, self.catalog)
        self.assertTrue(selected)

    def test_raises_substream_error(self):
        # NOTE(review): mutates the shared streams.STREAMS dict — tests may
        # leak state into each other; consider copying the definition.
        test_streams = {'boards': streams.STREAMS['boards']}
        # test recursive checking
        test_streams['boards']['substreams']['issues'] = streams.STREAMS[
            'issues']
        self.assertRaises(utils.DependencyException,
                          utils.validate_dependencies, test_streams,
                          self.catalog)

    def test_raises_right_amount_of_substream_errors(self):
        test_streams = {'boards': streams.STREAMS['boards']}
        # test recursive checking
        test_streams['boards']['substreams']['issues'] = streams.STREAMS[
            'issues']
        with self.assertRaises(utils.DependencyException) as context:
            utils.validate_dependencies(test_streams, self.catalog)
        # BUG FIX: this assertion was inside the `with` block *after* the
        # raising call, so it never executed. It must run after the context
        # manager has captured the exception; assertEqual also reports the
        # actual count on failure.
        self.assertEqual(len(context.exception.errors), 3)