Python Catalog.Catalog примеры, singer.catalog.Catalog.Catalog Python примеры использования

Пример #1

0

Показать файл

    def run_discovery(cls, args):
        """Discover the configured base(s) and return the dumped catalog.

        When ``args.config`` contains ``base_id`` only that base is passed to
        ``discover_base``; otherwise every base returned by
        ``__get_base_ids`` is discovered using its ``id`` and ``name``.
        """
        cls.__apply_config(args.config)

        if "base_id" not in args.config:
            # No explicit base requested: walk every known base.
            collected = []
            for base_info in cls.__get_base_ids():
                collected.extend(
                    cls.discover_base(base_info["id"], base_info["name"]))
            return Catalog(collected).dump()

        single_base_entries = cls.discover_base(args.config['base_id'])
        return Catalog(single_base_entries).dump()

Пример #2

0

Показать файл

def discover(detect=True):
    """Build a Catalog with one entry per schema.

    Args:
        detect: when true, every name in ``ldap_core.SCHEMA_NAMES`` is passed
            to ``ldap_core.detect_schema``; otherwise the schemas are taken
            from ``catalog_spec.load_schemas()``.

    Returns:
        A ``Catalog`` whose entries use the schema name as both
        ``tap_stream_id`` and ``stream``, with empty metadata and key
        properties.
    """
    if detect:
        # The loop below iterates ``raw_schemas.items()``, so this branch must
        # produce a name -> schema mapping; a plain list has no ``items()``.
        # NOTE(review): assumes detect_schema returns the schema for the given
        # name, like the values of load_schemas() -- confirm.
        raw_schemas = {
            schema_name: ldap_core.detect_schema(schema_name)
            for schema_name in ldap_core.SCHEMA_NAMES
        }
    else:
        raw_schemas = catalog_spec.load_schemas()

    streams = []
    for schema_name, schema in raw_schemas.items():
        # TODO: populate any metadata and stream's key properties here..
        stream_metadata = []
        key_properties = []

        streams.append(
            CatalogEntry(
                tap_stream_id=schema_name,
                stream=schema_name,
                schema=schema,
                key_properties=key_properties,
                metadata=stream_metadata,
                replication_key=None,
                is_view=None,
                database=None,
                table=None,
                row_count=None,
                stream_alias=None,
                replication_method=None,
            )
        )
    return Catalog(streams)

Пример #3

0

Показать файл

def discover():
    """Return a Catalog with one entry per id in STATIC_SCHEMA_STREAM_IDS.

    Every schema property gets a metadata record whose inclusion is
    "automatic" when the field is listed in the stream's PK_FIELDS and
    "available" otherwise.
    """
    result = Catalog([])
    for stream_id in schemas.STATIC_SCHEMA_STREAM_IDS:
        stream_schema = Schema.from_dict(schemas.load_schema(stream_id))
        field_metadata = [
            {
                "metadata": {
                    "inclusion": (
                        "automatic"
                        if name in schemas.PK_FIELDS[stream_id]
                        else "available"
                    )
                },
                "breadcrumb": ["properties", name],
            }
            for name in stream_schema.properties.keys()
        ]
        entry = CatalogEntry(
            stream=stream_id,
            tap_stream_id=stream_id,
            key_properties=schemas.PK_FIELDS[stream_id],
            schema=stream_schema,
            metadata=field_metadata,
        )
        result.streams.append(entry)
    return result

Пример #4

0

Показать файл

def discover(client):
    """Build the catalog from static schemas plus client-derived streams.

    The first pass adds one entry per schema returned by ``get_schemas()``.
    The second pass adds the streams produced by ``_records_streams`` and
    ``_partner_records_streams`` for ``client``, after conversion with
    ``_convert_to_singer_streams``.
    """
    static_schemas, field_metadata = get_schemas()
    result = Catalog([])

    for name, raw_schema in static_schemas.items():
        entry = CatalogEntry(
            stream=name,
            tap_stream_id=name,
            schema=Schema.from_dict(raw_schema),
            key_properties=get_pk(name),
            metadata=field_metadata[name],
        )
        result.streams.append(entry)

    for stream_source in (_records_streams, _partner_records_streams):
        converted = _convert_to_singer_streams(stream_source(client))
        for name, stream_data in converted.items():
            entry = CatalogEntry(
                stream=name,
                tap_stream_id=name,
                schema=Schema.from_dict(stream_data['schema']),
                metadata=stream_data['metadata'],
                key_properties=PRIMARY_KEYS[name],
            )
            result.streams.append(entry)

    return result

Пример #5

0

Показать файл

Файл: __init__.py Проект: JulesHuisman/tap-geosource

def discover(config):
    """Return a Catalog with one entry per schema from generate_schemas(config).

    Entries carry empty metadata and key properties; all optional
    CatalogEntry fields are passed as None.
    """
    generated = generate_schemas(config)

    entries = [
        CatalogEntry(
            tap_stream_id=name,
            stream=name,
            schema=stream_schema,
            key_properties=[],
            metadata=[],
            replication_key=None,
            is_view=None,
            database=None,
            table=None,
            row_count=None,
            stream_alias=None,
            replication_method=None,
        )
        for name, stream_schema in generated.items()
    ]

    return Catalog(entries)

Пример #6

0

Показать файл

Файл: __init__.py Проект: Pathlight/tap-maestroqa

def discover():
    """Return a Catalog with one entry per schema from load_schemas().

    Every stream is keyed on ``gradable_id`` (``section_scores`` additionally
    on ``section_id``) and uses ``date_graded`` as its replication key.
    """
    replication_key = 'date_graded'
    raw_schemas = load_schemas()
    streams = []
    for stream_id, schema in raw_schemas.items():
        key_properties = ['gradable_id']
        if stream_id == 'section_scores':
            key_properties.append('section_id')
        stream_metadata = metadata.get_standard_metadata(
            schema=schema.to_dict(),
            key_properties=key_properties,
            # The Singer catalog defines valid-replication-keys as a list of
            # field names, so the single key is wrapped rather than passed as
            # a bare string.
            valid_replication_keys=[replication_key],
            replication_method=None)
        streams.append(
            CatalogEntry(
                tap_stream_id=stream_id,
                stream=stream_id,
                schema=schema,
                key_properties=key_properties,
                metadata=stream_metadata,
                replication_key=replication_key,
                is_view=None,
                database=None,
                table=None,
                row_count=None,
                stream_alias=None,
                replication_method=None,
            ))
    return Catalog(streams)

Пример #7

0

Показать файл

def main():
    """Parse the command-line arguments, then run discovery or sync."""
    # These keys must be present in the config file.
    args = singer.parse_args(['client_id', 'client_secret', 'start_date'])

    config = args.config
    catalog = args.catalog or Catalog([])
    state = args.state
    # Instantiate the client.
    client = BillwerkClient(config)

    if args.properties and not args.catalog:
        raise Exception(
            "DEPRECATED: Use of the 'properties' parameter is not supported. Please use --catalog instead"
        )

    if not args.discover:
        LOGGER.info('Starting sync mode')
        do_sync(client, config, state, catalog)
        return

    LOGGER.info("Starting discovery mode")
    catalog = do_discover()
    write_catalog(catalog)

Пример #8

0

Показать файл

Файл: test_sync_roles.py Проект: tonylangley/tap-nikabot

 def test_should_output_records(self, mock_stdout, requests_mock):
     """Syncing the selected "roles" stream writes one SCHEMA and two RECORD lines."""
     # Page 0 serves ROLES_RESPONSE, page 1 serves EMPTY_RESPONSE.
     paged_payloads = (
         ("https://api.nikabot.com/api/v1/roles?limit=1000&page=0", ROLES_RESPONSE),
         ("https://api.nikabot.com/api/v1/roles?limit=1000&page=1", EMPTY_RESPONSE),
     )
     for url, payload in paged_payloads:
         requests_mock.get(url, json=json.loads(payload))

     config = {"access_token": "my-access-token", "page_size": 1000}
     state = {}
     roles_entry = CatalogEntry(
         tap_stream_id="roles",
         stream="roles",
         schema=Schema.from_dict({}),
         key_properties=["id"],
         metadata=[{"breadcrumb": [], "metadata": {"selected": True}}],
     )
     catalog = Catalog(streams=[roles_entry])

     sync(config, state, catalog)

     expected_calls = [
         call('{"type": "SCHEMA", "stream": "roles", "schema": {}, "key_properties": ["id"]}\n'),
         call('{"type": "RECORD", "stream": "roles", "record": {"id": "d893ebf32d49c35c1d754774", "team_id": "T034F9NPW", "name": "0.5"}, "time_extracted": "2020-01-01T00:00:00.000000Z"}\n'),
         call('{"type": "RECORD", "stream": "roles", "record": {"id": "cfabd9aa6f3e6381a716da58", "team_id": "T034F9NPW", "name": "0.1"}, "time_extracted": "2020-01-01T00:00:00.000000Z"}\n'),
     ]
     assert mock_stdout.mock_calls == expected_calls

Пример #9

0

Показать файл

def discover():
    """
    Run discovery mode.

    Returns a Catalog with one entry per schema from ``get_schemas()``, using
    the matching metadata and the key properties declared in ``STREAMS``.
    """
    all_schemas, all_metadata = get_schemas()

    entries = [
        CatalogEntry(
            tap_stream_id=name,
            stream=name,
            schema=stream_schema,
            metadata=all_metadata[name],
            key_properties=STREAMS[name]['key_properties'],
            replication_key=None,
            is_view=None,
            database=None,
            table=None,
            row_count=None,
            stream_alias=None,
            replication_method=None,
        )
        for name, stream_schema in all_schemas.items()
    ]

    return Catalog(entries)

Пример #10

0

Показать файл

Файл: discover.py Проект: Horze-International/tap-surveymonkey

def discover():
    '''
    Run discovery mode.

    Returns a Catalog with one entry per stream in ``STREAMS``. Standard
    metadata is generated from each stream's schema, and a stream's
    replication key (when it has one) is marked with automatic inclusion.
    '''
    streams = []

    for stream_id, stream_object in STREAMS.items():
        raw_schema = load_schema(stream_id)
        schema = Schema.from_dict(raw_schema)
        replication_key = stream_object.replication_key

        mdata = metadata.to_map(
            metadata.get_standard_metadata(
                schema=raw_schema,
                schema_name=stream_id,
                key_properties=stream_object.key_properties,
                # Only advertise a replication key when the stream has one;
                # wrapping a missing key would pass ``[None]``.
                valid_replication_keys=(
                    [replication_key] if replication_key else None),
                replication_method=stream_object.replication_method))

        # make sure that the replication key field is mandatory
        if replication_key:
            metadata.write(mdata,
                           ('properties', replication_key),
                           'inclusion', 'automatic')

        streams.append(
            CatalogEntry(stream=stream_id,
                         tap_stream_id=stream_id,
                         key_properties=stream_object.key_properties,
                         schema=schema,
                         metadata=metadata.to_list(mdata)))
    return Catalog(streams)

Пример #11

0

Показать файл

def discover():
    """Return a Catalog with one INCREMENTAL entry per schema from load_schemas().

    Every entry uses "date" as its replication key and carries empty
    metadata and key properties.
    """
    loaded = load_schemas()

    entries = [
        CatalogEntry(
            tap_stream_id=name,
            stream=name,
            schema=stream_schema,
            key_properties=[],
            metadata=[],
            replication_key="date",
            is_view=None,
            database=None,
            table=None,
            row_count=None,
            stream_alias=None,
            replication_method="INCREMENTAL",
        )
        for name, stream_schema in loaded.items()
    ]
    return Catalog(entries)

Пример #12

0

Показать файл

Файл: discover.py Проект: Phanatik/Python-Demo

def discover():
    """Return a Catalog with one pre-selected entry per schema from load_schemas().

    Each entry gets a single stream-level metadata record (selected, with the
    stream id as schema name), no key properties, and INCREMENTAL replication
    on "currentpage".
    """
    loaded = load_schemas()
    entries = []
    for name, stream_schema in loaded.items():
        # TODO: populate any metadata and stream's key properties here..
        top_level_metadata = {
            "metadata": {"selected": True, "schema-name": name},
            "breadcrumb": [],
        }
        entry = CatalogEntry(
            tap_stream_id=name,
            stream=name,
            schema=stream_schema,
            key_properties=[],
            metadata=[top_level_metadata],
            replication_key="currentpage",
            is_view=None,
            database=None,
            table=None,
            row_count=None,
            stream_alias=None,
            replication_method="INCREMENTAL",
        )
        entries.append(entry)
    return Catalog(entries)

Пример #13

0

Показать файл

def discover(ctx):
    """Check credentials, then build a Catalog for every id in schemas.stream_ids.

    Standard metadata is generated per stream; every property is then marked
    with automatic inclusion, and the `lists` and `messages` streams are
    marked automatic at stream level as well.
    """
    check_credentials_are_authorized(ctx)
    discovered = Catalog([])

    for stream_id in schemas.stream_ids:
        raw_schema = schemas.load_schema(stream_id)
        stream_schema = Schema.from_dict(raw_schema)

        mdata_map = metadata.to_map(
            metadata.get_standard_metadata(
                raw_schema, key_properties=schemas.PK_FIELDS[stream_id]))

        # NB: `lists` and `messages` are required for their substreams.
        # This is an approximation of the initial functionality using
        # metadata, which marked them as `selected=True` in the schema.
        if stream_id in ('lists', 'messages'):
            mdata_map = metadata.write(mdata_map, (), 'inclusion', 'automatic')

        for field_name in raw_schema['properties']:
            mdata_map = metadata.write(
                mdata_map, ('properties', field_name), 'inclusion', 'automatic')

        entry = CatalogEntry(
            stream=stream_id,
            tap_stream_id=stream_id,
            key_properties=schemas.PK_FIELDS[stream_id],
            schema=stream_schema,
            metadata=metadata.to_list(mdata_map),
        )
        discovered.streams.append(entry)
    return discovered

Пример #14

0

Показать файл

Файл: __init__.py Проект: sridharangopal/tap-test-data-generator

def discover(config):
    """Return a Catalog with one entry per schema from load_schemas(config).

    Metadata for a stream is read from ``<metadata_dir>/<stream_id>.json``
    when that file exists; otherwise a default unselected, "available"
    stream-level record is used. Key properties are always empty.
    """
    raw_schemas = load_schemas(config)
    streams = []
    for stream_id, schema in raw_schemas.items():
        key_properties = []

        # Load metadata from the metadata folder.
        path = get_abs_path(config['metadata_dir']) + '/' + stream_id + '.json'
        if os.path.isfile(path):
            # JSON text is UTF-8; do not depend on the platform's locale
            # default when decoding the file.
            with open(path, encoding='utf-8') as file:
                stream_metadata = json.load(file)
        else:
            # no metadata file adding default empty metadata
            stream_metadata = [
                {"metadata": {"selected": False, "inclusion": "available"}, "breadcrumb": []}
            ]
        streams.append(
            CatalogEntry(
                tap_stream_id=stream_id,
                stream=stream_id,
                schema=schema,
                key_properties=key_properties,
                metadata=stream_metadata,
                replication_key=None,
                is_view=None,
                database=None,
                table=None,
                row_count=None,
                stream_alias=None,
                replication_method=None,
            )
        )
    return Catalog(streams)

Пример #15

0

Показать файл

def discover():
    """Return a Catalog with one entry per schema from load_schemas().

    Every stream is keyed on ``uuid``; only the ``qa`` stream has a
    replication key (``sequence_id``).
    """
    raw_schemas = load_schemas()
    streams = []
    for stream_id, schema in raw_schemas.items():
        key_properties = ['uuid']
        replication_key = 'sequence_id' if stream_id == 'qa' else None

        stream_metadata = metadata.get_standard_metadata(
            schema=schema.to_dict(),
            key_properties=key_properties,
            # The Singer catalog defines valid-replication-keys as a list of
            # field names, so wrap the key; pass None when there is none.
            valid_replication_keys=(
                [replication_key] if replication_key else None),
            replication_method=None)

        streams.append(
            CatalogEntry(
                tap_stream_id=stream_id,
                stream=stream_id,
                schema=schema,
                key_properties=key_properties,
                metadata=stream_metadata,
                replication_key=replication_key,
                is_view=None,
                database=None,
                table=None,
                row_count=None,
                stream_alias=None,
                replication_method=None,
            ))
    return Catalog(streams)

Пример #16

0

Показать файл

def main():
    """Parse arguments, validate the config for the auth method, then discover or sync."""
    args = singer.parse_args(['start_date'])
    validate_config_view_ids(args.config)

    # A refresh token in the config means OAuth2 credentials; without one,
    # service-account details are expected instead.
    if "refresh_token" not in args.config:
        args.config['auth_method'] = "service_account"
        additional_config_keys = ['client_email', 'private_key']
    else:
        args.config['auth_method'] = "oauth2"
        additional_config_keys = ['client_id', 'client_secret', 'refresh_token']

    singer.utils.check_config(args.config, additional_config_keys)

    config = args.config
    client = Client(config)
    catalog = args.catalog or Catalog([])
    state = args.state

    if args.properties and not args.catalog:
        raise Exception(
            "DEPRECATED: Use of the 'properties' parameter is not supported. Please use --catalog instead"
        )

    if not args.discover:
        do_sync(client, config, catalog, state)
        return

    do_discover(client, config)

Пример #17

0

Показать файл

Файл: __init__.py Проект: Pathlight/tap-clarabridge

def discover():
    """Return a Catalog with one entry per schema from load_schemas().

    Key properties come from ``STREAM_CONFIGS``; every stream uses "date" as
    its replication key.
    """
    loaded = load_schemas()
    entries = []
    for name, stream_schema in loaded.items():
        # TODO: populate any metadata and stream's key properties here..
        pk = STREAM_CONFIGS[name]['key_properties']
        standard_metadata = metadata.get_standard_metadata(
            schema=stream_schema.to_dict(),
            key_properties=pk,
            # TODO: Verify this works / is necessary
            valid_replication_keys=['date'],
            replication_method=None,
        )
        entry = CatalogEntry(
            tap_stream_id=name,
            stream=name,
            schema=stream_schema,
            key_properties=pk,
            metadata=standard_metadata,
            replication_key='date',
            is_view=None,
            database=None,
            table=None,
            row_count=None,
            stream_alias=None,
            replication_method=None,
        )
        entries.append(entry)
    return Catalog(entries)

Пример #18

0

Показать файл

Файл: test_sync_groups.py Проект: tonylangley/tap-nikabot

 def test_should_output_records(self, mock_stdout, requests_mock):
     """Syncing the selected "groups" stream writes one SCHEMA and two RECORD lines."""
     # Page 0 serves GROUPS_RESPONSE, page 1 serves EMPTY_RESPONSE.
     paged_payloads = (
         ("https://api.nikabot.com/api/v1/groups?limit=1000&page=0", GROUPS_RESPONSE),
         ("https://api.nikabot.com/api/v1/groups?limit=1000&page=1", EMPTY_RESPONSE),
     )
     for url, payload in paged_payloads:
         requests_mock.get(url, json=json.loads(payload))

     config = {"access_token": "my-access-token", "page_size": 1000}
     state = {}
     groups_entry = CatalogEntry(
         tap_stream_id="groups",
         stream="groups",
         schema=Schema.from_dict({}),
         key_properties=["id"],
         metadata=[{"breadcrumb": [], "metadata": {"selected": True}}],
     )
     catalog = Catalog(streams=[groups_entry])

     sync(config, state, catalog)

     expected_calls = [
         call('{"type": "SCHEMA", "stream": "groups", "schema": {}, "key_properties": ["id"]}\n'),
         call('{"type": "RECORD", "stream": "groups", "record": {"id": "f1b4b37cc2658672770b789f", "team_id": "T034F9NPW", "name": "TA Squad 5"}, "time_extracted": "2020-01-01T00:00:00.000000Z"}\n'),
         call('{"type": "RECORD", "stream": "groups", "record": {"id": "3176700ac4f2203b825fae6c", "team_id": "T034F9NPW", "name": "Platform Toolkit"}, "time_extracted": "2020-01-01T00:00:00.000000Z"}\n'),
     ]
     assert mock_stdout.mock_calls == expected_calls

Пример #19

0

Показать файл

def do_discovery(conn_config):
    """Discover every target database, dump the combined catalog and return it.

    The database names come from ``conn_config['dbs_to_discover']`` when that
    is set; otherwise they are queried from ``pg_database``. For each name,
    ``conn_config['dbname']`` is overwritten before a connection is opened
    and handed to ``discover_db``.
    """
    configured_dbs = conn_config.get('dbs_to_discover')
    if configured_dbs:
        db_names = configured_dbs
    else:
        with post_db.open_connection(conn_config) as conn:
            with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
                LOGGER.info(
                    "Fetching all db's, to specify a single db include dbs_to_discover in config.json"
                )

                cur.execute("""
                SELECT datname
                FROM pg_database
                WHERE datistemplate = false
                    AND CASE WHEN version() LIKE '%Redshift%' THEN true
                            ELSE has_database_privilege(datname,'CONNECT')
                        END = true """)
                db_names = [row[0] for row in cur.fetchall()]

    all_streams = []
    for dbname in db_names:
        LOGGER.info("Discovering db %s", dbname)
        # The connection helper reads the target database from the config.
        conn_config['dbname'] = dbname
        with post_db.open_connection(conn_config) as conn:
            all_streams = all_streams + discover_db(conn)

    cluster_catalog = Catalog(all_streams)
    dump_catalog(cluster_catalog)
    return cluster_catalog

Пример #20

0

Показать файл

    def do_discover(self):
        """Return a Catalog in which every stream and every property is selected.

        Each stream in ``self.streams`` is bound to this tap, then described
        by a stream-level "selected" record followed by one automatic,
        selected record per schema property.
        """
        logger.info("Starting discover")

        discovered = Catalog([])

        for stream in self.streams:
            stream.tap = self

            stream_schema = Schema.from_dict(stream.get_schema())
            pk = stream.key_properties

            stream_metadata = [{"breadcrumb": [], "metadata": {"selected": True}}]
            stream_metadata.extend(
                {
                    "breadcrumb": ["properties", prop_name],
                    "metadata": {"inclusion": "automatic", "selected": True},
                }
                for prop_name, _ in stream_schema.properties.items()
            )

            entry = CatalogEntry(
                stream=stream.schema,
                tap_stream_id=stream.schema,
                key_properties=pk,
                schema=stream_schema,
                metadata=stream_metadata,
            )
            discovered.streams.append(entry)

        return discovered

Пример #21

0

Показать файл

Файл: __init__.py Проект: rumeau/tap-typeform

def discover():
    """Return a Catalog with one entry per id in STATIC_SCHEMA_STREAM_IDS.

    Every schema property gets a metadata record whose inclusion is
    'automatic' when the field is listed in the stream's PK_FIELDS and
    'available' otherwise.
    """
    result = Catalog([])
    for stream_id in schemas.STATIC_SCHEMA_STREAM_IDS:
        stream_schema = Schema.from_dict(schemas.load_schema(stream_id))
        field_metadata = [
            {
                'metadata': {
                    'inclusion': (
                        'automatic'
                        if name in schemas.PK_FIELDS[stream_id]
                        else 'available'
                    )
                },
                'breadcrumb': ['properties', name],
            }
            for name in stream_schema.properties.keys()
        ]
        entry = CatalogEntry(
            stream=stream_id,
            tap_stream_id=stream_id,
            key_properties=schemas.PK_FIELDS[stream_id],
            schema=stream_schema,
            metadata=field_metadata,
        )
        result.streams.append(entry)
    return result

Пример #22

0

Показать файл

Файл: __init__.py Проект: miroapp/tap-airtable

    def run_discovery(cls, args):
        """Fetch the base's table metadata over HTTP and return the dumped catalog.

        One CatalogEntry is created per table in the response. Its metadata
        dict holds the configured default selection, the table name and a
        ``properties`` mapping with an ``id`` key column plus one nullable
        string column per field not named "Id".
        """
        auth_headers = {
            'Authorization': 'Bearer {}'.format(args.config['token'])
        }
        metadata_url = args.config['metadata_url'] + args.config['base_id']
        response = requests.get(url=metadata_url, headers=auth_headers)

        entries = []
        for table in response.json()["tables"]:
            table_name = table["name"]
            columns = {"id": {"type": ["null", "string"], 'key': True}}
            stream_info = {
                "selected": args.config['selected_by_default'],
                "name": table_name,
                "properties": columns,
            }

            # `columns` is shared with stream_info, so fields added below
            # show up under its "properties" key.
            for field in table["fields"]:
                field_name = field["name"]
                if field_name != "Id":
                    columns[field_name] = {"type": ["null", "string"]}

            entries.append(
                CatalogEntry(table=table_name,
                             stream=table_name,
                             metadata=stream_info))

        return Catalog(entries).dump()

Пример #23

0

Показать файл

Файл: __init__.py Проект: cguimont/tap-forecastapp

def discover():
    """Return a Catalog with one FULL_TABLE entry per schema from load_schemas().

    Each entry carries a single stream-level metadata record naming the
    replication method, and no key properties.
    """
    loaded = load_schemas()
    entries = []
    for name, stream_schema in loaded.items():
        # TODO: populate any metadata and stream's key properties here..
        full_table_metadata = {
            "breadcrumb": [],
            "metadata": {"replication-method": "FULL_TABLE"},
        }
        entry = CatalogEntry(
            tap_stream_id=name,
            stream=name,
            schema=stream_schema,
            key_properties=[],
            metadata=[full_table_metadata],
            replication_key=None,
            is_view=None,
            database=None,
            table=None,
            row_count=None,
            stream_alias=None,
        )
        entries.append(entry)

    return Catalog(entries)

Пример #24

0

Показать файл

def discover(select_all, client, spreadsheet_id):
    """Build a Catalog from the schemas of the given spreadsheet.

    Key properties for a stream come from ``STREAMS`` when declared there;
    otherwise the last non-empty ``table-key-properties`` value found in the
    stream's metadata is used (``None`` if there is none). With
    ``select_all`` set, schemas are created as selected and
    ``select_all_fields_in_streams`` is applied to the finished catalog.
    """
    all_schemas, field_metadata = get_schemas(client, spreadsheet_id)
    result = Catalog([])

    for name, raw_schema in all_schemas.items():
        stream_schema = Schema.from_dict(raw_schema, selected=select_all)
        stream_mdata = field_metadata[name]

        fallback_keys = None
        for item in stream_mdata:
            declared = item.get('metadata', {}).get('table-key-properties')
            fallback_keys = declared if declared else fallback_keys

        stream_config = STREAMS.get(name, {})
        entry = CatalogEntry(
            stream=name,
            tap_stream_id=name,
            key_properties=stream_config.get('key_properties', fallback_keys),
            schema=stream_schema,
            metadata=stream_mdata,
        )
        result.streams.append(entry)

    if select_all:
        select_all_fields_in_streams(result)

    return result

Пример #25

0

Показать файл

def discover():
    """Return a Catalog with one entry per schema from load_schemas().

    Every stream is keyed on ``id``. Only ``issues`` (``updated_at``) and
    ``messages`` (``created_at``) declare valid replication keys.
    """
    replication_keys_by_stream = {
        'issues': ['updated_at'],
        'messages': ['created_at'],
    }

    loaded = load_schemas()
    entries = []
    for name, stream_schema in loaded.items():
        pk = ['id']
        standard_metadata = metadata.get_standard_metadata(
            schema=stream_schema.to_dict(),
            key_properties=pk,
            valid_replication_keys=replication_keys_by_stream.get(name),
            replication_method=None,
        )
        entry = CatalogEntry(
            tap_stream_id=name,
            stream=name,
            schema=stream_schema,
            key_properties=pk,
            metadata=standard_metadata,
            replication_key=None,
            is_view=None,
            database=None,
            table=None,
            row_count=None,
            stream_alias=None,
            replication_method=None,
        )
        entries.append(entry)
    return Catalog(entries)

Пример #26

0

Показать файл

    def do_discover(self):
        """Return a Catalog with standard metadata for every stream of this tap.

        Each stream in ``self.streams`` is bound to this tap. A stream's
        ``state_field``, when set, is declared as its only valid replication
        key and that property's inclusion is set to 'automatic'.
        """
        logger.info('Starting discover')

        discovered = Catalog([])

        for stream in self.streams:
            stream.tap = self

            stream_schema = Schema.from_dict(stream.get_schema())
            pk = stream.key_properties

            replication_keys = None
            if stream.state_field:
                replication_keys = [stream.state_field]

            stream_metadata = metadata.get_standard_metadata(
                schema=stream_schema.to_dict(),
                key_properties=pk,
                valid_replication_keys=replication_keys,
                replication_method=stream.replication_method,
            )

            # If the stream has a state_field, it needs to mark that property
            # with automatic metadata
            if stream.state_field:
                mdata_map = metadata.to_map(stream_metadata)
                state_breadcrumb = ('properties', stream.state_field)
                mdata_map[state_breadcrumb]['inclusion'] = 'automatic'
                stream_metadata = metadata.to_list(mdata_map)

            entry = CatalogEntry(
                stream=stream.schema,
                tap_stream_id=stream.schema,
                key_properties=pk,
                schema=stream_schema,
                metadata=stream_metadata,
            )
            discovered.streams.append(entry)

        return discovered

Пример #27

0

Показать файл

 def test(self):
     """get_stream returns the entry whose tap_stream_id matches the argument."""
     catalog = Catalog(
         [CatalogEntry(tap_stream_id='a'),
          CatalogEntry(tap_stream_id='b'),
          CatalogEntry(tap_stream_id='c')])
     entry = catalog.get_stream('b')
     # assertEquals is a deprecated alias that was removed in Python 3.12.
     self.assertEqual('b', entry.tap_stream_id)

Пример #28

0

Показать файл

    def do_discover(self):
        """Return a Catalog describing every stream of this tap.

        Each stream in ``self.streams`` is bound to this tap. Properties
        listed in the stream's key properties get 'automatic' inclusion, all
        others 'available'.
        """
        logger.info('Starting discover')

        discovered = Catalog([])

        for stream in self.streams:
            stream.tap = self

            stream_schema = Schema.from_dict(stream.get_schema())
            pk = stream.key_properties

            field_metadata = [
                {
                    'breadcrumb': ['properties', prop_name],
                    'metadata': {
                        'inclusion':
                            'automatic' if prop_name in pk else 'available'
                    },
                }
                for prop_name, _ in stream_schema.properties.items()
            ]

            entry = CatalogEntry(
                stream=stream.schema,
                tap_stream_id=stream.schema,
                key_properties=pk,
                schema=stream_schema,
                metadata=field_metadata,
            )
            discovered.streams.append(entry)

        return discovered

Пример #29

0

Показать файл

def discover():
    """Return a Catalog with one entry per schema from get_schemas().

    Fields listed in the stream's ``PKS`` entry get 'automatic' inclusion,
    all other fields 'available'.
    """
    all_schemas = get_schemas()
    result = Catalog([])

    for name, raw_schema in all_schemas.items():
        stream_schema = Schema.from_dict(raw_schema)
        primary_key = PKS[name]

        field_metadata = [
            {
                'metadata': {
                    'inclusion':
                        'automatic' if field in primary_key else 'available'
                },
                'breadcrumb': ['properties', field],
            }
            for field in raw_schema['properties'].keys()
        ]

        entry = CatalogEntry(
            stream=name,
            tap_stream_id=name,
            key_properties=primary_key,
            schema=stream_schema,
            metadata=field_metadata,
        )
        result.streams.append(entry)

    return result

Пример #30

0

Показать файл

def do_discovery(conn_config):
    """Discover every matching database, dump the combined catalog and return it.

    Database names are queried from ``pg_database``; when
    ``conn_config['filter_dbs']`` is set the query is first passed through
    ``post_db.filter_dbs_sql_clause``. For each name,
    ``conn_config['dbname']`` is overwritten before a connection is opened
    and handed to ``discover_db``.
    """
    with post_db.open_connection(conn_config) as conn:
        with conn.cursor(cursor_factory=psycopg2.extras.DictCursor,
                         name='stitch_cursor') as cur:
            cur.itersize = post_db.cursor_iter_size
            query = """SELECT datname
            FROM pg_database
            WHERE datistemplate = false
                AND CASE WHEN version() LIKE '%Redshift%' THEN true
                        ELSE has_database_privilege(datname,'CONNECT')
                    END = true """

            db_filter = conn_config.get('filter_dbs')
            if db_filter:
                query = post_db.filter_dbs_sql_clause(query, db_filter)

            LOGGER.info("Running DB discovery: %s", query)

            cur.execute(query)
            db_names = [row[0] for row in cur.fetchall()]

    all_streams = []
    for dbname in db_names:
        LOGGER.info("Discovering db %s", dbname)
        # The connection helper reads the target database from the config.
        conn_config['dbname'] = dbname
        with post_db.open_connection(conn_config) as conn:
            all_streams = all_streams + discover_db(conn)

    cluster_catalog = Catalog(all_streams)
    dump_catalog(cluster_catalog)
    return cluster_catalog

Python Catalog.Catalog примеры использования