Python Schema示例，singer.catalog.Schema Python示例

示例#1

0

显示文件

    def column_schema(cls, col_info):
        """Build the ``Schema`` for a single column description.

        The column type is read from ``col_info["config"]["type"]`` and
        falls back to ``"string"`` when either key is absent.

        * ``autoNumber`` columns get ``inclusion="automatic"``; all other
          columns get ``inclusion="available"``.
        * ``number`` and ``autoNumber`` map to the ``number`` type, anything
          else to ``string``; the type is always nullable.
        * ``dateTime`` sets the ``date-time`` format, ``date`` sets ``date``.

        Returns the populated ``Schema``.
        """
        if "config" in col_info and "type" in col_info["config"]:
            source_type = col_info["config"]["type"]
        else:
            source_type = "string"

        is_key = source_type in ("autoNumber",)
        is_numeric = source_type in ("number", "autoNumber")

        result = Schema(inclusion="automatic" if is_key else "available")
        result.type = ['null', 'number' if is_numeric else 'string']

        if source_type in ("dateTime",):
            result.format = 'date-time'
        elif source_type in ("date",):
            result.format = 'date'

        return result

示例#2

0

显示文件

def discover(client):
    """Build the catalog from the static schemas plus the client-backed streams.

    The first pass covers every stream returned by ``get_schemas()`` and
    takes its key properties from ``get_pk``.  The second pass calls
    ``_records_streams`` and ``_partner_records_streams`` with ``client``,
    converts each result with ``_convert_to_singer_streams`` and takes key
    properties from ``PRIMARY_KEYS``.

    Returns the populated ``Catalog``.
    """
    static_schemas, static_metadata = get_schemas()
    catalog = Catalog([])

    for name, raw_schema in static_schemas.items():
        parsed = Schema.from_dict(raw_schema)
        keys = get_pk(name)
        mdata = static_metadata[name]
        entry = CatalogEntry(stream=name,
                             tap_stream_id=name,
                             key_properties=keys,
                             schema=parsed,
                             metadata=mdata)
        catalog.streams.append(entry)

    for list_streams in (_records_streams, _partner_records_streams):
        converted = _convert_to_singer_streams(list_streams(client))
        for name, payload in converted.items():
            parsed = Schema.from_dict(payload['schema'])
            mdata = payload['metadata']
            entry = CatalogEntry(stream=name,
                                 tap_stream_id=name,
                                 key_properties=PRIMARY_KEYS[name],
                                 schema=parsed,
                                 metadata=mdata)
            catalog.streams.append(entry)

    return catalog

示例#3

0

显示文件

def discover():
    """Build the catalog for every schema returned by ``get_schemas()``.

    Each stream gets a stream-level metadata entry with
    ``selected-by-default`` set to ``False``, followed by one entry per
    property.  The ``_id`` property is marked ``automatic``; every other
    property is ``available``.  ``['_id']`` is reported as the key
    properties of every stream.

    Returns the populated ``Catalog``.
    """
    schemas = get_schemas()
    catalog = Catalog([])

    for schema_name, schema_dict in schemas.items():
        schema = Schema.from_dict(schema_dict)

        # Stream-level entry (empty breadcrumb) comes first.
        metadata = [{
            'metadata': {
                'selected-by-default': False
            },
            'breadcrumb': []
        }]
        for field_name in schema_dict['properties']:
            # Compare by value: `is` tests object identity, which is not
            # guaranteed to hold for two equal strings.
            if field_name == '_id':
                inclusion = 'automatic'
            else:
                inclusion = 'available'
            metadata.append({
                'metadata': {
                    'inclusion': inclusion
                },
                'breadcrumb': ['properties', field_name]
            })

        catalog.streams.append(
            CatalogEntry(stream=schema_name,
                         tap_stream_id=schema_name,
                         schema=schema,
                         metadata=metadata,
                         key_properties=['_id']))

    return catalog

示例#4

0

显示文件

def discover():
    """Build the catalog for every schema returned by ``get_schemas()``.

    Key properties come from ``PKS``.  Each property of a stream gets one
    metadata entry whose ``inclusion`` is ``automatic`` when the property
    is one of the stream's key properties and ``available`` otherwise.

    Returns the populated ``Catalog``.
    """
    catalog = Catalog([])

    for name, raw_schema in get_schemas().items():
        parsed = Schema.from_dict(raw_schema)
        key_fields = PKS[name]

        field_mdata = [
            {
                'metadata': {
                    'inclusion': ('automatic' if field in key_fields
                                  else 'available')
                },
                'breadcrumb': ['properties', field]
            }
            for field in raw_schema['properties'].keys()
        ]

        entry = CatalogEntry(stream=name,
                             tap_stream_id=name,
                             key_properties=key_fields,
                             schema=parsed,
                             metadata=field_mdata)
        catalog.streams.append(entry)

    return catalog

示例#5

0

显示文件

def discover(ctx):
    """Check credentials, then build a catalog entry for each stream.

    Schemas are loaded with ``streams.load_schema`` and parsed with
    ``inclusion="available"``.  Every property gets an ``inclusion``
    metadata value: ``automatic`` when it is listed in
    ``streams.PK_FIELDS`` for the stream, ``available`` otherwise.

    Returns the populated ``Catalog``.
    """
    check_credentials_are_authorized(ctx)
    catalog = Catalog([])

    for stream in streams.STREAMS:
        parsed = Schema.from_dict(streams.load_schema(stream.tap_stream_id),
                                  inclusion="available")

        field_map = metadata.new()
        for prop in parsed.properties:
            is_key = prop in streams.PK_FIELDS[stream.tap_stream_id]
            level = 'automatic' if is_key else 'available'
            field_map = metadata.write(field_map, ('properties', prop),
                                       'inclusion', level)

        entry = CatalogEntry(
            stream=stream.tap_stream_id,
            tap_stream_id=stream.tap_stream_id,
            key_properties=streams.PK_FIELDS[stream.tap_stream_id],
            schema=parsed,
            metadata=metadata.to_list(field_map))
        catalog.streams.append(entry)

    return catalog

示例#6

0

显示文件

def discover(select_all, client, spreadsheet_id):
    """Build the catalog for the schemas returned by ``get_schemas``.

    Key properties for a stream are taken from ``STREAMS`` when it has a
    ``key_properties`` entry for that stream; otherwise the last truthy
    ``table-key-properties`` value found in the stream's metadata list is
    used (``None`` when there is none).

    When ``select_all`` is truthy, schemas are parsed with
    ``selected=select_all`` and ``select_all_fields_in_streams`` is run on
    the finished catalog.

    Returns the populated ``Catalog``.
    """
    schemas, field_metadata = get_schemas(client, spreadsheet_id)
    catalog = Catalog([])

    for name, raw_schema in schemas.items():
        parsed = Schema.from_dict(raw_schema, selected=select_all)
        stream_mdata = field_metadata[name]

        # Later truthy values overwrite earlier ones.
        fallback_keys = None
        for item in stream_mdata:
            candidate = item.get('metadata', {}).get('table-key-properties')
            fallback_keys = candidate or fallback_keys

        declared = STREAMS.get(name, {})
        entry = CatalogEntry(
            stream=name,
            tap_stream_id=name,
            key_properties=declared.get('key_properties', fallback_keys),
            schema=parsed,
            metadata=stream_mdata)
        catalog.streams.append(entry)

    if select_all:
        select_all_fields_in_streams(catalog)

    return catalog

示例#7

0

显示文件

def discover():
    """Build a catalog entry for each id in ``schemas.STATIC_SCHEMA_STREAM_IDS``.

    Every property of a stream's schema gets one metadata entry whose
    ``inclusion`` is ``automatic`` when the property is listed in
    ``schemas.PK_FIELDS`` for that stream and ``available`` otherwise.

    Returns the populated ``Catalog``.
    """
    catalog = Catalog([])

    for tap_stream_id in schemas.STATIC_SCHEMA_STREAM_IDS:
        parsed = Schema.from_dict(schemas.load_schema(tap_stream_id))

        field_mdata = [
            {
                "metadata": {
                    "inclusion": ("automatic"
                                  if name in schemas.PK_FIELDS[tap_stream_id]
                                  else "available")
                },
                "breadcrumb": ["properties", name],
            }
            for name in parsed.properties.keys()
        ]

        entry = CatalogEntry(
            stream=tap_stream_id,
            tap_stream_id=tap_stream_id,
            key_properties=schemas.PK_FIELDS[tap_stream_id],
            schema=parsed,
            metadata=field_mdata,
        )
        catalog.streams.append(entry)

    return catalog

示例#8

0

显示文件

    def do_discover(self):
        """Build a catalog with one entry per stream in ``self.streams``.

        Each stream has its ``tap`` attribute set to this object before
        its schema is read.  Metadata comes from
        ``metadata.get_standard_metadata``; when the stream has a truthy
        ``state_field`` it is passed as the only valid replication key and
        the matching property is then marked ``automatic``.

        Returns the populated ``Catalog``.
        """
        logger.info('Starting discover')

        catalog = Catalog([])

        for stream in self.streams:
            stream.tap = self

            schema = Schema.from_dict(stream.get_schema())
            key_properties = stream.key_properties

            schema_as_dict = schema.to_dict()
            if stream.state_field:
                replication_keys = [stream.state_field]
            else:
                replication_keys = None

            meta = metadata.get_standard_metadata(
                schema=schema_as_dict,
                key_properties=key_properties,
                valid_replication_keys=replication_keys,
                replication_method=stream.replication_method)

            # The state field must always be emitted, so flag it automatic.
            if stream.state_field:
                meta_map = metadata.to_map(meta)
                state_crumb = ('properties', stream.state_field)
                meta_map[state_crumb]['inclusion'] = 'automatic'
                meta = metadata.to_list(meta_map)

            entry = CatalogEntry(stream=stream.schema,
                                 tap_stream_id=stream.schema,
                                 key_properties=key_properties,
                                 schema=schema,
                                 metadata=meta)
            catalog.streams.append(entry)

        return catalog

示例#9

0

显示文件

文件： __init__.py 项目： rumeau/tap-typeform

def discover():
    """Build a catalog entry for each id in ``schemas.STATIC_SCHEMA_STREAM_IDS``.

    For every property in a stream's schema, one metadata entry is added:
    ``inclusion`` is ``automatic`` for properties listed in
    ``schemas.PK_FIELDS`` for the stream and ``available`` for the rest.

    Returns the populated ``Catalog``.
    """
    catalog = Catalog([])

    for tap_stream_id in schemas.STATIC_SCHEMA_STREAM_IDS:
        parsed = Schema.from_dict(schemas.load_schema(tap_stream_id))

        field_mdata = []
        for name in parsed.properties.keys():
            is_key = name in schemas.PK_FIELDS[tap_stream_id]
            field_mdata.append({
                'metadata': {
                    'inclusion': 'automatic' if is_key else 'available'
                },
                'breadcrumb': ['properties', name]
            })

        entry = CatalogEntry(
            stream=tap_stream_id,
            tap_stream_id=tap_stream_id,
            key_properties=schemas.PK_FIELDS[tap_stream_id],
            schema=parsed,
            metadata=field_mdata
        )
        catalog.streams.append(entry)

    return catalog

示例#10

0

显示文件

文件： discover.py 项目： Horze-International/tap-surveymonkey

def discover():
    """Run discovery mode and return a ``Catalog`` covering ``STREAMS``.

    For each stream the raw schema is loaded with ``load_schema`` and
    standard metadata is generated from the stream object's key
    properties, replication key and replication method.  When the stream
    has a replication key, the matching property is additionally marked
    ``automatic`` so it is always included.

    Returns:
        Catalog: one ``CatalogEntry`` per item in ``STREAMS``.
    """
    streams = []

    for stream_id, stream_object in STREAMS.items():
        raw_schema = load_schema(stream_id)
        schema = Schema.from_dict(raw_schema)
        replication_key = stream_object.replication_key

        mdata = metadata.to_map(
            metadata.get_standard_metadata(
                schema=raw_schema,
                schema_name=stream_id,
                key_properties=stream_object.key_properties,
                # Only advertise a replication key when the stream has
                # one; wrapping a missing key would pass ``[None]``.
                valid_replication_keys=([replication_key]
                                        if replication_key else None),
                replication_method=stream_object.replication_method))

        # make sure that the replication key field is mandatory
        if replication_key:
            mdata = metadata.write(mdata,
                                   ('properties', replication_key),
                                   'inclusion', 'automatic')

        streams.append(
            CatalogEntry(stream=stream_id,
                         tap_stream_id=stream_id,
                         key_properties=stream_object.key_properties,
                         schema=schema,
                         metadata=metadata.to_list(mdata)))
    return Catalog(streams)

示例#11

0

显示文件

    def do_discover(self):
        """Build a catalog with one entry per stream in ``self.streams``.

        Each stream has its ``tap`` attribute set to this object before
        its schema is read.  Every schema property gets a metadata entry
        whose ``inclusion`` is ``automatic`` when the property is one of
        the stream's key properties and ``available`` otherwise.

        Returns the populated ``Catalog``.
        """
        logger.info('Starting discover')

        catalog = Catalog([])

        for stream in self.streams:
            stream.tap = self

            schema = Schema.from_dict(stream.get_schema())
            key_properties = stream.key_properties

            field_mdata = [
                {
                    'breadcrumb': ['properties', prop],
                    'metadata': {
                        'inclusion': ('automatic' if prop in key_properties
                                      else 'available')
                    }
                }
                for prop, _ in schema.properties.items()
            ]

            entry = CatalogEntry(stream=stream.schema,
                                 tap_stream_id=stream.schema,
                                 key_properties=key_properties,
                                 schema=schema,
                                 metadata=field_mdata)
            catalog.streams.append(entry)

        return catalog

示例#12

0

显示文件

def discover(ctx):
    """Check credentials, then build a catalog entry per ``schemas.stream_ids``.

    Standard metadata is generated from the schema dict and the stream's
    ``schemas.PK_FIELDS`` entry.  Afterwards every property is marked
    ``automatic``, and the ``lists`` and ``messages`` streams are marked
    ``automatic`` at stream level as well.

    Returns the populated ``Catalog``.
    """
    check_credentials_are_authorized(ctx)
    catalog = Catalog([])

    for tap_stream_id in schemas.stream_ids:
        raw_schema = schemas.load_schema(tap_stream_id)
        parsed = Schema.from_dict(raw_schema)

        mdata_map = metadata.to_map(
            metadata.get_standard_metadata(
                raw_schema, key_properties=schemas.PK_FIELDS[tap_stream_id]))

        # NB: `lists` and `messages` are required for their substreams.
        # Marking the stream itself automatic approximates the earlier
        # behaviour, which set `selected=True` on them in the schema.
        if tap_stream_id in ('lists', 'messages'):
            mdata_map = metadata.write(mdata_map, (), 'inclusion',
                                       'automatic')

        for name in raw_schema['properties'].keys():
            crumb = ('properties', name)
            mdata_map = metadata.write(mdata_map, crumb, 'inclusion',
                                       'automatic')

        entry = CatalogEntry(stream=tap_stream_id,
                             tap_stream_id=tap_stream_id,
                             key_properties=schemas.PK_FIELDS[tap_stream_id],
                             schema=parsed,
                             metadata=metadata.to_list(mdata_map))
        catalog.streams.append(entry)

    return catalog

示例#13

0

显示文件

    def do_discover(self):
        """Build a catalog with one entry per stream in ``self.streams``.

        Each stream has its ``tap`` attribute set to this object before
        its schema is read.  The metadata list starts with a stream-level
        entry carrying ``selected: True``; every schema property then gets
        an entry with ``inclusion: automatic`` and ``selected: True``.

        Returns the populated ``Catalog``.
        """
        logger.info("Starting discover")

        catalog = Catalog([])

        for stream in self.streams:
            stream.tap = self

            schema = Schema.from_dict(stream.get_schema())
            key_properties = stream.key_properties

            # Stream-level entry first, then one entry per property.
            mdata = [{"breadcrumb": [], "metadata": {"selected": True}}]
            mdata.extend(
                {
                    "breadcrumb": ["properties", prop],
                    "metadata": {"inclusion": "automatic", "selected": True},
                }
                for prop, _ in schema.properties.items()
            )

            entry = CatalogEntry(
                stream=stream.schema,
                tap_stream_id=stream.schema,
                key_properties=key_properties,
                schema=schema,
                metadata=mdata,
            )
            catalog.streams.append(entry)

        return catalog

示例#14

0

显示文件

文件： generating_json_schemas.py 项目： uptilab2/tap-google-analytics

def generate_catalog(client, standard_fields, custom_fields, exclusions):
    """Return a ``Catalog`` holding a single ``report`` stream.

    Args:
        client: forwarded to ``generate_catalog_entry``.
        standard_fields: forwarded to ``generate_catalog_entry``.
        custom_fields: forwarded to ``generate_catalog_entry``.
        exclusions: forwarded to ``generate_catalog_entry``.

    Returns:
        Catalog: one ``CatalogEntry`` named ``report`` whose key property
        is ``_sdc_record_hash``.
    """
    # Forward the caller's `exclusions`; `field_exclusions` is not a name
    # bound in this function, so using it ignored the argument.
    schema, mdata = generate_catalog_entry(client, standard_fields,
                                           custom_fields, exclusions)
    catalog_entry = CatalogEntry(schema=Schema.from_dict(schema),
                                 key_properties=['_sdc_record_hash'],
                                 stream='report',
                                 tap_stream_id='report',
                                 metadata=metadata.to_list(mdata))
    return Catalog([catalog_entry])

示例#15

0

显示文件

    def discover_base(cls, base_id, base_name=None):
        """Fetch one base's table metadata and build a catalog entry per table.

        Performs a GET on ``cls.metadata_url + base_id`` with the auth
        header and raises via ``raise_for_status()`` on an HTTP error.

        Args:
            base_id: identifier appended to ``cls.metadata_url``.
            base_name: optional name used as the entry's ``database``;
                ``base_id`` is used when it is falsy.

        Returns:
            list: one ``CatalogEntry`` per item in the response's
            ``tables`` list.  Each entry's key properties are the fields
            whose column schema has ``inclusion == "automatic"``.
        """
        cls.logger.info("discover base " + base_id)
        headers = cls.__get_auth_header()
        response = requests.get(url=cls.metadata_url + base_id,
                                headers=headers)
        response.raise_for_status()
        entries = []

        for table in response.json()["tables"]:
            # Every table gets an implicit, always-included `id` column.
            schema_cols = {
                "id": Schema(inclusion="automatic", type=['null', "string"])
            }

            meta = {}

            table_name = table["name"]
            keys = []
            meta = metadata.write(meta, (), "inclusion", "available")
            # NOTE(review): this breadcrumb is the plain string
            # 'database_name' rather than a tuple like the others;
            # confirm that is intended.
            meta = metadata.write(meta, 'database_name', 'base_id', base_id)

            for field in table["fields"]:
                field_name = field["name"]
                col_schema = cls.column_schema(field)
                if col_schema.inclusion == "automatic":
                    keys.append(field_name)
                schema_cols[field_name] = col_schema
                meta = metadata.write(meta, ('properties', field_name),
                                      'inclusion', 'available')
                # column_schema() accepts fields without config/type, so
                # read the type defensively here too instead of raising
                # KeyError on such a field.
                airtable_type = (field.get("config") or {}).get("type")
                meta = metadata.write(meta, ('properties', field_name),
                                      'airtable_type', airtable_type or None)

            schema = Schema(type='object', properties=schema_cols)

            entry = CatalogEntry(tap_stream_id=table["id"],
                                 database=base_name or base_id,
                                 table=table_name,
                                 stream=table_name,
                                 metadata=metadata.to_list(meta),
                                 key_properties=keys,
                                 schema=schema)
            entries.append(entry)

        return entries

示例#16

0

显示文件

def discover():
    """Return a ``Catalog`` with one entry per stream in ``streams_.ALL_STREAMS``.

    Each entry carries only the stream name, the tap stream id (both the
    stream's ``tap_stream_id``) and the schema loaded with ``load_schema``.
    """
    catalog = Catalog([])
    for stream in streams_.ALL_STREAMS:
        parsed = Schema.from_dict(load_schema(stream.tap_stream_id))
        entry = CatalogEntry(
            stream=stream.tap_stream_id,
            tap_stream_id=stream.tap_stream_id,
            schema=parsed,
        )
        catalog.streams.append(entry)
    return catalog

示例#17

0

显示文件

文件： __init__.py 项目： stvhanna/tap-yotpo

def discover(ctx):
    """Return a ``Catalog`` with one entry per stream in ``streams_.all_streams``.

    Schemas are loaded with ``load_schema(ctx, ...)`` and parsed with
    ``inclusion="automatic"``; key properties come from the stream's
    ``pk_fields``.
    """
    catalog = Catalog([])
    for stream in streams_.all_streams:
        raw_schema = load_schema(ctx, stream.tap_stream_id)
        parsed = Schema.from_dict(raw_schema, inclusion="automatic")
        entry = CatalogEntry(
            stream=stream.tap_stream_id,
            tap_stream_id=stream.tap_stream_id,
            key_properties=stream.pk_fields,
            schema=parsed,
        )
        catalog.streams.append(entry)
    return catalog

示例#18

0

显示文件

文件： __init__.py 项目： sbwheeler/tap-campaignmonitor

def discover(ctx):
    """Check credentials, then build a catalog entry per ``schemas.stream_ids``.

    Schemas are loaded with ``schemas.load_schema`` and parsed with
    ``inclusion="automatic"``; key properties come from
    ``schemas.PK_FIELDS``.

    Returns the populated ``Catalog``.
    """
    check_credentials_are_authorized(ctx)
    catalog = Catalog([])
    for tap_stream_id in schemas.stream_ids:
        raw_schema = schemas.load_schema(tap_stream_id)
        parsed = Schema.from_dict(raw_schema, inclusion="automatic")
        entry = CatalogEntry(
            stream=tap_stream_id,
            tap_stream_id=tap_stream_id,
            key_properties=schemas.PK_FIELDS[tap_stream_id],
            schema=parsed,
        )
        catalog.streams.append(entry)
    return catalog

示例#19

0

显示文件

文件： __init__.py 项目： codyss/tap-sendgrid

def discover(ctx):
    """Return a ``Catalog`` with one entry per stream in ``streams.STREAMS``.

    Schemas are loaded with ``streams.load_schema`` and parsed with
    ``inclusion="available"``; key properties come from
    ``streams.PK_FIELDS``.  ``ctx`` is not used: no credential check is
    performed here.
    """
    catalog = Catalog([])
    for stream in streams.STREAMS:
        raw_schema = streams.load_schema(stream.tap_stream_id)
        parsed = Schema.from_dict(raw_schema, inclusion="available")
        entry = CatalogEntry(
            stream=stream.tap_stream_id,
            tap_stream_id=stream.tap_stream_id,
            key_properties=streams.PK_FIELDS[stream.tap_stream_id],
            schema=parsed,
        )
        catalog.streams.append(entry)
    return catalog

示例#20

0

显示文件

文件： __init__.py 项目： plenadatadave/tap-jira

def discover(config):
    """Test credentials, then build a catalog entry per ``streams_.all_streams``.

    Schemas are loaded with ``load_schema`` and parsed with
    ``inclusion="automatic"``; key properties come from the stream's
    ``pk_fields``.

    Returns the populated ``Catalog``.
    """
    test_credentials_are_authorized(config)
    catalog = Catalog([])
    for stream in streams_.all_streams:
        raw_schema = load_schema(stream.tap_stream_id)
        parsed = Schema.from_dict(raw_schema, inclusion="automatic")
        entry = CatalogEntry(
            stream=stream.tap_stream_id,
            tap_stream_id=stream.tap_stream_id,
            key_properties=stream.pk_fields,
            schema=parsed,
        )
        catalog.streams.append(entry)
    return catalog

示例#21

0

显示文件

文件： __init__.py 项目： vbourgin/tap-jira

def discover():
    """Return a ``Catalog`` with one entry per stream in ``streams_.ALL_STREAMS``.

    The schema is loaded with ``load_schema``, metadata is produced by
    ``generate_metadata(stream, schema)`` and key properties come from
    the stream's ``pk_fields``.
    """
    catalog = Catalog([])
    for stream in streams_.ALL_STREAMS:
        parsed = Schema.from_dict(load_schema(stream.tap_stream_id))
        stream_mdata = generate_metadata(stream, parsed)

        entry = CatalogEntry(stream=stream.tap_stream_id,
                             tap_stream_id=stream.tap_stream_id,
                             key_properties=stream.pk_fields,
                             schema=parsed,
                             metadata=stream_mdata)
        catalog.streams.append(entry)
    return catalog

示例#22

0

显示文件

def discover(ctx):
    """Check credentials, then build a catalog entry per ``schemas.stream_ids``.

    Schemas are loaded with ``load_schema`` and parsed with
    ``inclusion="available"``; key properties come from
    ``schemas.pk_fields``.

    Returns the populated ``Catalog``.
    """
    # NOTE(review): `ctx` is accepted but never used; the credential check
    # is called without arguments.
    check_credentials_are_authorized()
    catalog = Catalog([])
    for tap_stream_id in schemas.stream_ids:
        raw_schema = load_schema(tap_stream_id)
        parsed = Schema.from_dict(raw_schema, inclusion="available")
        entry = CatalogEntry(
            stream=tap_stream_id,
            tap_stream_id=tap_stream_id,
            key_properties=schemas.pk_fields[tap_stream_id],
            schema=parsed,
        )
        catalog.streams.append(entry)
    return catalog

示例#23

0

显示文件

def discover():
    """Return a ``Catalog`` with one entry per pair yielded by ``get_endpoints()``.

    ``get_schema`` supplies the schema dict and the metadata for each
    stream; key properties are read from the endpoint config's ``pk`` key.
    """
    catalog = Catalog([])

    for name, endpoint in get_endpoints():
        raw_schema, stream_mdata = get_schema(name, endpoint)
        parsed = Schema.from_dict(raw_schema)

        entry = CatalogEntry(stream=name,
                             tap_stream_id=name,
                             key_properties=endpoint['pk'],
                             schema=parsed,
                             metadata=stream_mdata)
        catalog.streams.append(entry)

    return catalog

示例#24

0

显示文件

def discover(ctx):
    """Refresh credentials, then build a catalog entry per ``streams_.all_streams``.

    For each stream the schema dict is loaded with ``load_schema``,
    metadata is produced by ``load_metadata(stream, schema_dict)`` and key
    properties come from the stream's ``pk_fields``.

    Returns the populated ``Catalog``.
    """
    ctx.refresh_credentials()
    catalog = Catalog([])
    for stream in streams_.all_streams:
        raw_schema = load_schema(stream.tap_stream_id)
        stream_mdata = load_metadata(stream, raw_schema)
        parsed = Schema.from_dict(raw_schema)

        entry = CatalogEntry(stream=stream.tap_stream_id,
                             tap_stream_id=stream.tap_stream_id,
                             key_properties=stream.pk_fields,
                             schema=parsed,
                             metadata=stream_mdata)
        catalog.streams.append(entry)
    return catalog

示例#25

0

显示文件

def discover():
    """Return a ``Catalog`` with one entry per schema from ``get_schemas()``.

    ``get_schemas()`` supplies both the schema dicts and the per-stream
    metadata; key properties are read from
    ``STREAMS[stream_name]['key_properties']``.
    """
    all_schemas, all_metadata = get_schemas()
    catalog = Catalog([])

    for name, raw_schema in all_schemas.items():
        parsed = Schema.from_dict(raw_schema)
        stream_mdata = all_metadata[name]

        entry = CatalogEntry(stream=name,
                             tap_stream_id=name,
                             key_properties=STREAMS[name]['key_properties'],
                             schema=parsed,
                             metadata=stream_mdata)
        catalog.streams.append(entry)

    return catalog

示例#26

0

显示文件

文件： __init__.py 项目： GrowingData/tap-xero

def discover(config):
    """Initialise credentials, then build a catalog entry per stream.

    For each stream in ``streams_.all_streams`` the schema dict is loaded,
    metadata is produced by ``load_metadata`` from that dict, and only
    afterwards is ``"selected": True`` added to the dict before it is
    parsed into a ``Schema``.  Key properties come from the stream's
    ``pk_fields``.

    Returns the populated ``Catalog``.
    """
    config = init_credentials(config)
    catalog = Catalog([])
    for stream in streams_.all_streams:
        raw_schema = load_schema(stream.tap_stream_id)
        # Metadata is derived before the dict is modified below.
        stream_mdata = load_metadata(stream, raw_schema)
        raw_schema["selected"] = True
        parsed = Schema.from_dict(raw_schema)

        entry = CatalogEntry(stream=stream.tap_stream_id,
                             tap_stream_id=stream.tap_stream_id,
                             key_properties=stream.pk_fields,
                             schema=parsed,
                             metadata=stream_mdata)
        catalog.streams.append(entry)
    return catalog

示例#27

0

显示文件

def discover(client):
    """Return a ``Catalog`` with one entry per key in ``RESOURCES``.

    ``get_schema(client, resource_name)`` supplies the schema dict and the
    metadata.  The stream name is the value stored in ``RESOURCES`` for
    the resource, and every entry uses ``['Id']`` as its key properties.
    """
    catalog = Catalog([])

    for resource_name in RESOURCES.keys():
        raw_schema, stream_mdata = get_schema(client, resource_name)
        parsed = Schema.from_dict(raw_schema)
        name = RESOURCES[resource_name]

        entry = CatalogEntry(stream=name,
                             tap_stream_id=name,
                             key_properties=['Id'],
                             schema=parsed,
                             metadata=stream_mdata)
        catalog.streams.append(entry)

    return catalog

示例#28

0

显示文件

文件： discover.py 项目： jarobe42/tap-dynamics-crm

def discover(service):
    """Return a ``Catalog`` with one entry per item in ``service.entities``.

    For each entity, ``get_optionset_metadata`` is called first and its
    result is passed with the entity's ``__odata_schema__`` to
    ``get_schema``, which supplies the schema dict, the metadata and the
    key properties.
    """
    catalog = Catalog([])

    for name, entity in service.entities.items():
        optionsets = get_optionset_metadata(service, name)
        raw_schema, stream_mdata, key_fields = get_schema(
            entity.__odata_schema__, optionsets)
        parsed = Schema.from_dict(raw_schema)

        entry = CatalogEntry(stream=name,
                             tap_stream_id=name,
                             key_properties=key_fields,
                             schema=parsed,
                             metadata=stream_mdata)
        catalog.streams.append(entry)

    return catalog

示例#29

0

显示文件

def discover(ctx):
    """Build a catalog entry per ``streams_.stream_ids``.

    The stream whose id equals ``schemas.IDS.EVENT_LOG`` is left out when
    ``has_access_to_event_log(ctx)`` is falsy.  Schemas are loaded with
    ``schemas.load_schema(ctx, ...)`` and parsed with
    ``inclusion="automatic"``; key properties come from
    ``schemas.PK_FIELDS``.

    Returns the populated ``Catalog``.
    """
    LOGGER.info("Running discover")
    event_log_allowed = has_access_to_event_log(ctx)
    catalog = Catalog([])
    for tap_stream_id in streams_.stream_ids:
        if event_log_allowed or tap_stream_id != schemas.IDS.EVENT_LOG:
            raw_schema = schemas.load_schema(ctx, tap_stream_id)
            parsed = Schema.from_dict(raw_schema, inclusion="automatic")
            entry = CatalogEntry(
                stream=tap_stream_id,
                tap_stream_id=tap_stream_id,
                key_properties=schemas.PK_FIELDS[tap_stream_id],
                schema=parsed,
            )
            catalog.streams.append(entry)
    return catalog

示例#30

0

显示文件

def discover():
    """Return a ``Catalog`` with one entry per schema from ``get_schemas()``.

    ``get_schemas()`` supplies both the schema dicts and the per-schema
    metadata; key properties are read from
    ``STREAMS[schema_name]['key_properties']``.
    """
    all_schemas, all_metadata = get_schemas()

    entries = [
        CatalogEntry(
            tap_stream_id=name,
            stream=name,
            schema=Schema.from_dict(raw_schema),
            metadata=all_metadata[name],
            key_properties=STREAMS[name]['key_properties'],
        )
        for name, raw_schema in all_schemas.items()
    ]
    return Catalog(entries)