Exemplo n.º 1
0
def get_schemas(client, spreadsheet_id):
    """Discover schemas and field metadata for all streams in STREAMS.

    For the 'spreadsheet_metadata' stream, also fetches the spreadsheet from
    the API and derives one schema per non-empty worksheet.

    Returns:
        tuple(dict, dict): (schemas, field_metadata), both keyed by
        stream name (worksheet title for per-sheet entries).
    """
    schemas = {}
    field_metadata = {}

    for stream_name, stream_metadata in STREAMS.items():
        schema_path = get_abs_path('schemas/{}.json'.format(stream_name))
        with open(schema_path) as file:
            schema = json.load(file)
        schemas[stream_name] = schema

        # Documentation:
        # https://github.com/singer-io/getting-started/blob/master/docs/DISCOVERY_MODE.md#singer-python-helper-functions
        # Reference:
        # https://github.com/singer-io/singer-python/blob/master/singer/metadata.py#L25-L44
        # (The previous `mdata = metadata.new()` was dead code — it was
        # immediately overwritten by get_standard_metadata.)
        mdata = metadata.get_standard_metadata(
            schema=schema,
            key_properties=stream_metadata.get('key_properties', None),
            valid_replication_keys=stream_metadata.get('replication_keys',
                                                       None),
            replication_method=stream_metadata.get('replication_method', None))
        field_metadata[stream_name] = mdata

        if stream_name == 'spreadsheet_metadata':
            api = stream_metadata.get('api', 'sheets')
            params = stream_metadata.get('params', {})
            querystring = '&'.join(
                ['%s=%s' % (key, value) for (key, value) in params.items()])
            path = '{}?{}'.format(stream_metadata.get('path').replace(
                '{spreadsheet_id}', spreadsheet_id), querystring)

            # GET spreadsheet_metadata, which incl. sheets (basic metadata for each worksheet)
            # NOTE(review): querystring is embedded in `path` AND passed as
            # `params` — presumably client.get ignores one of them; confirm.
            spreadsheet_md_results = client.get(path=path, params=querystring,
                                                api=api, endpoint=stream_name)

            sheets = spreadsheet_md_results.get('sheets')
            if sheets:
                # Loop thru each worksheet in spreadsheet
                for sheet in sheets:
                    # GET sheet_json_schema for each worksheet
                    sheet_json_schema, columns = get_sheet_metadata(
                        sheet, spreadsheet_id, client)

                    # SKIP empty sheets (where sheet_json_schema and columns are None)
                    if sheet_json_schema and columns:
                        sheet_title = sheet.get('properties', {}).get('title')
                        schemas[sheet_title] = sheet_json_schema
                        sheet_mdata = metadata.get_standard_metadata(
                            schema=sheet_json_schema,
                            key_properties=['__sdc_row'],
                            valid_replication_keys=None,
                            replication_method='FULL_TABLE')
                        field_metadata[sheet_title] = sheet_mdata

    return schemas, field_metadata
Exemplo n.º 2
0
def discover(client, custom_reports):
    """Build a singer Catalog from the static STREAMS plus any
    user-configured custom reports (whose schemas are generated on the fly
    via build_schema)."""
    entries = []

    for stream_id, schema in load_schemas().items():
        instance = STREAMS[stream_id]
        mdata = metadata.get_standard_metadata(
            schema=schema.to_dict(),
            key_properties=instance.key_properties,
            valid_replication_keys=instance.replication_key,
            replication_method=instance.replication_method)
        entries.append(
            CatalogEntry(
                tap_stream_id=stream_id,
                stream=stream_id,
                schema=schema,
                key_properties=instance.key_properties,
                metadata=mdata,
                replication_key=instance.replication_key,
                is_view=None,
                database=None,
                table=None,
                row_count=None,
                stream_alias=None,
                replication_method=instance.replication_method,
            ))

    for report in (custom_reports or []):
        report_schema = Schema.from_dict(build_schema(client, report))
        report_mdata = metadata.get_standard_metadata(
            schema=report_schema.to_dict(),
            key_properties=report.get('key_properties'),
            valid_replication_keys=report.get('valid_replication_keys'),
            replication_method=None)
        entries.append(
            CatalogEntry(
                tap_stream_id=report['stream_id'],
                stream=report['stream_id'],
                schema=report_schema,
                key_properties=report.get('key_properties'),
                metadata=report_mdata,
                replication_key=report.get('valid_replication_keys'),
                is_view=None,
                database=None,
                table=None,
                row_count=None,
                # NOTE(review): the whole report dict is passed as the
                # alias — presumably a name was intended; confirm.
                stream_alias=report,
                replication_method=None,
            ))
    return Catalog(entries)
Exemplo n.º 3
0
    def generate_catalog(self):
        """Build a one-entry catalog for this stream's table.

        Resolves schema $refs, builds standard metadata, and marks every
        replication key's inclusion as 'automatic'.

        Returns:
            list[dict]: a single catalog entry dict.
        """
        cls = self.__class__

        # get the reference schemas
        refs = load_schema_references()
        # resolve the schema reference and make final schema
        schema = singer.resolve_schema_references(load_schema(cls.TABLE), refs)

        # use 'get_standard_metadata' with primary key, replication key and
        # replication method (the previous `mdata = metadata.new()` was dead
        # code — immediately overwritten here)
        mdata = metadata.get_standard_metadata(
            schema=schema,
            key_properties=self.KEY_PROPERTIES,
            valid_replication_keys=self.REPLICATION_KEYS
            if self.REPLICATION_KEYS else None,
            replication_method=self.REPLICATION_METHOD)

        mdata_map = metadata.to_map(mdata)

        # make 'automatic' inclusion for replication keys
        for replication_key in self.REPLICATION_KEYS:
            mdata_map[('properties',
                       replication_key)]['inclusion'] = 'automatic'

        return [{
            'tap_stream_id': cls.TABLE,
            'stream': cls.TABLE,
            'key_properties': cls.KEY_PROPERTIES,
            'schema': schema,
            'metadata': metadata.to_list(mdata_map)
        }]
def discover():
    '''
    Run discovery mode.

    Builds a Catalog of all STREAMS, advertising valid-replication-keys
    and forcing each replication key's inclusion to 'automatic'.
    '''
    streams = []

    for stream_id, stream_object in STREAMS.items():
        raw_schema = load_schema(stream_id)
        schema = Schema.from_dict(raw_schema)

        # Only advertise valid-replication-keys when the stream actually has
        # a replication key; the original unconditionally passed
        # [stream_object.replication_key], which wrote [None] for
        # full-table streams.
        valid_replication_keys = (
            [stream_object.replication_key]
            if stream_object.replication_key else None)

        mdata = metadata.to_map(
            metadata.get_standard_metadata(
                schema=raw_schema,
                schema_name=stream_id,
                key_properties=stream_object.key_properties,
                valid_replication_keys=valid_replication_keys,
                replication_method=stream_object.replication_method))

        # make sure that the replication key field is mandatory
        if stream_object.replication_key:
            metadata.write(mdata,
                           ('properties', stream_object.replication_key),
                           'inclusion', 'automatic')

        streams.append(
            CatalogEntry(stream=stream_id,
                         tap_stream_id=stream_id,
                         key_properties=stream_object.key_properties,
                         schema=schema,
                         metadata=metadata.to_list(mdata)))
    return Catalog(streams)
Exemplo n.º 5
0
def discover_streams(config):
    """Build stream dicts for each configured report by downloading its XSD
    and deriving a JSON schema from it (full-table replication)."""
    username = config['username']
    password = config['password']

    discovered = []
    for report in json.loads(config['reports']):
        report_name = report['report_name']
        LOGGER.info('Downloading XSD to determine table schema "%s".',
                    report_name)

        xsd = download_xsd(report['report_url'], username, password)
        schema = generate_schema_for_report(xsd)

        discovered.append({
            'stream': report_name,
            'tap_stream_id': report_name,
            'schema': schema,
            'metadata': metadata.get_standard_metadata(
                schema,
                key_properties=report.get('key_properties'),
                replication_method='FULL_TABLE'),
        })

    return discovered
Exemplo n.º 6
0
def get_schemas():
    """Load each stream's JSON schema from disk and compute its metadata.

    Returns:
        tuple(dict, dict): (schemas, schemas_metadata) keyed by stream name.
    """
    schemas = {}
    schemas_metadata = {}

    for stream_name, stream_object in STREAMS.items():
        path = get_abs_path('schemas/{}.json'.format(stream_name))
        with open(path) as schema_file:
            schema = json.load(schema_file)

        mdata = metadata.to_map(metadata.get_standard_metadata(
            schema=schema,
            key_properties=stream_object.key_properties,
            replication_method=stream_object.replication_method))

        if stream_object.valid_replication_keys:
            mdata = metadata.write(mdata, (), 'valid-replication-keys',
                                   stream_object.valid_replication_keys)
        if stream_object.replication_key:
            # The bookmark field must always be emitted.
            mdata = metadata.write(
                mdata, ('properties', stream_object.replication_key),
                'inclusion', 'automatic')

        schemas[stream_name] = schema
        schemas_metadata[stream_name] = metadata.to_list(mdata)

    return schemas, schemas_metadata
Exemplo n.º 7
0
def do_discover():
    """Discover all known streams and return them as a singer Catalog."""
    entries = []

    for name, schema in _load_schemas().items():
        # create and add catalog entry
        stream = STREAM_OBJECTS[name]
        stream_md = metadata.get_standard_metadata(
            schema=schema,
            key_properties=stream.key_properties,
            valid_replication_keys=stream.replication_keys,
            replication_method=stream.replication_method,
        )
        entries.append({
            "stream": name,
            "tap_stream_id": name,
            "schema": schema,
            "metadata": stream_md,
            "key_properties": stream.key_properties,
        })

    return Catalog.from_dict({"streams": entries})
Exemplo n.º 8
0
def do_discover(client):
    """Discover catalog entries for the feeds exposed by the client,
    matched against STREAM_OBJECTS by major version."""
    schemas_by_name = _load_schemas()
    # First element of the reported "version" value, e.g. "2.2" -> "2".
    # NOTE(review): confirm the shape of request_feed's "version" field.
    major_ver = client.request_feed("gbfs_versions").get("version")[0]

    entries = []
    for feed_name in client.feed_names:
        versioned_feed = f"{feed_name}_v{major_ver}"
        stream = STREAM_OBJECTS.get(versioned_feed)
        if stream is None:
            # Feed not modeled for this version; skip it.
            continue

        schema = schemas_by_name[versioned_feed]
        entries.append({
            "stream": versioned_feed,
            "tap_stream_id": versioned_feed,
            "schema": schema,
            "metadata": metadata.get_standard_metadata(
                schema=schema,
                key_properties=stream.key_properties,
                valid_replication_keys=stream.replication_keys,
                replication_method=stream.replication_method,
            ),
            "key_properties": stream.key_properties,
        })

    return Catalog.from_dict({"streams": entries})
Exemplo n.º 9
0
def discover():
    """Build a Catalog; the 'qa' stream replicates on 'sequence_id'."""
    raw_schemas = load_schemas()
    streams = []
    for stream_id, schema in raw_schemas.items():
        key_properties = ['uuid']

        replication_key = None
        if stream_id == 'qa':
            replication_key = 'sequence_id'

        # valid_replication_keys must be a list of field names (or None);
        # the original passed the bare string, writing a string into the
        # 'valid-replication-keys' metadata instead of a list.
        stream_metadata = metadata.get_standard_metadata(
            schema=schema.to_dict(),
            key_properties=key_properties,
            valid_replication_keys=(
                [replication_key] if replication_key else None),
            replication_method=None)

        streams.append(
            CatalogEntry(
                tap_stream_id=stream_id,
                stream=stream_id,
                schema=schema,
                key_properties=key_properties,
                metadata=stream_metadata,
                replication_key=replication_key,
                is_view=None,
                database=None,
                table=None,
                row_count=None,
                stream_alias=None,
                replication_method=None,
            ))
    return Catalog(streams)
Exemplo n.º 10
0
def get_schemas(config):
    """Discover a schema and metadata for every configured S3 table.

    Returns:
        tuple(dict, dict): (schemas, schemas_metadata) keyed by tap_stream_id.
    """
    schemas = {}
    schemas_metadata = {}
    client = S3Client(config['aws_access_key_id'],
                      config['aws_secret_access_key'])

    for tap_stream_id, table_spec in config['tables'].items():
        LOGGER.info(f'Starting discovery for {tap_stream_id}')
        stream = Stream(client, table_spec, None)
        schema = stream.get_schema()

        mdata = metadata.to_map(metadata.get_standard_metadata(
            schema=schema,
            key_properties=stream.key_properties,
            replication_method=stream.replication_method))

        if stream.valid_replication_keys:
            mdata = metadata.write(mdata, (), 'valid-replication-keys',
                                   stream.valid_replication_keys)
        if stream.replication_key:
            mdata = metadata.write(mdata,
                                   ('properties', stream.replication_key),
                                   'inclusion', 'automatic')

        schemas[tap_stream_id] = schema
        schemas_metadata[tap_stream_id] = metadata.to_list(mdata)

    return schemas, schemas_metadata
Exemplo n.º 11
0
def discover():
    """Build a Catalog with placeholder metadata and key properties,
    marking each stream as selected in its metadata."""
    raw_schemas = load_schemas()
    streams = []
    for stream_id, schema in raw_schemas.items():
        # TODO: populate any metadata and stream's key properties here..
        mock_mdata = metadata.get_standard_metadata(schema.to_dict())
        # The original discarded metadata.write()'s result and relied on
        # the compiled map sharing dicts with mock_mdata; make the
        # map -> write -> list round-trip explicit instead.
        mdata_map = metadata.to_map(mock_mdata)
        metadata.write(mdata_map, (), "selected", True)
        stream_metadata = metadata.to_list(mdata_map)
        key_properties = ['id']

        streams.append(
            CatalogEntry(
                tap_stream_id=stream_id,
                stream=stream_id,
                schema=schema,
                key_properties=key_properties,
                metadata=stream_metadata,
                replication_key=None,
                is_view=None,
                database=None,
                table=None,
                row_count=None,
                stream_alias=None,
                replication_method=None,
            ))
    return Catalog(streams)
Exemplo n.º 12
0
def do_discover():
    """Discover streams, marking key/replication fields as 'automatic'
    inclusion and every other field as 'available'."""
    entries = []

    for stream_name, schema in _load_schemas().items():
        stream = STREAM_OBJECTS[stream_name]
        mdata = metadata.to_map(metadata.get_standard_metadata(
            schema=schema,
            key_properties=stream.key_properties,
            replication_method=stream.replication_method))

        if stream.replication_key:
            mdata = metadata.write(mdata, (), 'valid-replication-keys',
                                   [stream.replication_key])

        for field_name in schema['properties']:
            is_automatic = (field_name in stream.key_properties
                            or field_name == stream.replication_key)
            mdata = metadata.write(
                mdata, ('properties', field_name), 'inclusion',
                'automatic' if is_automatic else 'available')

        entries.append({
            'stream': stream_name,
            'tap_stream_id': stream_name,
            'schema': schema,
            'metadata': metadata.to_list(mdata),
            'key_properties': stream.key_properties})

    return Catalog.from_dict({'streams': entries})
Exemplo n.º 13
0
def get_schemas():
    """Load schemas for all STREAMS and build their field metadata.

    For the ad-analytics streams, the pivot/date-range fields are forced
    to 'automatic' inclusion.

    Returns:
        tuple(dict, dict): (schemas, field_metadata) keyed by stream name.
    """
    schemas = {}
    field_metadata = {}

    for stream_name, stream_metadata in STREAMS.items():
        schema_path = get_abs_path('schemas/{}.json'.format(stream_name))
        with open(schema_path) as file:
            schema = json.load(file)
        schemas[stream_name] = schema

        # Documentation:
        #   https://github.com/singer-io/getting-started/blob/master/docs/DISCOVERY_MODE.md
        # Reference:
        #   https://github.com/singer-io/singer-python/blob/master/singer/metadata.py#L25-L44
        # (The previous `mdata = metadata.new()` was dead code — it was
        # immediately overwritten here.)
        mdata = metadata.get_standard_metadata(
            schema=schema,
            key_properties=stream_metadata.get('key_properties', None),
            valid_replication_keys=stream_metadata.get('replication_keys',
                                                       None),
            replication_method=stream_metadata.get('replication_method', None))

        # Add additional metadata: mark the pivot fields as 'automatic'
        if stream_name in ('ad_analytics_by_campaign',
                           'ad_analytics_by_creative'):
            mdata_map = metadata.to_map(mdata)
            mdata_map[('properties', 'date_range')]['inclusion'] = 'automatic'
            mdata_map[('properties', 'pivot')]['inclusion'] = 'automatic'
            mdata_map[('properties', 'pivot_value')]['inclusion'] = 'automatic'
            mdata = metadata.to_list(mdata_map)

        field_metadata[stream_name] = mdata

    return schemas, field_metadata
Exemplo n.º 14
0
def discover():
    """Build a Catalog of report streams replicating incrementally on
    'report_date' with composite key (report_id, row_id)."""
    raw_schemas = load_schemas()
    streams = []
    for stream_id, schema in raw_schemas.items():
        mdata = metadata.get_standard_metadata(
            schema=schema.to_dict(),
            key_properties=["report_id", "row_id"],
            valid_replication_keys=["report_date"],
            replication_method="INCREMENTAL",
        )

        streams.append(
            CatalogEntry(
                tap_stream_id=stream_id,
                stream=stream_id,
                schema=schema,
                key_properties=["report_id", "row_id"],
                metadata=mdata,
                # CatalogEntry.replication_key is a single field name; the
                # original passed the list ["report_date"].
                replication_key="report_date",
                is_view=None,
                database=None,
                table=None,
                row_count=None,
                stream_alias=None,
                replication_method=None,
            )
        )
    return Catalog(streams)
Exemplo n.º 15
0
    def do_discover(self):
        """Discover every stream on this tap and return a singer Catalog."""
        logger.info('Starting discover')

        catalog = Catalog([])

        for stream in self.streams:
            stream.tap = self

            schema = Schema.from_dict(stream.get_schema())
            replication_keys = ([stream.state_field]
                                if stream.state_field else None)

            meta = metadata.get_standard_metadata(
                schema=schema.to_dict(),
                key_properties=stream.key_properties,
                valid_replication_keys=replication_keys,
                replication_method=stream.replication_method)

            # If the stream has a state_field, it needs to mark that
            # property with automatic metadata.
            if stream.state_field:
                meta_map = metadata.to_map(meta)
                meta_map[('properties',
                          stream.state_field)]['inclusion'] = 'automatic'
                meta = metadata.to_list(meta_map)

            # NOTE(review): `stream.schema` is used as the stream id here —
            # presumably it holds the stream's name; confirm.
            catalog.streams.append(
                CatalogEntry(stream=stream.schema,
                             tap_stream_id=stream.schema,
                             key_properties=stream.key_properties,
                             schema=schema,
                             metadata=meta))

        return catalog
Exemplo n.º 16
0
def discover(ctx):
    """Run discovery: verify credentials, then emit a catalog where every
    field is marked 'automatic'."""
    check_credentials_are_authorized(ctx)
    catalog = Catalog([])

    for tap_stream_id in schemas.stream_ids:
        schema_dict = schemas.load_schema(tap_stream_id)

        mdata = metadata.to_map(metadata.get_standard_metadata(
            schema_dict, key_properties=schemas.PK_FIELDS[tap_stream_id]))

        # NB: `lists` and `messages` are required for their substreams.
        # This is an approximation of the initial functionality using
        # metadata, which marked them as `selected=True` in the schema.
        if tap_stream_id in ('lists', 'messages'):
            mdata = metadata.write(mdata, (), 'inclusion', 'automatic')

        for field_name in schema_dict['properties']:
            mdata = metadata.write(mdata, ('properties', field_name),
                                   'inclusion', 'automatic')

        catalog.streams.append(
            CatalogEntry(stream=tap_stream_id,
                         tap_stream_id=tap_stream_id,
                         key_properties=schemas.PK_FIELDS[tap_stream_id],
                         schema=Schema.from_dict(schema_dict),
                         metadata=metadata.to_list(mdata)))
    return catalog
Exemplo n.º 17
0
def do_discover():
    """Discover catalog entries, forcing each replication-key field to
    'automatic' inclusion."""
    catalog_entries = []

    for stream_name, schema in _load_schemas().items():
        # create and add catalog entry
        stream = STREAM_OBJECTS[stream_name]
        compiled = metadata.to_map(metadata.get_standard_metadata(
            schema=schema,
            key_properties=stream.key_properties,
            valid_replication_keys=stream.replication_keys,
            replication_method=stream.replication_method,
        ))
        # metadata.write mutates the compiled map in place, so the return
        # value can be ignored here.
        for rep_key in stream.replication_keys:
            metadata.write(compiled, ('properties', rep_key),
                           'inclusion', 'automatic')

        catalog_entries.append({
            "stream": stream_name,
            "tap_stream_id": stream_name,
            "schema": schema,
            "metadata": metadata.to_list(compiled),
            "key_properties": stream.key_properties,
        })

    return Catalog.from_dict({"streams": catalog_entries})
Exemplo n.º 18
0
 def load_metadata(self):
     """Return standard singer metadata for this stream.

     NOTE(review): wraps self.replication_key in a list unconditionally;
     if replication_key can be None this yields [None] — confirm every
     stream defines one.
     """
     mdata = get_standard_metadata(
         schema=self.schema,
         key_properties=self.key_properties,
         valid_replication_keys=[self.replication_key],
         replication_method=self.replication_method,
     )
     return mdata
Exemplo n.º 19
0
 def load_metadata(self):
     """Build standard metadata from this stream's schema and settings.

     NOTE(review): [self.replication_key] becomes [None] when there is no
     replication key — confirm every stream defines one.
     """
     mdata = metadata.get_standard_metadata(
         schema=self.load_schema(),
         schema_name=self.name,
         key_properties=self.key_properties,
         valid_replication_keys=[self.replication_key],
         replication_method=self.replication_method)
     return mdata
def get_schemas(client, properties_flag, denest_properties_flag):
    """Build schemas and field metadata for all Mixpanel streams.

    Skips the 'engage' stream when the client reports the endpoint is
    unavailable.

    Returns:
        tuple(dict, dict): (schemas, field_metadata) keyed by stream name.
    """
    schemas = {}
    field_metadata = {}

    for stream_name, stream_metadata in STREAMS.items():
        # When the client detects disable_engage_endpoint, skip discovering the stream
        if stream_name == 'engage' and client.disable_engage_endpoint:
            LOGGER.warning(
                'Mixpanel returned a 402 indicating the Engage endpoint and stream is unavailable. Skipping.'
            )
            continue

        schema = get_schema(client, properties_flag, denest_properties_flag,
                            stream_name)

        schemas[stream_name] = schema

        # Documentation:
        # https://github.com/singer-io/getting-started/blob/master/docs/DISCOVERY_MODE.md#singer-python-helper-functions
        # Reference:
        # https://github.com/singer-io/singer-python/blob/master/singer/metadata.py#L25-L44
        # (The previous `mdata = metadata.new()` was dead code — it was
        # immediately overwritten here.)
        mdata = metadata.get_standard_metadata(
            schema=schema,
            key_properties=stream_metadata.get('key_properties', None),
            valid_replication_keys=stream_metadata.get('replication_keys',
                                                       None),
            replication_method=stream_metadata.get('replication_method', None))
        field_metadata[stream_name] = mdata

    return schemas, field_metadata
Exemplo n.º 21
0
def discover():
    """Build a Catalog from the loaded schemas and STREAM_CONFIGS."""
    catalog_streams = []
    for stream_id, schema in load_schemas().items():
        # TODO: populate any metadata and stream's key properties here..
        key_properties = STREAM_CONFIGS[stream_id]['key_properties']
        mdata = metadata.get_standard_metadata(
            schema=schema.to_dict(),
            key_properties=key_properties,
            # TODO: Verify this works / is necessary
            valid_replication_keys=['date'],
            replication_method=None)
        entry = CatalogEntry(
            tap_stream_id=stream_id,
            stream=stream_id,
            schema=schema,
            key_properties=key_properties,
            metadata=mdata,
            replication_key='date',
            is_view=None,
            database=None,
            table=None,
            row_count=None,
            stream_alias=None,
            replication_method=None,
        )
        catalog_streams.append(entry)
    return Catalog(catalog_streams)
Exemplo n.º 22
0
def do_discover():
    """Discover catalog entries for all STREAM_OBJECTS, marking each
    stream's first replication key (if any) as 'automatic' inclusion."""
    raw_schemas = _load_schemas()
    catalog_entries = []

    for stream_name, stream in STREAM_OBJECTS.items():
        # create and add catalog entry
        schema = raw_schemas[stream_name]

        mdata = metadata.to_map(
            metadata.get_standard_metadata(
                schema=schema,
                key_properties=stream.key_properties,
                valid_replication_keys=stream.replication_keys,
                replication_method=stream.replication_method,
            ))
        # Set the replication_key MetaData to automatic as well.
        # Guard against streams with no replication keys — the original
        # indexed replication_keys[0] unconditionally and would raise
        # IndexError on an empty list.
        if stream.replication_keys:
            mdata = metadata.write(mdata,
                                   ('properties', stream.replication_keys[0]),
                                   'inclusion', 'automatic')
        catalog_entry = {
            "stream": stream_name,
            "tap_stream_id": stream_name,
            "schema": schema,
            "metadata": metadata.to_list(mdata),
            "key_properties": stream.key_properties
        }
        catalog_entries.append(catalog_entry)

    return Catalog.from_dict({"streams": catalog_entries})
Exemplo n.º 23
0
def discover_schemas():
    """Discover streams, resolving shared schema refs, and mark each
    stream's bookmark field (if it is a known time key) as 'automatic'."""
    # Load Facebook's shared schemas
    refs = load_shared_schema_refs()

    discovered = []
    for stream in initialize_streams_for_discovery():
        LOGGER.info('Loading schema for %s', stream.name)
        schema = singer.resolve_schema_references(load_schema(stream), refs)

        mdata = metadata.to_map(metadata.get_standard_metadata(
            schema, key_properties=stream.key_properties))

        bookmark_key = BOOKMARK_KEYS.get(stream.name)
        if bookmark_key in (UPDATED_TIME_KEY, CREATED_TIME_KEY):
            mdata = metadata.write(mdata, ('properties', bookmark_key),
                                   'inclusion', 'automatic')

        discovered.append({
            'stream': stream.name,
            'tap_stream_id': stream.name,
            'schema': schema,
            'metadata': metadata.to_list(mdata)
        })
    return {'streams': discovered}
Exemplo n.º 24
0
def discover():
    """Create a Catalog entry for each loaded schema, advertising
    replication keys for the 'issues' and 'messages' streams."""
    replication_keys_by_stream = {
        'issues': ['updated_at'],
        'messages': ['created_at'],
    }

    catalog_streams = []
    for stream_id, schema in load_schemas().items():
        key_properties = ['id']
        valid_replication_keys = replication_keys_by_stream.get(stream_id)

        mdata = metadata.get_standard_metadata(
            schema=schema.to_dict(),
            key_properties=key_properties,
            valid_replication_keys=valid_replication_keys,
            replication_method=None)
        catalog_streams.append(
            CatalogEntry(
                tap_stream_id=stream_id,
                stream=stream_id,
                schema=schema,
                key_properties=key_properties,
                metadata=mdata,
                replication_key=None,
                is_view=None,
                database=None,
                table=None,
                row_count=None,
                stream_alias=None,
                replication_method=None,
            ))
    return Catalog(catalog_streams)
Exemplo n.º 25
0
def get_schemas(config, config_path):
    """Discover schemas and metadata for all valid MS Dynamics streams.

    Returns:
        tuple(dict, dict): (schemas, schemas_metadata) keyed by stream name.
    """
    schemas = {}
    schemas_metadata = {}

    streams = get_streams(config, config_path)
    LOGGER.info('There are {:d} valid streams in MS Dynamics'.format(
        len(streams)))

    for stream_name, stream in streams.items():
        schema = stream.schema

        mdata = metadata.to_map(metadata.get_standard_metadata(
            schema=schema,
            key_properties=stream.key_properties,
            replication_method=stream.replication_method))

        if stream.valid_replication_keys:
            mdata = metadata.write(mdata, (), 'valid-replication-keys',
                                   stream.valid_replication_keys)
        if stream.replication_key:
            mdata = metadata.write(mdata,
                                   ('properties', stream.replication_key),
                                   'inclusion', 'automatic')

        schemas[stream_name] = schema
        schemas_metadata[stream_name] = metadata.to_list(mdata)

    return schemas, schemas_metadata
Exemplo n.º 26
0
def get_schemas():
    """Load each stream's schema from disk and generate field metadata.

    Returns:
        tuple(dict, dict): (schemas, field_metadata) keyed by stream name.
    """
    schemas = {}
    field_metadata = {}

    for stream_name, stream_metadata in STREAMS.items():
        schema_path = get_abs_path('schemas/{}.json'.format(stream_name))
        with open(schema_path) as file:
            schema = json.load(file)
        schemas[stream_name] = schema

        # Documentation:
        # https://github.com/singer-io/getting-started/blob/master/docs/DISCOVERY_MODE.md#singer-python-helper-functions
        # Reference:
        # https://github.com/singer-io/singer-python/blob/master/singer/metadata.py#L25-L44
        # (The previous `mdata = metadata.new()` was dead code — it was
        # immediately overwritten here.)
        # NOTE(review): upstream singer-python's get_standard_metadata has no
        # `selected` parameter — confirm the local/vendored version accepts it.
        mdata = metadata.get_standard_metadata(
            schema=schema,
            key_properties=stream_metadata.get('key_properties', None),
            selected=stream_metadata.get('selected', True),
            valid_replication_keys=stream_metadata.get('replication_keys',
                                                       None),
            replication_method=stream_metadata.get('replication_method', None))
        field_metadata[stream_name] = mdata

    return schemas, field_metadata
Exemplo n.º 27
0
def discover():
    """Build a Catalog of gradable streams replicating on 'date_graded'."""
    raw_schemas = load_schemas()
    streams = []
    for stream_id, schema in raw_schemas.items():
        key_properties = ['gradable_id']
        if stream_id == 'section_scores':
            key_properties.append('section_id')
        # valid_replication_keys must be a list of field names; the original
        # passed the bare string 'date_graded', which wrote a string into
        # the 'valid-replication-keys' metadata.
        stream_metadata = metadata.get_standard_metadata(
            schema=schema.to_dict(),
            key_properties=key_properties,
            valid_replication_keys=['date_graded'],
            replication_method=None)
        streams.append(
            CatalogEntry(
                tap_stream_id=stream_id,
                stream=stream_id,
                schema=schema,
                key_properties=key_properties,
                metadata=stream_metadata,
                replication_key='date_graded',
                is_view=None,
                database=None,
                table=None,
                row_count=None,
                stream_alias=None,
                replication_method=None,
            ))
    return Catalog(streams)
Exemplo n.º 28
0
def get_schemas():
    """Load schema files (per-class override or default path) and build
    field metadata with each replication key marked 'automatic'.

    Returns:
        tuple(dict, dict): (schemas, field_metadata) keyed by stream name.
    """
    schemas = {}
    field_metadata = {}

    for stream_name, stream_class in STREAMS.items():
        base_schema_path = 'schemas/{}.json'.format(stream_name)
        schema_file_path = stream_class.json_schema or base_schema_path
        schema_path = get_abs_path(schema_file_path)
        with open(schema_path) as file:
            schema = json.load(file)
        schemas[stream_name] = schema

        # Documentation:
        # https://github.com/singer-io/getting-started/blob/master/docs/DISCOVERY_MODE.md#singer-python-helper-functions
        # Reference:
        # https://github.com/singer-io/singer-python/blob/master/singer/metadata.py#L25-L44
        # (The previous `mdata = metadata.new()` was dead code — it was
        # immediately overwritten here.)
        mdata = metadata.get_standard_metadata(
            schema=schema,
            key_properties=stream_class.key_properties or None,
            valid_replication_keys=stream_class.replication_keys or None,
            replication_method=stream_class.replication_method or None)

        mdata_map = metadata.to_map(mdata)
        # update inclusion of "replication keys" as "automatic"
        for replication_key in (stream_class.replication_keys or []):
            mdata_map[('properties',
                       replication_key)]['inclusion'] = 'automatic'

        field_metadata[stream_name] = metadata.to_list(mdata_map)

    return schemas, field_metadata
Exemplo n.º 29
0
def get_schemas():
    """Flatten STREAMS, load each schema, resolve internal $ref
    definitions, and build metadata.

    Returns:
        tuple(dict, dict): (schemas, schemas_metadata) keyed by stream name.
    """
    schemas = {}
    schemas_metadata = {}

    streams = flatten_streams(STREAMS, {})
    for stream_name, stream_object in streams.items():
        LOGGER.info('Getting schema for {}'.format(stream_name))
        schema_path = get_abs_path('schemas/{}.json'.format(stream_name))
        with open(schema_path) as file:
            schema = json.load(file)

        refs = schema.pop("definitions", {})
        if refs:
            # resolve_schema_references returns the resolved schema; the
            # original discarded the return value, so $refs stayed
            # unresolved in the emitted schema.
            schema = singer.resolve_schema_references(schema, refs)

        meta = metadata.get_standard_metadata(
            schema=schema,
            key_properties=stream_object.key_properties,
            replication_method=stream_object.replication_method
        )

        meta = metadata.to_map(meta)

        if stream_object.replication_key:
            meta = metadata.write(
                meta, ('properties', stream_object.replication_key), 'inclusion', 'automatic')

        meta = metadata.to_list(meta)

        schemas[stream_name] = schema
        schemas_metadata[stream_name] = meta

    return schemas, schemas_metadata
Exemplo n.º 30
0
 def test_empty_valid_replication_keys_are_written(self):
     """An explicitly empty replication-key list must still appear in the
     emitted metadata."""
     expected = [{
         'breadcrumb': (),
         'metadata': {
             'valid-replication-keys': [],
         },
     }]
     self.assertEqual(get_standard_metadata(valid_replication_keys=[]),
                      expected)