示例#1
0
def get_schemas():
    """Load the JSON schema and standard metadata for each replicated stream.

    Iterates over the mapping returned by ``flatten_streams()`` and skips any
    stream whose ``replication_ind`` entry is falsy (a missing entry counts as
    ``True``).

    Returns:
        A ``(schemas, field_metadata)`` tuple of dicts keyed by stream name:
        the parsed content of ``schemas/<stream_name>.json`` and the result of
        ``metadata.get_standard_metadata`` for that stream.
    """
    schemas = {}
    field_metadata = {}

    for stream_name, stream_metadata in flatten_streams().items():
        if not stream_metadata.get('replication_ind', True):
            continue

        schema_path = get_abs_path('schemas/{}.json'.format(stream_name))
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        with open(schema_path, encoding='utf-8') as file:
            schema = json.load(file)
        schemas[stream_name] = schema

        # Documentation:
        # https://github.com/singer-io/getting-started/blob/master/docs/DISCOVERY_MODE.md#singer-python-helper-functions
        # Reference:
        # https://github.com/singer-io/singer-python/blob/master/singer/metadata.py#L25-L44
        field_metadata[stream_name] = metadata.get_standard_metadata(
            schema=schema,
            key_properties=stream_metadata.get('key_properties'),
            valid_replication_keys=stream_metadata.get('replication_keys'),
            replication_method=stream_metadata.get('replication_method'))

    return schemas, field_metadata
示例#2
0
def get_schemas(client, spreadsheet_id):
    """Build schemas and metadata for the static streams and each worksheet.

    For every entry in ``STREAMS`` the schema is read from
    ``schemas/<stream_name>.json``. While handling the
    ``spreadsheet_metadata`` stream, the spreadsheet metadata is also fetched
    through ``client``, and an entry keyed by sheet title is added for every
    worksheet for which ``get_sheet_metadata`` returns both a schema and
    columns.

    Args:
        client: Object exposing ``get(path=..., params=..., api=...,
            endpoint=...)`` that returns a mapping with an optional
            ``'sheets'`` entry.
        spreadsheet_id: Value substituted for ``{spreadsheet_id}`` in the
            stream's ``path``.

    Returns:
        A ``(schemas, field_metadata)`` tuple of dicts keyed by stream name
        (or sheet title).
    """
    schemas = {}
    field_metadata = {}

    for stream_name, stream_metadata in STREAMS.items():
        schema_path = get_abs_path('schemas/{}.json'.format(stream_name))
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        with open(schema_path, encoding='utf-8') as file:
            schema = json.load(file)
        schemas[stream_name] = schema

        # Documentation:
        # https://github.com/singer-io/getting-started/blob/master/docs/DISCOVERY_MODE.md#singer-python-helper-functions
        # Reference:
        # https://github.com/singer-io/singer-python/blob/master/singer/metadata.py#L25-L44
        field_metadata[stream_name] = metadata.get_standard_metadata(
            schema=schema,
            key_properties=stream_metadata.get('key_properties'),
            valid_replication_keys=stream_metadata.get('replication_keys'),
            replication_method=stream_metadata.get('replication_method'))

        # Only the spreadsheet_metadata stream drives worksheet discovery.
        if stream_name != 'spreadsheet_metadata':
            continue

        api = stream_metadata.get('api', 'sheets')
        params = stream_metadata.get('params', {})
        querystring = '&'.join(
            '%s=%s' % (key, value) for key, value in params.items())
        path = '{}?{}'.format(
            stream_metadata.get('path').replace('{spreadsheet_id}',
                                                spreadsheet_id),
            querystring)

        # GET spreadsheet_metadata, which incl. sheets (basic metadata for each worksheet)
        spreadsheet_md_results = client.get(
            path=path, params=querystring, api=api, endpoint=stream_name)

        # `or []` covers both a missing and an empty 'sheets' entry.
        for sheet in spreadsheet_md_results.get('sheets') or []:
            sheet_json_schema, columns = get_sheet_metadata(
                sheet, spreadsheet_id, client)

            # SKIP empty sheets (where sheet_json_schema and columns are None)
            if not (sheet_json_schema and columns):
                continue

            sheet_title = sheet.get('properties', {}).get('title')
            schemas[sheet_title] = sheet_json_schema
            field_metadata[sheet_title] = metadata.get_standard_metadata(
                schema=sheet_json_schema,
                key_properties=['__sdc_row'],
                valid_replication_keys=None,
                replication_method='FULL_TABLE')

    return schemas, field_metadata
    def test_create_column_metadata(self):
        """Compare create_column_metadata output with hand-built metadata."""
        db_name = 'test-db'
        table_name = 'test_table'
        key_properties = ['col1']
        is_view = False
        cols = [
            {'pos': 1, 'name': 'col1', 'type': 'int2', 'nullable': 'NO'},
            {'pos': 2, 'name': 'col2', 'type': 'float8', 'nullable': 'YES'},
            {'pos': 3, 'name': 'col3', 'type': 'timestamptz',
             'nullable': 'NO'},
        ]

        # Table-level entries are written under the empty breadcrumb.
        table_level = (
            ('selected-by-default', False),
            ('valid-replication-keys', ['col3']),
            ('table-key-properties', key_properties),
            ('is-view', is_view),
            ('schema-name', table_name),
            ('database-name', db_name),
        )
        expected_mdata = metadata.new()
        for key, value in table_level:
            metadata.write(expected_mdata, (), key, value)

        # One breadcrumb per column, with three entries each.
        for col in cols:
            breadcrumb = ('properties', col['name'])
            col_schema = tap_redshift.schema_for_column(col)
            metadata.write(expected_mdata, breadcrumb,
                           'selected-by-default', True)
            metadata.write(expected_mdata, breadcrumb,
                           'sql-datatype', col['type'])
            metadata.write(expected_mdata, breadcrumb,
                           'inclusion', col_schema.inclusion)

        actual_mdata = tap_redshift.create_column_metadata(
            db_name, cols, is_view, table_name, key_properties)
        assert_that(actual_mdata, equal_to(metadata.to_list(expected_mdata)))
示例#4
0
def discover(ctx):
    """Return a Catalog holding one entry per stream in ``streams.STREAMS``.

    Credentials are checked first via ``check_credentials_are_authorized``.
    Each property of a stream's schema is marked ``automatic`` when it is
    listed in ``streams.PK_FIELDS`` for that stream, otherwise ``available``.
    """
    check_credentials_are_authorized(ctx)
    catalog = Catalog([])

    for stream in streams.STREAMS:
        stream_id = stream.tap_stream_id
        schema = Schema.from_dict(streams.load_schema(stream_id),
                                  inclusion="available")

        mdata = metadata.new()
        for prop in schema.properties:
            is_primary_key = prop in streams.PK_FIELDS[stream_id]
            mdata = metadata.write(
                mdata, ('properties', prop), 'inclusion',
                'automatic' if is_primary_key else 'available')

        entry = CatalogEntry(
            stream=stream_id,
            tap_stream_id=stream_id,
            key_properties=streams.PK_FIELDS[stream_id],
            schema=schema,
            metadata=metadata.to_list(mdata))
        catalog.streams.append(entry)

    return catalog
示例#5
0
def get_schemas():
    """Load each stream's schema and metadata, flagging default-selected fields.

    For every entry in ``STREAMS`` the schema is read from
    ``schemas/<stream_name>.json``, metadata is built with the
    ``get_standard_metadata`` helper in scope, and each field named in the
    stream's ``default_selected_fields`` gets a ``selected`` entry.

    Returns:
        A ``(schemas, field_metadata)`` tuple of dicts keyed by stream name;
        the metadata values are the result of ``to_list``.

    Raises:
        KeyError: If a stream definition has no ``default_selected_fields``.
    """
    schemas = {}
    field_metadata = {}

    for stream_name, stream_metadata in STREAMS.items():
        schema_path = get_abs_path('schemas/{}.json'.format(stream_name))
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        with open(schema_path, encoding='utf-8') as file:
            schema = json.load(file)
        schemas[stream_name] = schema

        # Documentation:
        # https://github.com/singer-io/getting-started/blob/master/docs/DISCOVERY_MODE.md#singer-python-helper-functions
        # Reference:
        # https://github.com/singer-io/singer-python/blob/master/singer/metadata.py#L25-L44
        # NOTE: this calls the bare `get_standard_metadata` name in scope,
        # not `metadata.get_standard_metadata`.
        mdata = get_standard_metadata(
            schema=schema,
            key_properties=stream_metadata.get('key_properties'),
            valid_replication_keys=stream_metadata.get('replication_keys'),
            replication_method=stream_metadata.get('replication_method'),
        )

        # NOTE(review): 'selected' is written as the string 'true', not the
        # boolean True -- confirm this is what consumers expect.
        for field in stream_metadata['default_selected_fields']:
            write(mdata, ('properties', field), 'selected', 'true')

        field_metadata[stream_name] = to_list(mdata)

    return schemas, field_metadata
示例#6
0
def get_schemas():
    """Load each stream's schema and standard metadata.

    For the ``ad_analytics_by_campaign`` and ``ad_analytics_by_creative``
    streams, the ``date_range``, ``pivot`` and ``pivot_value`` properties are
    additionally forced to ``automatic`` inclusion.

    Returns:
        A ``(schemas, field_metadata)`` tuple of dicts keyed by stream name.

    Raises:
        KeyError: If one of the forced properties has no metadata entry for
            an ad-analytics stream.
    """
    schemas = {}
    field_metadata = {}

    for stream_name, stream_metadata in STREAMS.items():
        schema_path = get_abs_path('schemas/{}.json'.format(stream_name))
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        with open(schema_path, encoding='utf-8') as file:
            schema = json.load(file)
        schemas[stream_name] = schema

        # Documentation:
        #   https://github.com/singer-io/getting-started/blob/master/docs/DISCOVERY_MODE.md
        # Reference:
        #   https://github.com/singer-io/singer-python/blob/master/singer/metadata.py#L25-L44
        mdata = metadata.get_standard_metadata(
            schema=schema,
            key_properties=stream_metadata.get('key_properties'),
            valid_replication_keys=stream_metadata.get('replication_keys'),
            replication_method=stream_metadata.get('replication_method'))

        # Add additional metadata
        if stream_name in ('ad_analytics_by_campaign',
                           'ad_analytics_by_creative'):
            mdata_map = metadata.to_map(mdata)
            for field_name in ('date_range', 'pivot', 'pivot_value'):
                mdata_map[('properties', field_name)]['inclusion'] = 'automatic'
            mdata = metadata.to_list(mdata_map)

        field_metadata[stream_name] = mdata

    return schemas, field_metadata
示例#7
0
    def get_metadata(self):
        """Build the metadata list for ``self.schema``.

        Sets ``self.key_properties`` to every property name containing
        ``'date'``. Those properties get ``automatic`` inclusion, all others
        ``available``; every property is marked ``selected-by-default``.
        """
        field_names = self.schema.get('properties').keys()
        self.key_properties = [name for name in field_names if 'date' in name]

        mdata = metadata.new()
        table_level = (
            ('table-key-properties', self.key_properties),
            ('forced-replication-method', 'INCREMENTAL'),
        )
        for key, value in table_level:
            mdata = metadata.write(mdata, (), key, value)

        for field_name in field_names:
            breadcrumb = ('properties', field_name)
            if field_name in self.key_properties:
                inclusion = 'automatic'
            else:
                inclusion = 'available'
            mdata = metadata.write(mdata, breadcrumb, 'inclusion', inclusion)
            mdata = metadata.write(mdata, breadcrumb,
                                   'selected-by-default', True)

        return metadata.to_list(mdata)
示例#8
0
def load_discovered_schema(stream):
    """Return ``(schema, metadata_list)`` for ``stream``.

    Key properties and the replication key get ``automatic`` inclusion; all
    other schema properties get ``available``. For the ``engagements``
    stream, the ``engagement`` property is additionally forced to
    ``automatic``.
    """
    schema = load_schema(stream.tap_stream_id)

    mdata = metadata.new()
    mdata = metadata.write(mdata, (), 'table-key-properties',
                           stream.key_properties)
    mdata = metadata.write(mdata, (), 'forced-replication-method',
                           stream.replication_method)
    if stream.replication_key:
        mdata = metadata.write(mdata, (), 'valid-replication-keys',
                               [stream.replication_key])

    for field_name in schema['properties'].keys():
        is_automatic = (field_name in stream.key_properties
                        or field_name == stream.replication_key)
        mdata = metadata.write(mdata, ('properties', field_name), 'inclusion',
                               'automatic' if is_automatic else 'available')

    # The engagements stream has nested data that we synthesize, so its
    # engagement field must always be emitted.
    if stream.tap_stream_id == "engagements":
        mdata = metadata.write(mdata, ('properties', 'engagement'),
                               'inclusion', 'automatic')

    return schema, metadata.to_list(mdata)
示例#9
0
def load_discovered_schema(stream):
    """Load the schema for ``stream`` and build its discovery metadata.

    Returns:
        A ``(schema, metadata_list)`` tuple.
    """

    def inclusion_for(name):
        # Key properties and the replication key are always included.
        if name in stream.key_properties or name == stream.replication_key:
            return "automatic"
        return "available"

    schema = load_schema(stream.tap_stream_id)
    mdata = metadata.new()

    # Table-level entries.
    mdata = metadata.write(mdata, (), "table-key-properties",
                           stream.key_properties)
    mdata = metadata.write(mdata, (), "forced-replication-method",
                           stream.replication_method)
    if stream.replication_key:
        mdata = metadata.write(mdata, (), "valid-replication-keys",
                               [stream.replication_key])

    # Field-level entries.
    for field_name, _props in schema["properties"].items():
        mdata = metadata.write(mdata, ("properties", field_name),
                               "inclusion", inclusion_for(field_name))

    # The engagements stream has nested data that we synthesize; the
    # engagement field therefore needs to be automatic.
    if stream.tap_stream_id == "engagements":
        mdata = metadata.write(mdata, ("properties", "engagement"),
                               "inclusion", "automatic")

    return schema, metadata.to_list(mdata)
示例#10
0
def discover_catalog(name, automatic_inclusion, **kwargs):
    """Load ``schemas/<name>.json`` next to this file and attach metadata.

    Args:
        name: Base name of the schema file.
        automatic_inclusion: Container of field names marked ``automatic``.
        **kwargs: Optional ``unsupported`` (container of field names marked
            ``unsupported``; defaults to an empty frozenset) and
            ``stream_automatic_inclusion`` (when truthy, the stream itself is
            marked ``automatic``; defaults to ``False``).

    Returns:
        The parsed JSON document with a ``"metadata"`` key added.
    """
    unsupported = kwargs.get("unsupported", frozenset([]))
    stream_automatic_inclusion = kwargs.get("stream_automatic_inclusion",
                                            False)

    schema_dir = os.path.dirname(os.path.realpath(__file__))
    schema_path = os.path.join(schema_dir, 'schemas/{}.json'.format(name))
    mdata = metadata.new()

    with open(schema_path, "r") as schema_file:
        discovered_schema = json.load(schema_file)

    for field in discovered_schema["schema"]["properties"]:
        # `automatic` takes precedence over `unsupported`.
        if field in automatic_inclusion:
            inclusion = 'automatic'
        elif field in unsupported:
            inclusion = 'unsupported'
        else:
            inclusion = 'available'
        mdata = metadata.write(mdata, ('properties', field),
                               'inclusion', inclusion)

    if stream_automatic_inclusion:
        mdata = metadata.write(mdata, (), 'inclusion', 'automatic')

    discovered_schema["metadata"] = metadata.to_list(mdata)
    return discovered_schema
def get_schemas(client, properties_flag, denest_properties_flag):
    """Build schemas and standard metadata for every stream in ``STREAMS``.

    The ``engage`` stream is skipped, with a warning, when
    ``client.disable_engage_endpoint`` is truthy.

    Args:
        client: Passed through to ``get_schema``; its
            ``disable_engage_endpoint`` attribute is read here.
        properties_flag: Passed through to ``get_schema``.
        denest_properties_flag: Passed through to ``get_schema``.

    Returns:
        A ``(schemas, field_metadata)`` tuple of dicts keyed by stream name.
    """
    schemas = {}
    field_metadata = {}

    for stream_name, stream_metadata in STREAMS.items():
        # When the client detects disable_engage_endpoint, skip discovering the stream
        if stream_name == 'engage' and client.disable_engage_endpoint:
            LOGGER.warning(
                'Mixpanel returned a 402 indicating the Engage endpoint and stream is unavailable. Skipping.'
            )
            continue

        schema = get_schema(client, properties_flag, denest_properties_flag,
                            stream_name)
        schemas[stream_name] = schema

        # Documentation:
        # https://github.com/singer-io/getting-started/blob/master/docs/DISCOVERY_MODE.md#singer-python-helper-functions
        # Reference:
        # https://github.com/singer-io/singer-python/blob/master/singer/metadata.py#L25-L44
        field_metadata[stream_name] = metadata.get_standard_metadata(
            schema=schema,
            key_properties=stream_metadata.get('key_properties'),
            valid_replication_keys=stream_metadata.get('replication_keys'),
            replication_method=stream_metadata.get('replication_method'))

    return schemas, field_metadata
示例#12
0
def discover():
    """
    Build the catalog dict for every schema returned by load_schemas().

    Each stream uses 'id' as its key property and 'updated_at' as its valid
    replication key; both fields get 'automatic' inclusion and every other
    schema property gets 'available'.
    """
    streams = []

    for schema_name, schema in load_schemas().items():
        mdata = metadata.new()

        # Table-level entries.
        mdata = metadata.write(mdata, (), 'table-key-properties', ['id'])
        mdata = metadata.write(mdata, (), 'valid-replication-keys',
                               ['updated_at'])

        # Field-level entries: the two fixed fields first, then the rest.
        for automatic_field in ('id', 'updated_at'):
            mdata = metadata.write(mdata, ('properties', automatic_field),
                                   'inclusion', 'automatic')
        for field_name in schema['properties'].keys():
            if field_name in {'id', 'updated_at'}:
                continue
            mdata = metadata.write(mdata, ('properties', field_name),
                                   'inclusion', 'available')

        streams.append({
            'stream': schema_name,
            'tap_stream_id': schema_name,
            'schema': schema,
            'metadata': metadata.to_list(mdata),
            'key_properties': ['id']
        })

    return {'streams': streams}
示例#13
0
    def generate_catalog(self):
        """Return a single-entry catalog list for this stream's table.

        The schema for ``TABLE`` is loaded and its references resolved, then
        standard metadata is generated from ``KEY_PROPERTIES``,
        ``REPLICATION_KEYS`` and ``REPLICATION_METHOD``. Every replication key
        is additionally forced to ``automatic`` inclusion.

        Returns:
            A list with one dict holding ``tap_stream_id``, ``stream``,
            ``key_properties``, ``schema`` and ``metadata``.

        Raises:
            KeyError: If a replication key has no metadata entry.
        """
        cls = self.__class__

        # get the reference schemas
        refs = load_schema_references()
        # resolve the schema reference and make final schema
        schema = singer.resolve_schema_references(load_schema(cls.TABLE), refs)

        # A falsy REPLICATION_KEYS (None or empty) means "no replication
        # keys"; normalise once so the loop below never iterates None.
        replication_keys = self.REPLICATION_KEYS or []

        # use 'get_standard_metadata' with primary key, replication key and replication method
        mdata = metadata.get_standard_metadata(
            schema=schema,
            key_properties=self.KEY_PROPERTIES,
            valid_replication_keys=replication_keys or None,
            replication_method=self.REPLICATION_METHOD)

        mdata_map = metadata.to_map(mdata)

        # make 'automatic' inclusion for replication keys
        for replication_key in replication_keys:
            mdata_map[('properties',
                       replication_key)]['inclusion'] = 'automatic'

        return [{
            'tap_stream_id': cls.TABLE,
            'stream': cls.TABLE,
            'key_properties': cls.KEY_PROPERTIES,
            'schema': schema,
            'metadata': metadata.to_list(mdata_map)
        }]
示例#14
0
def _populate_metadata(schema_name: str, schema: Dict) -> Dict:
    """
    Build the initial metadata map for one schema.

    The stream itself is written as not selected. Fields listed in
    KEY_PROPERTIES[schema_name] get 'automatic' inclusion; every other field
    gets 'available' inclusion and is written as not selected.

    Args:
        schema_name: The schema name to generate metadata for e.g. 'general_ledger_accounts'.
        schema: The corresponding JSON schema.

    Returns: The metadata map (not converted to a list).

    """
    key_fields = KEY_PROPERTIES[schema_name]

    mdata = metadata.new()
    mdata = metadata.write(mdata, (), 'table-key-properties', key_fields)
    mdata = metadata.write(mdata, (), 'selected', False)

    for field_name in schema['properties']:
        breadcrumb = ('properties', field_name)
        if field_name in key_fields:
            mdata = metadata.write(mdata, breadcrumb, 'inclusion', 'automatic')
            continue

        # Non-key fields are optional and start out deselected.
        mdata = metadata.write(mdata, breadcrumb, 'inclusion', 'available')
        mdata = metadata.write(mdata, breadcrumb, 'selected', False)

    return mdata
示例#15
0
    def load_metadata(self):
        """Return the metadata list for this stream's schema.

        Key properties and the replication key get ``automatic`` inclusion,
        other fields ``available``. When the instance has a ``period``
        attribute, the ``day`` entry is removed for ``'hourRange'`` and the
        ``hour`` entry for ``'dayRange'``.
        """
        schema = self.load_schema()
        mdata = metadata.new()

        mdata = metadata.write(mdata, (), 'table-key-properties',
                               self.key_properties)
        mdata = metadata.write(mdata, (), 'forced-replication-method',
                               self.replication_method)
        if self.replication_key:
            mdata = metadata.write(mdata, (), 'valid-replication-keys',
                                   [self.replication_key])

        for field_name in schema['properties'].keys():
            is_automatic = (field_name in self.key_properties
                            or field_name == self.replication_key)
            mdata = metadata.write(
                mdata, ('properties', field_name), 'inclusion',
                'automatic' if is_automatic else 'available')

        # For period streams, drop the metadata entry of the field that does
        # not apply to the configured time period.
        period = getattr(self, 'period', None)
        if period == 'hourRange':
            mdata.pop(('properties', 'day'))
        elif period == 'dayRange':
            mdata.pop(('properties', 'hour'))

        return metadata.to_list(mdata)
示例#16
0
def get_metadata(schema, key_properties, replication_method, replication_key):
    """Build the metadata list for ``schema``.

    Fields in ``key_properties``, the ``replication_key`` field and a field
    named ``updated`` get ``automatic`` inclusion; all others ``available``.
    ``valid-replication-keys`` is only written when ``replication_key`` is
    truthy.
    """
    mdata = metadata.new()
    mdata = metadata.write(mdata, (), 'table-key-properties', key_properties)
    mdata = metadata.write(mdata, (), 'forced-replication-method',
                           replication_method)
    if replication_key:
        mdata = metadata.write(mdata, (), 'valid-replication-keys',
                               [replication_key])

    for field_name in schema['properties'].keys():
        if field_name in key_properties \
                or field_name in [replication_key, "updated"]:
            inclusion = 'automatic'
        else:
            inclusion = 'available'
        mdata = metadata.write(mdata, ('properties', field_name),
                               'inclusion', inclusion)

    return metadata.to_list(mdata)
示例#17
0
def get_schemas():
    """Load each stream's schema and metadata, forcing replication keys on.

    The schema file is ``stream_class.json_schema`` when that is truthy,
    otherwise ``schemas/<stream_name>.json``. After the standard metadata is
    generated, every replication key is set to ``automatic`` inclusion.

    Returns:
        A ``(schemas, field_metadata)`` tuple of dicts keyed by stream name.

    Raises:
        KeyError: If a replication key has no metadata entry.
    """
    schemas = {}
    field_metadata = {}

    for stream_name, stream_class in STREAMS.items():
        base_schema_path = 'schemas/{}.json'.format(stream_name)
        schema_file_path = stream_class.json_schema or base_schema_path
        schema_path = get_abs_path(schema_file_path)
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        with open(schema_path, encoding='utf-8') as file:
            schema = json.load(file)
        schemas[stream_name] = schema

        # Documentation:
        # https://github.com/singer-io/getting-started/blob/master/docs/DISCOVERY_MODE.md#singer-python-helper-functions
        # Reference:
        # https://github.com/singer-io/singer-python/blob/master/singer/metadata.py#L25-L44
        mdata = metadata.get_standard_metadata(
            schema=schema,
            key_properties=stream_class.key_properties or None,
            valid_replication_keys=stream_class.replication_keys or None,
            replication_method=stream_class.replication_method or None)

        mdata_map = metadata.to_map(mdata)
        # update inclusion of "replication keys" as "automatic"
        for replication_key in (stream_class.replication_keys or []):
            mdata_map[('properties',
                       replication_key)]['inclusion'] = 'automatic'

        field_metadata[stream_name] = metadata.to_list(mdata_map)

    return schemas, field_metadata
示例#18
0
def generate_metadata(schema):
    """Return a metadata list with ``id`` as key and every field automatic."""
    mdata = metadata.write(metadata.new(), (), 'table-key-properties', ['id'])

    for field_name in schema['properties'].keys():
        breadcrumb = ('properties', field_name)
        mdata = metadata.write(mdata, breadcrumb, 'inclusion', 'automatic')

    return metadata.to_list(mdata)
示例#19
0
def load_metadata(schema):
    """Return a metadata list marking every schema property as automatic.

    When the schema has a ``RECORDNO`` property, it is also recorded as the
    table's key property. A schema without ``properties`` produces no
    entries.

    Args:
        schema: JSON-schema mapping; its ``properties`` entry is optional.

    Returns:
        The result of ``metadata.to_list`` for the built metadata.
    """
    mdata = metadata.new()

    for field_name in schema.get('properties', {}).keys():
        mdata = metadata.write(mdata, ('properties', field_name), 'inclusion',
                               'automatic')
        if field_name == "RECORDNO":
            # Key properties are a list of field names; a bare string would
            # be iterated character by character by list-based consumers.
            mdata = metadata.write(mdata, (), 'table-key-properties',
                                   ["RECORDNO"])

    return metadata.to_list(mdata)
示例#20
0
def generate_metadata(schema_name, schema):
    """Return the metadata list for ``schema``.

    Fields listed in ``SCHEMA_PRIMARY_KEYS[schema_name]`` get ``automatic``
    inclusion; every other field gets ``available``.
    """
    pk_fields = SCHEMA_PRIMARY_KEYS[schema_name]
    mdata = metadata.write(metadata.new(), (), 'table-key-properties',
                           pk_fields)

    for field_name in schema['properties'].keys():
        inclusion = 'automatic' if field_name in pk_fields else 'available'
        mdata = metadata.write(mdata, ('properties', field_name),
                               'inclusion', inclusion)

    return metadata.to_list(mdata)
示例#21
0
def populate_metadata(schema_name, schema):
    """Return the metadata map (not a list) for ``schema``.

    Fields listed in ``KEY_PROPERTIES[schema_name]`` get ``automatic``
    inclusion; every other field gets ``available``.
    """
    mdata = metadata.new()
    key_fields = KEY_PROPERTIES[schema_name]
    mdata = metadata.write(mdata, (), 'table-key-properties', key_fields)

    for field_name in schema['properties'].keys():
        breadcrumb = ('properties', field_name)
        if field_name in key_fields:
            inclusion = 'automatic'
        else:
            inclusion = 'available'
        mdata = metadata.write(mdata, breadcrumb, 'inclusion', inclusion)

    return mdata
示例#22
0
def load_metadata(table_spec, schema):
    """Return the metadata list for ``schema`` using ``table_spec`` keys.

    ``table_spec['key_properties']`` is written as the table's key
    properties. A field gets ``automatic`` inclusion only when the key
    properties are non-empty and contain it; otherwise ``available``.
    """
    mdata = metadata.new()

    key_properties = table_spec['key_properties']
    mdata = metadata.write(mdata, (), 'table-key-properties', key_properties)

    for field_name in schema.get('properties', {}).keys():
        if key_properties and field_name in key_properties:
            inclusion = 'automatic'
        else:
            inclusion = 'available'
        mdata = metadata.write(mdata, ('properties', field_name),
                               'inclusion', inclusion)

    return metadata.to_list(mdata)
示例#23
0
def generate_metadata(stream, schema):
    """Return the metadata list for ``schema``.

    Fields in ``stream.pk_fields`` get ``automatic`` inclusion; the remaining
    fields of ``schema.properties`` get ``available``.
    """

    def inclusion_for(name):
        return 'automatic' if name in stream.pk_fields else 'available'

    mdata = metadata.new()
    mdata = metadata.write(mdata, (), 'table-key-properties', stream.pk_fields)

    for field_name in schema.properties.keys():
        mdata = metadata.write(mdata, ('properties', field_name),
                               'inclusion', inclusion_for(field_name))

    return metadata.to_list(mdata)
示例#24
0
def populate_metadata(schema_name, schema):
    """Return the metadata map (not a list) with ``id`` as the key property.

    The ``id`` field gets ``automatic`` inclusion and every other field
    ``available``. ``schema_name`` is accepted but not used.
    """
    mdata = metadata.write(metadata.new(), (), "table-key-properties", ["id"])

    for field_name in schema["properties"].keys():
        if field_name == "id":
            inclusion = "automatic"
        else:
            inclusion = "available"
        mdata = metadata.write(
            mdata, ("properties", field_name), "inclusion", inclusion
        )

    return mdata
示例#25
0
def populate_metadata(schema_name, schema):
    """Return the metadata map (not a list) for ``schema``.

    Fields listed in ``KEY_PROPERTIES[schema_name]`` get ``automatic``
    inclusion; every other field gets ``available``.
    """
    mdata = metadata.new()
    key_fields = KEY_PROPERTIES[schema_name]
    mdata = metadata.write(mdata, (), "table-key-properties", key_fields)

    for field_name in schema["properties"].keys():
        mdata = metadata.write(
            mdata,
            ("properties", field_name),
            "inclusion",
            "automatic" if field_name in key_fields else "available",
        )

    return mdata
示例#26
0
def load_metadata(schema):
    """Return a metadata list with every schema property set to automatic.

    The table's key properties are the ``sampling.SDC_SOURCE_FILE_COLUMN``
    and ``sampling.SDC_SOURCE_LINENO_COLUMN`` columns. A schema without
    ``properties`` yields only the table-level entry.
    """
    source_keys = [
        sampling.SDC_SOURCE_FILE_COLUMN,
        sampling.SDC_SOURCE_LINENO_COLUMN,
    ]
    mdata = metadata.write(metadata.new(), (), 'table-key-properties',
                           source_keys)

    # Every field is always emitted.
    for field_name in schema.get('properties', {}).keys():
        breadcrumb = ('properties', field_name)
        mdata = metadata.write(mdata, breadcrumb, 'inclusion', 'automatic')

    return metadata.to_list(mdata)
示例#27
0
def load_metadata(table_name, schema):
    """Return the metadata list for ``schema``.

    Key properties come from ``get_key_properties(table_name)``; those fields
    get ``automatic`` inclusion and every other field ``available``. A schema
    without ``properties`` yields only the table-level entry.
    """
    mdata = metadata.new()

    key_properties = get_key_properties(table_name)
    mdata = metadata.write(mdata, (), 'table-key-properties', key_properties)

    for field_name in schema.get('properties', {}).keys():
        is_key = field_name in key_properties
        mdata = metadata.write(mdata, ('properties', field_name), 'inclusion',
                               'automatic' if is_key else 'available')

    return metadata.to_list(mdata)
示例#28
0
def get_discovery_metadata(stream, schema):
    """Return the discovery metadata list for ``stream`` and ``schema``.

    Key properties and the replication key get ``automatic`` inclusion;
    every other field gets ``available``. ``valid-replication-keys`` is only
    written when ``stream.replication_key`` is truthy.
    """
    mdata = metadata.new()

    # Table-level entries.
    mdata = metadata.write(mdata, (), 'table-key-properties',
                           stream.key_properties)
    mdata = metadata.write(mdata, (), 'forced-replication-method',
                           stream.replication_method)
    if stream.replication_key:
        mdata = metadata.write(mdata, (), 'valid-replication-keys',
                               [stream.replication_key])

    # Field-level entries.
    for field_name in schema['properties'].keys():
        if field_name in stream.key_properties or field_name == stream.replication_key:
            inclusion = 'automatic'
        else:
            inclusion = 'available'
        mdata = metadata.write(mdata, ('properties', field_name),
                               'inclusion', inclusion)

    return metadata.to_list(mdata)
示例#29
0
    def generate_catalog(self):
        """Return a single-entry catalog list for this class's table.

        The stream and every property in ``SCHEMA`` are written with
        ``available`` inclusion.
        """
        cls = self.__class__

        # Empty breadcrumb for the stream itself, then one per property.
        breadcrumbs = [()]
        for prop in cls.SCHEMA['properties']:  # pylint:disable=unsubscriptable-object
            breadcrumbs.append(('properties', prop))

        mdata = metadata.new()
        for breadcrumb in breadcrumbs:
            metadata.write(mdata, breadcrumb, 'inclusion', 'available')

        catalog_entry = {
            'tap_stream_id': cls.TABLE,
            'stream': cls.TABLE,
            'key_properties': cls.KEY_PROPERTIES,
            'schema': cls.SCHEMA,
            'metadata': metadata.to_list(mdata)
        }
        return [catalog_entry]
    def load_metadata(self):
        """Return the metadata list for the schema from ``self.load_schema()``.

        Key properties and the replication key get ``automatic`` inclusion;
        every other field gets ``available``. ``valid-replication-keys`` is
        only written when ``self.replication_key`` is truthy.
        """
        schema = self.load_schema()
        mdata = metadata.new()

        # Table-level entries.
        mdata = metadata.write(mdata, (), 'table-key-properties',
                               self.key_properties)
        mdata = metadata.write(mdata, (), 'forced-replication-method',
                               self.replication_method)
        if self.replication_key:
            mdata = metadata.write(mdata, (), 'valid-replication-keys',
                                   [self.replication_key])

        # Field-level entries.
        for field_name in schema['properties'].keys():
            breadcrumb = ('properties', field_name)
            is_automatic = (field_name in self.key_properties
                            or field_name == self.replication_key)
            mdata = metadata.write(
                mdata, breadcrumb, 'inclusion',
                'automatic' if is_automatic else 'available')

        return metadata.to_list(mdata)