Exemplo n.º 1
0
def marshall_table_full(table_dict: Dict) -> Dict:
    """
    Forms the full version of a table Dict, with additional and sanitized fields.

    The input is round-tripped through ``TableSchema`` (load, then dump) and the
    dumped dict is augmented in place with:

    * ``is_editable`` on the table and on every column,
    * owner / reader user objects passed through ``_map_user_object_to_schema``,
    * column stats sorted by ``COLUMN_STAT_ORDER`` when that config is set,
    * ``key`` built from database, cluster, schema and name,
    * ``partition`` derived from the table watermarks.

    :param table_dict: Table Dict from metadata service
    :return: Table Dict with sanitized fields
    """

    schema = TableSchema(strict=True)
    # TODO: consider migrating to validate() instead of roundtripping
    table: Table = schema.load(table_dict).data
    results: Dict[str, Any] = schema.dump(table).data

    # Check if schema is uneditable
    is_editable_schema = results['schema'] not in app.config['UNEDITABLE_SCHEMAS']

    # Check if Table Description is uneditable: the table stays editable only
    # if every configured rule allows it (stops at the first failing rule).
    is_editable_table = all(
        _parse_editable_rule(rule, results['schema'], results['name'])
        for rule in app.config['UNEDITABLE_TABLE_DESCRIPTION_MATCH_RULES']
    )

    is_editable = is_editable_schema and is_editable_table
    results['is_editable'] = is_editable

    # TODO - Cleanup https://github.com/lyft/amundsen/issues/296
    #  This code will try to supplement some missing data since the data here is incomplete.
    #  Once the metadata service response provides complete user objects we can remove this.
    results['owners'] = [
        _map_user_object_to_schema(owner) for owner in results['owners']
    ]
    for reader_object in results['table_readers']:
        reader_object['user'] = _map_user_object_to_schema(
            reader_object['user'])

    stat_order = app.config['COLUMN_STAT_ORDER']
    for col in results['columns']:
        # Editability does not depend on stat ordering, so it is set for every
        # column even when no COLUMN_STAT_ORDER is configured.
        col['is_editable'] = is_editable

        # If order is provided, we sort the column stats based on the pre-defined order
        if stat_order:
            # the stat_type isn't defined in COLUMN_STAT_ORDER, we just use the max index for sorting
            unranked = len(stat_order)
            col['stats'].sort(
                key=lambda stat: stat_order.get(stat['stat_type'], unranked))

    # TODO: Add the 'key' or 'id' to the base TableSchema
    results[
        'key'] = f'{table.database}://{table.cluster}.{table.schema}/{table.name}'
    # Temp code to make 'partition_key' and 'partition_value' part of the table
    results['partition'] = _get_partition_data(results['watermarks'])

    # We follow same style as column stat order for arranging the programmatic descriptions
    prog_descriptions = results['programmatic_descriptions']
    if prog_descriptions:
        _update_prog_descriptions(prog_descriptions)

    return results
Exemplo n.º 2
0
    def get(self, table_uri: str) -> Iterable[Union[Mapping, int, None]]:
        """
        Fetch a table through the client and return it dumped via TableSchema.

        :param table_uri: URI passed to ``self.client.get_table``
        :return: ``(dumped table, HTTPStatus.OK)`` on success, or
            ``(message dict, HTTPStatus.NOT_FOUND)`` when the lookup raises
            NotFoundException
        """
        try:
            found = self.client.get_table(table_uri=table_uri)
            payload = TableSchema(strict=True).dump(found).data
        except NotFoundException:
            # Unknown URI: report it to the caller instead of propagating.
            not_found = {'message': 'table_uri {} does not exist'.format(table_uri)}
            return not_found, HTTPStatus.NOT_FOUND

        return payload, HTTPStatus.OK
    def get(self, table_uri: str) -> Iterable[Union[Mapping, int, None]]:
        """
        Fetch a table with its upstream/downstream values and return it dumped
        via TableSchema.

        ``self.client.get_table`` is unpacked into ``(table, upstream, downstream)``;
        the latter two are attached to the dumped dict under the ``'upstream'``
        and ``'downstream'`` keys.

        :param table_uri: URI passed to ``self.client.get_table``
        :return: ``(dumped table, HTTPStatus.OK)`` on success, or
            ``(message dict, HTTPStatus.NOT_FOUND)`` when the lookup raises
            NotFoundException
        :raises Exception: any other error is logged and re-raised unchanged
        """
        try:
            table, upstream, downstream = self.client.get_table(table_uri=table_uri)
            schema = TableSchema(strict=True)
            data = schema.dump(table).data
            data['upstream'] = upstream
            data['downstream'] = downstream
            return data, HTTPStatus.OK

        except NotFoundException:
            return {'message': 'table_uri {} does not exist'.format(table_uri)}, HTTPStatus.NOT_FOUND
        except Exception:
            # The previous call passed the exception as a format argument with
            # no placeholder in the message, so the record could not be
            # formatted. LOGGER.exception logs the message together with the
            # active traceback.
            LOGGER.exception('Error getting table_uri %s', table_uri)
            # Bare raise re-raises the active exception as-is.
            raise