Пример #1
0
    def test_get_grouped_master_schema(self):
        """
        Checks that ``get_grouped_master_schema`` returns the same grouping
        as one rebuilt here directly from the master JSON schema file.
        """
        with io.open(self.master_json_schema_fp, 'r', encoding='utf-8') as f:
            raw_schema = json.load(f)

        # Pass 1: the JSON keys are stringified Python literals, so parse
        # them back; for integer columns a ``dtype_range`` dict carrying
        # ``start``/``stop`` is turned into a ``range`` object.
        parsed_schema = OrderedDict()
        for raw_key, definition in raw_schema.items():
            schema_key = literal_eval(raw_key)
            if definition['dtype_range']:
                definition = dict(definition)
                bounds = definition['dtype_range']
                has_int_bounds = (
                    definition['py_dtype'] == 'int' and
                    isinstance(bounds, dict) and
                    'start' in bounds and
                    'stop' in bounds
                )
                if has_int_bounds:
                    definition['dtype_range'] = range(bounds['start'], bounds['stop'])
            parsed_schema[schema_key] = definition

        # Pass 2: where the column validation is itself a start/stop dict,
        # substitute the (already converted) dtype range for it.
        exp_master_schema = OrderedDict()
        for schema_key, definition in parsed_schema.items():
            validation = definition.get('column_validation')
            if isinstance(validation, dict) and 'start' in validation:
                definition = {**definition, 'column_validation': definition['dtype_range']}
            exp_master_schema[schema_key] = definition

        # Pass 3: group consecutive items by the first element of the key
        # and index each group by the second element of the key.
        exp_grouped_master_schema = {}
        for schema_type, schema_items in groupby(exp_master_schema.items(), key=lambda entry: entry[0][0]):
            exp_grouped_master_schema[schema_type] = {
                item_key[1]: item for item_key, item in schema_items
            }

        res_grouped_master_schema = get_grouped_master_schema()
        self.assertEqual(exp_grouped_master_schema, res_grouped_master_schema)
Пример #2
0
 def test_get_column_schema__valid_schema_type_but_invalid_schema_column__raises_non_oed_schema_column_error(self, schema_type):
     """
     Picks a random column belonging to a schema type other than
     ``schema_type`` and checks that looking it up under ``schema_type``
     raises ``NonOedSchemaColumnError``.
     """
     # Gather every column listed under any *other* schema type.
     # NOTE(review): a column that is also listed under ``schema_type``
     # itself would not be filtered out here - confirm that the schema
     # types do not share column names.
     foreign_columns = []
     for stype, col_dict in get_grouped_master_schema().items():
         if stype != schema_type:
             foreign_columns.extend(col_dict)

     column = np.random.choice(foreign_columns)

     with self.assertRaises(NonOedSchemaColumnError):
         get_column_schema(schema_type, column)
Пример #3
0
def get_column_schema(schema_type, header):
    """
    Gets the column schema (definition) for a given column in an OED acc.,
    loc., reins. info. or reins. scope file, using a grouped version of
    the master file schema.

    Both the schema type and the column header are matched
    case-insensitively (they are lower-cased before the lookup).

    :param schema_type: OED schema type indicator (``loc``, ``acc``,
                        ``reinsinfo``, or ``reinsscope``); ``master`` is
                        rejected
    :type schema_type: str

    :param header: The column header
    :type header: str

    :return: The column schema
    :rtype: dict

    :raises OedError: if ``schema_type`` is ``master``
    :raises: the error built by ``get_file_error`` when the schema type
             and/or the column header cannot be found in the grouped schema
    """
    _schema_type = schema_type.lower()

    if _schema_type == 'master':
        raise OedError(
            'Column schemas are only available for specific file '
            'schema types - "acc", "loc", "reinsinfo" or "reinsscope"')

    schema = get_grouped_master_schema()

    _header = header.lower()

    try:
        col_schema = schema[_schema_type][_header]
    except KeyError:
        # Work out which part of the lookup failed so that the most
        # specific error can be reported.
        is_non_oed_schema = _schema_type not in schema
        is_non_oed_column = not any(_header in schema[stype]
                                    for stype in schema)

        if is_non_oed_schema and is_non_oed_column:
            raise get_file_error(
                'non oed schema and column',
                '"{}" is not a valid OED schema type and "{}" is not a valid '
                'column in any OED schema'.format(schema_type, header))
        elif is_non_oed_schema:
            raise get_file_error(
                'non oed schema',
                '"{}" is not a valid OED schema type'.format(schema_type))
        elif is_non_oed_column:
            raise get_file_error(
                'non oed column',
                '"{}" is not a valid column in any OED schema'.format(header))
        else:
            # The schema type is valid and the column exists in some other
            # schema type, so it must be missing from this one. Using a
            # terminal ``else`` guarantees that a failed lookup always
            # raises rather than silently returning ``None``.
            raise get_file_error(
                'non oed schema column',
                '"{}" is not a valid column in the OED "{}" schema'.format(
                    header, schema_type))
    else:
        return col_schema
Пример #4
0
    lists,
    text,
)

from oedtools.schema import (
    get_column_schema,
    get_grouped_master_schema,
    get_schema,
    SCHEMA_DIR,
)
from oedtools.values import (
    get_values_profile, )

# Full master schema and its grouped form, loaded once at import time.
MASTER_SCHEMA = get_schema()
GROUPED_SCHEMA = get_grouped_master_schema()

# Keys of the grouped schema, with and without the 'master' entry.
SCHEMA_TYPES = [*GROUPED_SCHEMA]
SCHEMA_TYPES_EX_MASTER = list(set(SCHEMA_TYPES).difference(['master']))

VALUE_GROUPS = sorted(get_values_profile())

# Sorted selections of master schema keys, filtered on the attributes of
# each column definition.
ALL = sorted(MASTER_SCHEMA)
REQUIRED_TYPES = ['R', 'CR', 'O']
REQUIRED = sorted(
    [key for key in MASTER_SCHEMA if MASTER_SCHEMA[key]['required'] == 'R']
)
OPTIONAL = sorted(set(ALL) - set(REQUIRED))
NONNULL = sorted(
    [key for key in MASTER_SCHEMA if not MASTER_SCHEMA[key]['blank']]
)
REQUIRED_NONNULL = set(REQUIRED).intersection(NONNULL)
NUMERIC = sorted(
    [key for key in MASTER_SCHEMA
     if MASTER_SCHEMA[key]['py_dtype'] in ('int', 'float')]
)
INT = sorted(
    [key for key in MASTER_SCHEMA if MASTER_SCHEMA[key]['py_dtype'] == 'int']
)