def test_get_grouped_master_schema(self):
    """
    Rebuilds the expected grouped master schema directly from the master
    JSON schema file and checks that ``get_grouped_master_schema`` returns
    an equal structure.
    """
    with io.open(self.master_json_schema_fp, 'r', encoding='utf-8') as schema_file:
        raw_schema = json.load(schema_file)

    # Step 1: the JSON keys are string literals, so evaluate them back into
    # Python objects; for integer columns, a ``dtype_range`` given as a
    # start/stop dict is turned into a ``range`` object.
    parsed_schema = OrderedDict()
    for raw_key, col_def in raw_schema.items():
        parsed_key = literal_eval(raw_key)
        if col_def['dtype_range']:
            converted = {}
            for prop, prop_value in col_def.items():
                is_int_range_spec = (
                    prop == 'dtype_range' and
                    col_def['py_dtype'] == 'int' and
                    isinstance(prop_value, dict) and
                    'start' in prop_value and
                    'stop' in prop_value
                )
                if is_int_range_spec:
                    converted[prop] = range(prop_value['start'], prop_value['stop'])
                else:
                    converted[prop] = prop_value
            col_def = converted
        parsed_schema[parsed_key] = col_def

    # Step 2: where the column validation is itself a dict carrying a
    # ``start`` key, it is replaced by the (possibly converted) dtype range.
    exp_master_schema = OrderedDict()
    for key, col_def in parsed_schema.items():
        validation = col_def.get('column_validation')
        if isinstance(validation, dict) and 'start' in validation:
            col_def = {**col_def, 'column_validation': col_def['dtype_range']}
        exp_master_schema[key] = col_def

    # Step 3: group consecutive entries by the first element of their key
    # and index each group by the second element of the key.
    exp_grouped_master_schema = {}
    for schema_type, entries in groupby(exp_master_schema.items(), key=lambda entry: entry[0][0]):
        exp_grouped_master_schema[schema_type] = {
            entry_key[1]: entry_def for entry_key, entry_def in entries
        }

    res_grouped_master_schema = get_grouped_master_schema()

    self.assertEqual(exp_grouped_master_schema, res_grouped_master_schema)
def test_get_column_schema__valid_schema_type_but_invalid_schema_column__raises_non_oed_schema_column_error(self, schema_type):
    """
    Checks that asking for a column which exists in some other schema type,
    but not in ``schema_type`` itself, raises ``NonOedSchemaColumnError``.

    :param schema_type: The schema type to look the foreign column up in
    :type schema_type: str
    """
    grouped_schema = get_grouped_master_schema()
    own_columns = grouped_schema.get(schema_type, {})

    # Only columns that are absent from the target schema can trigger the
    # error; a column shared between schema types would be found normally
    # and make the test fail at random.
    foreign_columns = [
        col
        for stype, col_dict in grouped_schema.items()
        if stype != schema_type
        for col in col_dict
        if col not in own_columns
    ]
    column = np.random.choice(foreign_columns)

    with self.assertRaises(NonOedSchemaColumnError):
        get_column_schema(schema_type, column)
def get_column_schema(schema_type, header):
    """
    Gets the column schema (definition) for a given column in an OED acc.,
    loc., reins. info. or reins. scope file, using a grouped version of the
    master file schema.

    Both the schema type and the column header are lower-cased before the
    lookup.

    :param schema_type: OED schema type indicator (``loc``, ``acc``,
                        ``reinsinfo``, or ``reinsscope``) - ``master`` is
                        rejected, as column schemas are only available for
                        the specific file schema types
    :type schema_type: str

    :param header: The column header
    :type header: str

    :return: The column schema
    :rtype: dict

    :raises OedError: if the schema type is ``master``
    :raises Exception: the error built by ``get_file_error`` if the schema
                       type and/or the column header cannot be found in the
                       grouped master schema
    """
    _schema_type = schema_type.lower()

    if _schema_type == 'master':
        raise OedError(
            'Column schemas are only available for specific file '
            'schema types - "acc", "loc", "reinsinfo" or "reinsscope"')

    schema = get_grouped_master_schema()

    _header = header.lower()

    try:
        col_schema = schema[_schema_type][_header]
    except KeyError:
        # Work out which part of the lookup failed so that the error can be
        # as specific as possible.
        is_non_oed_schema = _schema_type not in schema
        is_non_oed_column = not any(
            _header in columns for columns in schema.values()
        )

        if is_non_oed_schema and is_non_oed_column:
            error = get_file_error(
                'non oed schema and column',
                '"{}" is not a valid OED schema type and "{}" is not a valid '
                'column in any OED schema'.format(schema_type, header))
        elif is_non_oed_schema:
            error = get_file_error(
                'non oed schema',
                '"{}" is not a valid OED schema type'.format(schema_type))
        elif is_non_oed_column:
            error = get_file_error(
                'non oed column',
                '"{}" is not a valid column in any OED schema'.format(header))
        else:
            # The schema type is valid and the column exists in some other
            # schema type, so it can only be missing from this one.
            error = get_file_error(
                'non oed schema column',
                '"{}" is not a valid column in the OED "{}" schema'.format(
                    header, schema_type))

        # The failed dict lookup is an implementation detail - keep it out
        # of the traceback of the validation error.
        raise error from None
    else:
        return col_schema
lists, text, )
from oedtools.schema import (
    get_column_schema,
    get_grouped_master_schema,
    get_schema,
    SCHEMA_DIR,
)
from oedtools.values import (
    get_values_profile,
)

# Module-level fixtures derived from the schema, computed once at import.

# Master schema as returned by ``get_schema()``; the expressions below use
# it as a mapping whose values expose ``required``, ``blank`` and
# ``py_dtype`` entries.
MASTER_SCHEMA = get_schema()

# Grouped view of the master schema; iterating it yields the schema types.
GROUPED_SCHEMA = get_grouped_master_schema()

SCHEMA_TYPES = list(GROUPED_SCHEMA)

# All schema types except ``master``.
# NOTE(review): the list is built from a set, so its order is not guaranteed.
SCHEMA_TYPES_EX_MASTER = list(set(SCHEMA_TYPES).difference(['master']))

# Sorted keys of the values profile.
VALUE_GROUPS = sorted(get_values_profile())

# Sorted keys of every entry in the master schema.
ALL = sorted(MASTER_SCHEMA)

# Values of the ``required`` field - presumably required, conditionally
# required and optional; TODO confirm against the schema definition.
REQUIRED_TYPES = ['R', 'CR', 'O']

# Keys whose ``required`` field is exactly ``'R'``.
REQUIRED = sorted(k for k, v in MASTER_SCHEMA.items() if v['required'] == 'R')

# Every key that is not in ``REQUIRED`` (so this includes any ``'CR'`` ones).
OPTIONAL = sorted(set(ALL).difference(REQUIRED))

# Keys whose ``blank`` field is falsy.
NONNULL = sorted(k for k, v in MASTER_SCHEMA.items() if not v['blank'])

# Keys that are both required and non-null; unlike its neighbours this is a
# set, not a sorted list.
REQUIRED_NONNULL = set(REQUIRED).intersection(NONNULL)

# Keys whose Python dtype is ``int`` or ``float``.
NUMERIC = sorted(k for k, v in MASTER_SCHEMA.items() if v['py_dtype'] in ['int', 'float'])

# Keys whose Python dtype is ``int``.
INT = sorted(k for k, v in MASTER_SCHEMA.items() if v['py_dtype'] == 'int')