def test_get_column_schema__valid_schema_type_but_invalid_schema_column__raises_non_oed_schema_column_error(self, schema_type):
    """
    A column drawn from the other schema groups must be rejected with
    ``NonOedSchemaColumnError`` when looked up under ``schema_type``.
    """
    # Pool every column listed under a schema group other than the one
    # being tested.
    foreign_columns = []
    for group_name, group_columns in get_grouped_master_schema().items():
        if group_name == schema_type:
            continue
        foreign_columns.extend(group_columns)

    column = np.random.choice(foreign_columns)

    with self.assertRaises(NonOedSchemaColumnError):
        get_column_schema(schema_type, column)
def test_get_column_schema__valid_reinsscope_column(self, column):
    """
    Looking up a valid ``reinsscope`` column must return the entry stored
    in ``self.reinsscope_schema`` under ``('reinsscope', <lower-cased column>)``.
    """
    header = column.lower()
    expected = self.reinsscope_schema[('reinsscope', header)]

    actual = get_column_schema('reinsscope', header)

    self.assertEqual(expected, actual)
def test_get_column_schema__valid_loc_column(self, column):
    """
    Looking up a valid ``loc`` column must return the entry stored in
    ``self.loc_schema`` under ``('loc', <lower-cased column>)``.
    """
    header = column.lower()
    expected = self.loc_schema[('loc', header)]

    actual = get_column_schema('loc', header)

    self.assertEqual(expected, actual)
def test_get_column_schema__valid_schema_type_but_invalid_column__raises_non_oed_column_error(self, schema_type, column, random_str):
    """
    A header built by appending ``random_str`` to ``column`` must be
    rejected with ``NonOedColumnError``.
    """
    # NOTE(review): assumes ``random_str`` is never empty, otherwise the
    # header stays equal to ``column`` - confirm against the input strategy.
    bogus_header = column + random_str

    with self.assertRaises(NonOedColumnError):
        get_column_schema(schema_type, bogus_header)
def test_get_column_schema__master_schema_type__raises_oed_error(self, column):
    """
    Requesting a column schema with the ``master`` schema type must raise
    ``OedError``.
    """
    # Callable form of assertRaises: invokes
    # get_column_schema('master', column) and checks the raised type.
    self.assertRaises(OedError, get_column_schema, 'master', column)
def sample_column(schema_type, header, str_width=None, size=10):
    """
    Sample values for a given column in a given OED schema (``acc``,
    ``loc``, ``reinsinfo``, ``reinsscope``).

    The sampling strategy is chosen in this order: the column's own
    sampling method (when the schema defines one together with a column
    range), then the column range (when defined), then the data type range.

    :param schema_type: OED schema type indicator (``loc``, ``acc``,
                        ``reinsinfo``, or ``reinsscope``); passed unchanged
                        to ``get_column_schema``
    :type schema_type: str

    :param header: The column header
    :type header: str

    :param str_width: Optional argument applicable only to string type
                      columns with no defined column range. If given, it
                      sets a fixed width for each sampled string value
                      (20 characters are used otherwise)
    :type str_width: int

    :param size: Number of values to sample; non-positive values fall back
                 to 10
    :type size: int

    :return: Sampled values, or ``None`` when the column schema has no
             Python data type or one other than ``int``, ``float`` or
             ``str``
    :rtype: list
    """
    if size <= 0:
        size = 10

    col_schema = get_column_schema(schema_type, header)

    if col_schema['py_dtype'] is None:
        return None

    py_dtype = getattr(builtins, col_schema['py_dtype'])
    dtype_range = col_schema['dtype_range']
    column_range = col_schema['column_range']
    # An undefined (or empty) column range falls back to the dtype range.
    use_range = column_range or dtype_range

    try:
        sampling_info = json.loads(col_schema['column_sampling'])
    except (JSONDecodeError, TypeError, ValueError):
        # No usable sampling spec for this column.
        sampling_info = sampling_func = None
    else:
        sampling_func = get_method(sampling_info['func'])

    # A dedicated sampling method takes precedence over generic sampling.
    if (
        py_dtype in (int, float, str)
        and column_range is not None
        and sampling_func is not None
    ):
        # The first configured argument is replaced by the column range.
        extra_args = sampling_info['args'][1:]
        return [sampling_func(column_range, *extra_args) for _ in range(size)]

    if py_dtype is int:
        if not isinstance(use_range, range):
            return np.random.choice(use_range, size=size).tolist()
        if use_range.step == 1:
            return np.random.randint(
                use_range.start, use_range.stop, size=size
            ).tolist()
        # Stepped range: sample positions and map them back through the
        # range, so only values that actually belong to it are produced
        # (drawing from [start, stop) directly would ignore the step).
        positions = np.random.randint(0, len(use_range), size=size).tolist()
        return [use_range[pos] for pos in positions]

    if py_dtype is float:
        # Bounds are clamped to +/-1.79e+307 - presumably so that the span
        # between them stays representable as a float.
        low = max(min(use_range), -1.79e+307)
        high = min(max(use_range), +1.79e+307)
        return np.random.uniform(low, high, size=size).tolist()

    if py_dtype is str:
        if column_range is not None:
            # No sampling method here (handled above): pick range members.
            return [np.random.choice(column_range) for _ in range(size)]
        # Free-form strings: random alphanumerics of a fixed width.
        alphabet = list(string.ascii_letters + string.digits)
        width = str_width or 20
        return [
            ''.join(np.random.choice(alphabet, size=width))
            for _ in range(size)
        ]

    # Unsupported Python data type: nothing to sample.
    return None