def test_strings_from_regex_digit():
    """A single ``[0-9]`` class yields the ten digit strings in order."""
    expected_strings = [str(digit) for digit in range(10)]

    generator, size = strings_from_regex('[0-9]')

    assert size == 10
    assert list(generator) == expected_strings
def _get_primary_keys(self, table_name, num_rows):
    """Return the next ``num_rows`` primary key values for the requested table.

    The first time a table is requested, a value generator is built from
    the metadata of its primary key field and cached in
    ``self._primary_key_generators``, together with the amount of values
    it can still produce in ``self._remaining_primary_keys``. Later calls
    for the same table keep consuming the cached generator, so they
    continue where the previous call stopped.

    Args:
        table_name (str):
            Name of the table to get the primary keys from.
        num_rows (int):
            Number of primary key values to generate.

    Returns:
        pandas.Series or None:
            The generated primary key values, or ``None`` if
            ``self.metadata.get_primary_key`` returns ``None`` for the
            table (no primary key).

    Raises:
        ValueError:
            If the primary key field is not of type ``id``, if its
            subtype is not supported, or if the generator of the table
            does not have ``num_rows`` values left.
        NotImplementedError:
            If the primary key subtype is ``datetime``.
    """
    primary_key = self.metadata.get_primary_key(table_name)
    if primary_key is None:
        # Without a primary key there is nothing to generate, and the
        # field lookup below would fail on a ``None`` key.
        return None

    field = self.metadata.get_fields(table_name)[primary_key]
    generator = self._primary_key_generators.get(table_name)

    if generator is None:
        if field['type'] != 'id':
            raise ValueError('Only columns with type `id` can be primary keys')

        subtype = field.get('subtype', 'integer')
        if subtype == 'integer':
            # An endless counter never runs out of values.
            generator = itertools.count()
            remaining = np.inf
        elif subtype == 'string':
            regex = field.get('regex', r'^[a-zA-Z]+$')
            generator, remaining = utils.strings_from_regex(regex)
        elif subtype == 'datetime':
            raise NotImplementedError('Datetime ids are not yet supported')
        else:
            raise ValueError('Only `integer` or `string` id columns are supported.')

        self._primary_key_generators[table_name] = generator
        self._remaining_primary_keys[table_name] = remaining

    else:
        remaining = self._remaining_primary_keys[table_name]

    if remaining < num_rows:
        raise ValueError(
            'Not enough unique values for primary key of table {}'
            ' to generate {} samples.'.format(table_name, num_rows)
        )

    self._remaining_primary_keys[table_name] -= num_rows

    # ``range`` goes first so that ``zip`` stops as soon as ``num_rows``
    # values were taken, without pulling an extra value from the generator.
    return pd.Series([value for _, value in zip(range(num_rows), generator)])
def test_strings_from_regex_repeat_digit():
    """``\\d{1,3}`` reports 1110 strings, starting at '0' and ending at '999'."""
    generator, size = strings_from_regex(r'\d{1,3}')
    assert size == 1110

    generated = list(generator)
    first_string = generated[0]
    last_string = generated[-1]

    assert first_string == '0'
    assert last_string == '999'
def _make_ids(cls, field_metadata, length):
    """Build a ``pandas.Series`` holding ``length`` id values.

    Fields whose ``subtype`` is ``'string'`` take their values from
    ``strings_from_regex``, using the field ``regex`` (``'[a-zA-Z]+'``
    when none is given). Any other subtype, including the default
    ``'integer'``, produces the integers ``0`` to ``length - 1``.

    Args:
        field_metadata (dict):
            Metadata of the id field; the ``subtype`` and ``regex``
            entries are read.
        length (int):
            Number of ids to produce.

    Returns:
        pandas.Series:
            The generated ids.

    Raises:
        ValueError:
            If the maximum size reported by ``strings_from_regex`` is
            smaller than ``length``.
    """
    if field_metadata.get('subtype', 'integer') != 'string':
        return pd.Series(np.arange(length))

    regex = field_metadata.get('regex', '[a-zA-Z]+')
    generator, max_size = strings_from_regex(regex)
    if max_size < length:
        message = (
            'Unable to generate {} unique values for regex {}, the '
            'maximum number of unique values is {}.'
        )
        raise ValueError(message.format(length, regex, max_size))

    # Exactly ``length`` values are pulled, so no further trimming is needed.
    return pd.Series([next(generator) for _ in range(length)])
def test_strings_from_regex_literal():
    """A purely literal pattern produces exactly one string: itself."""
    generator, size = strings_from_regex('abcd')

    assert size == 1

    generated = list(generator)
    assert generated == ['abcd']
def test_strings_from_regex_repeat_literal():
    """``a{1,3}`` yields the literal repeated one, two and three times."""
    expected_strings = ['a' * count for count in range(1, 4)]

    generator, size = strings_from_regex('a{1,3}')

    assert size == 3
    assert list(generator) == expected_strings