def test_nested_count_edge_cases(count, as_list, expected):
    """Check the 'outer' nested value produced for a count/as_list pairing.

    The arguments are presumably supplied by a parametrize decorator that is
    not visible in this view.
    """
    inner_builder = builder.spec_builder()
    inner_spec = inner_builder.values("inner", ["a", "b", "c"]).to_spec()

    outer_builder = builder.spec_builder()
    outer_field = outer_builder.nested("outer", inner_spec, count=count, as_list=as_list)
    spec = outer_field.to_spec()

    # only the first generated record is inspected
    record = next(spec.generator(1))
    assert record['outer'] == expected
def test_single_nested():
    """A nested field yields a dict keyed by the inner spec's field names, in order."""
    # Layout under test:
    #   Geo
    #     - Place
    #     - Coord
    geo_spec = builder.spec_builder()
    geo_spec.add_field("place_id:uuid", {})
    geo_spec.add_field("coordinates", builder.geo_pair(as_list=True))

    outer = builder.spec_builder()
    outer.add_field("id:uuid", {})
    outer.add_field("geo", builder.nested(fields=geo_spec.build()))
    spec = outer.build()

    geo_value = Loader(spec).get('geo').next(0)
    assert isinstance(geo_value, dict)
    assert list(geo_value.keys()) == ['place_id', 'coordinates']
def test_mac_address_dashes():
    """With dashes requested, the generated mac value is 17 characters and contains '-'."""
    mac_builder = builder.spec_builder()
    mac_builder.mac('mac', dashes='true')

    records = mac_builder.build().generator(1, enforce_schema=True)
    value = next(records)['mac']

    assert len(value) == 17
    assert '-' in value, f'No dashes in: {value}'
def test_api_change():
    """Refs created through the refs builder can be combined into an email field."""
    animal_names = ['zebra', 'hedgehog', 'llama', 'flamingo']
    action_list = ['fling', 'jump', 'launch', 'dispatch']
    domain_weights = {"gmail.com": 0.6, "yahoo.com": 0.3, "hotmail.com": 0.1}

    # top-level builder that produces the final spec
    spec_builder = builder.spec_builder()
    # companion builder that fills in the refs section of the spec
    refs = spec_builder.refs_builder

    # one reference per raw data set
    animals = refs.values('ANIMALS', data=animal_names)
    actions = refs.values('ACTIONS', data=action_list)
    domains = refs.values('DOMAINS', data=domain_weights)

    # HANDLE joins ANIMALS and ACTIONS; email joins HANDLE and DOMAINS
    handles = refs.combine('HANDLE', refs=[animals, actions], join_with='_')
    spec_builder.combine('email', refs=[handles, domains], join_with='@')

    records = spec_builder.build().generator(1)
    first = next(records)
    assert first['email'].startswith('zebra_fling@')
def test_load_spec_undefined_refs():
    """Loading a combine field whose refs were never defined raises SpecException."""
    field_builder = builder.spec_builder()
    field_builder.add_field('foo', builder.combine(['ONE', 'TWO']))
    spec_undefined_refs = field_builder.build()

    loader = datacraft.Loader(spec_undefined_refs)
    with pytest.raises(datacraft.SpecException):
        loader.get('foo')
def test_add_refs():
    """Refs passed to add_refs as keywords show up under the built spec's 'refs' entry."""
    refs_holder = builder.spec_builder()
    refs_holder.add_refs(
        foo=builder.values(['bob', 'joe', 'bobby joe']),
        bar=builder.values([1, 2, 3]),
    )
    spec = refs_holder.build()

    for ref_name in ('foo', 'bar'):
        assert ref_name in spec.get('refs', [])
def test_spec_builder():
    """to_spec() on a values field gives a DataSpec whose first record is {'name': 'a'}."""
    name_field = builder.spec_builder().values('name', ['a', 'b', 'c'])
    spec = name_field.to_spec()
    assert isinstance(spec, DataSpec)

    first_record = next(spec.generator(1))
    assert first_record == {'name': 'a'}
def test_add_fields():
    """Fields passed to add_fields as keywords are present in the built spec."""
    fields_holder = builder.spec_builder()
    fields_holder.add_fields(
        foo=builder.templated('{{first}}: {{last}}'),
        bar=builder.values([1, 2, 3]),
    )
    spec = fields_holder.build()

    for field_name in ('foo', 'bar'):
        assert field_name in spec
def test_shorthand_key_support(key_as_spec, field_name, first_value_contains):
    """A field added with ``key_as_spec`` as its key and an empty spec body generates a value.

    The first record must contain ``field_name`` and the string form of its value
    must contain ``first_value_contains``. The arguments are presumably supplied
    by a parametrize decorator that is not visible in this view.
    """
    shorthand_builder = builder.spec_builder()
    shorthand_builder.add_field(key_as_spec, {})

    records = shorthand_builder.build().generator(1, enforce_schema=True)
    first = next(records)

    assert field_name in first
    assert first_value_contains in str(first[field_name])
def test_load_ref_by_name():
    """Refs in a spec with no regular fields can be fetched from the Loader by name."""
    refs_only_spec = (
        builder.spec_builder()
        .add_ref('ONE', 'uno')
        .add_ref('TWO', 'dos')
        .build()
    )
    loader = datacraft.Loader(refs_only_spec)

    one_supplier = loader.get('ONE')
    assert one_supplier.next(0) == 'uno'
    two_supplier = loader.get('TWO')
    assert two_supplier.next(0) == 'dos'
def test_distribution_as_count():
    """A distribution can be passed as the count of a values field."""
    count_distribution = distributions.uniform(1, 3)
    name_field = builder.spec_builder().values('name', ['a', 'b', 'c'], count=count_distribution)
    spec = name_field.to_spec()
    assert isinstance(spec, DataSpec)

    first_record = next(spec.generator(1))
    produced = len(first_record['name'])
    assert 1 <= produced < 3
def test_rand_range():
    """rand_range with cast='int' gives a numeric value within the 100..110 bounds."""
    range_builder = builder.spec_builder()
    range_builder.add_field("field", builder.rand_range([100.9, 109.9], cast="int"))
    spec = range_builder.build()

    value = Loader(spec).get('field').next(0)
    assert str(value).isnumeric()
    # the lower bound is 100 because the value occasionally gets rounded down to 100
    assert 100 <= value <= 110
def test_select_list_ref_contains_data():
    """select_list_subset can take its data from a ref rather than inline data."""
    pets_builder = builder.spec_builder()
    pets_builder.select_list_subset('pets', data=None, ref_name='pets_list', count=2)
    # the actual list lives in the refs section under the referenced name
    pets_builder.refs().values(key='pets_list', data=['goat', 'sheep', 'bear', 'cow', 'dragon'])

    spec = pets_builder.build()
    chosen = Loader(spec).get('pets').next(0)

    assert isinstance(chosen, list)
    assert len(chosen) == 2
def test_to_pandas():
    """to_pandas(30) gives 30 rows with id/lat/lon columns and in-range coordinates."""
    frame_builder = builder.spec_builder()
    frame_builder.rand_range('id', [1, 100], cast='int')
    frame_builder.geo_lat('lat')
    frame_builder.geo_long('lon')

    frame = frame_builder.build().to_pandas(30)

    assert len(frame) == 30
    assert sorted(frame.columns) == sorted(['id', 'lat', 'lon'])

    latitudes = frame['lat']
    longitudes = frame['lon']
    assert latitudes.min() >= -90
    assert longitudes.min() >= -180
    assert latitudes.max() <= 90
    assert longitudes.max() <= 180
def test_multi_nested():
    """Two levels of nesting resolve to dicts within dicts with the expected keys."""
    # Layout under test:
    #   User
    #     - Geo
    #       - Place
    #       - Coord
    geo_spec = builder.spec_builder()
    geo_spec.add_field("place_id:uuid", {})
    geo_spec.add_field("coordinates", builder.geo_pair(as_list=True))

    user_spec = builder.spec_builder()
    user_spec.add_field("user_id:uuid", {})
    user_spec.add_field("geo", builder.nested(fields=geo_spec.build()))

    outer = builder.spec_builder()
    outer.add_field("id:uuid", {})
    outer.add_field("user", builder.nested(fields=user_spec.build()))
    spec = outer.build()

    user_value = Loader(spec).get('user').next(0)
    assert isinstance(user_value, dict)
    assert list(user_value.keys()) == ['user_id', 'geo']

    geo_value = user_value['geo']
    assert isinstance(geo_value, dict)
    assert list(geo_value.keys()) == ['place_id', 'coordinates']
def test_api_builder():
    """The direct builder methods and the fluent add_ref/add_field style give equal specs.

    Both builders describe the same refs (DOMAINS, ANIMALS, ACTIONS, HANDLE) and
    the same 'email' field, so the built specs are compared for equality.
    """
    # raw data for both specs
    animal_names = ['zebra', 'hedgehog', 'llama', 'flamingo']
    action_list = ['fling', 'jump', 'launch', 'dispatch']
    domain_weights = {"gmail.com": 0.6, "yahoo.com": 0.3, "hotmail.com": 0.1}

    # first build uses direct build methods
    builder1 = builder.spec_builder()
    refs1 = builder1.refs_builder
    domains = refs1.values('DOMAINS', domain_weights)
    animals = refs1.values('ANIMALS', animal_names)
    actions = refs1.values('ACTIONS', action_list, sample=True)
    # HANDLE must name the refs it joins; without them it would not match the
    # fluent version below, which combines ANIMALS and ACTIONS
    handles = refs1.combine('HANDLE', refs=[animals, actions], join_with='_')
    builder1.combine('email', refs=[handles, domains])
    spec1 = builder1.build()

    # second builder uses the fluent api style
    builder2 = builder.spec_builder()
    animals_spec = builder.values(data=animal_names)
    actions_spec = builder.values(data=action_list, sample=True)
    domains_spec = builder.values(data=domain_weights)
    # combines ANIMALS and ACTIONS
    handle_spec = builder.combine(refs=['ANIMALS', 'ACTIONS'], join_with='_')
    builder2.add_ref('DOMAINS', domains_spec) \
        .add_ref('ANIMALS', animals_spec) \
        .add_ref('ACTIONS', actions_spec) \
        .add_ref('HANDLE', handle_spec)
    builder2.add_field('email', builder.combine(refs=['HANDLE', 'DOMAINS']))
    spec2 = builder2.build()

    assert spec1 == spec2
def test_load_spec_weighted_ref():
    """The two most heavily weighted refs dominate the values from a weighted_ref field."""
    ref_weights = {
        "POSITIVE": 0.5,
        "NEGATIVE": 0.4,
        "NEUTRAL": 0.1,
    }
    weighted_ref = (
        builder.spec_builder()
        .add_field('foo', builder.weighted_ref(ref_weights))
        .add_ref('POSITIVE', ['yes'])
        .add_ref('NEGATIVE', ['no'])
        .add_ref('NEUTRAL', ['meh'])
        .build()
    )
    supplier = datacraft.Loader(weighted_ref).get('foo')

    # sample 100 values; most are expected to be the positive and negative ones
    tally = Counter(supplier.next(index) for index in range(100))

    # the two most common entries should be yes and no
    top_two = [value for value, _ in tally.most_common(2)]
    assert 'yes' in top_two
    assert 'no' in top_two
def _geo_pair_spec(**config):
    """Build a spec with one field, 'pair', defined by ``builder.geo_pair(**config)``."""
    pair_builder = builder.spec_builder()
    pair_builder.add_field('pair', builder.geo_pair(**config))
    return pair_builder.build()
def _ip_precise_spec(**config):
    """Build a spec with one field, 'network', defined by ``builder.ip_precise(**config)``."""
    network_builder = builder.spec_builder()
    network_builder.add_field('network', builder.ip_precise(**config))
    return network_builder.build()
def _ipv4_spec(**config):
    """Build a spec with one field, 'network', defined by ``builder.ipv4(**config)``."""
    network_builder = builder.spec_builder()
    network_builder.add_field('network', builder.ipv4(**config))
    return network_builder.build()
def _date_iso_us_spec(**config):
    """Build a spec with one field, 'foo', defined by ``builder.date_iso_us(**config)``."""
    date_builder = builder.spec_builder()
    date_builder.add_field('foo', builder.date_iso_us(**config))
    return date_builder.build()
def _char_class_spec(data, **config):
    """Build a spec with one field, 'name', defined by ``builder.char_class``.

    ``data`` is forwarded as the ``data`` keyword and ``config`` as extra keywords.
    """
    name_builder = builder.spec_builder()
    name_builder.add_field("name", builder.char_class(data=data, **config))
    return name_builder.build()
def _geo_long_spec(**config):
    """Build a spec with one field, 'long', defined by ``builder.geo_long(**config)``."""
    long_builder = builder.spec_builder()
    long_builder.add_field('long', builder.geo_long(**config))
    return long_builder.build()
def test_add_refs_edge_cases():
    """A field and a ref may share the name 'foo'; the built spec still contains 'foo'."""
    template = '{{first}}: {{last}}'
    shared_name_builder = builder.spec_builder()
    # same key used once as a field and once as a ref
    shared_name_builder.templated('foo', data=template)
    shared_name_builder.add_ref('foo', builder.templated(template))

    spec = shared_name_builder.build()
    assert 'foo' in spec
"type": "values", "data": 1 } } }), ] @pytest.mark.parametrize("generated_spec,expected_spec", field_spec_build_tests) def test_spec_builder(generated_spec, expected_spec): assert generated_spec == expected_spec full_spec_build_tests = [ (builder.spec_builder().values('name', [1, 2, 3], prefix="foo"), { "name": { "type": "values", "data": [1, 2, 3], "config": { "prefix": "foo" } } }), (builder.spec_builder().combine('name', refs=["ONE", "TWO"], join_with='@'), { "name": { "type": "combine", "refs": ["ONE", "TWO"], "config": { "join_with": "@"
def one_two_builder():
    """Return a spec builder holding two values fields, 'one' and 'two'."""
    prepared = builder.spec_builder()
    for field_name, field_values in (('one', ["uno", "ichi"]), ('two', ["dos", "ni"])):
        prepared.values(field_name, field_values)
    return prepared
def test_default_delim_mac_address():
    """A mac field built with no configuration generates a 17 character value."""
    mac_builder = builder.spec_builder()
    mac_builder.add_field('mac', builder.mac())
    spec = mac_builder.build()

    first_record = next(spec.generator(1))
    assert len(first_record['mac']) == 17
def _cc_abbrev_spec(abbrev, **config):
    """Build a spec with one field, 'name', defined by ``builder.char_class_abbrev``.

    ``abbrev`` is forwarded as the ``cc_abbrev`` keyword and ``config`` as extra keywords.
    """
    name_builder = builder.spec_builder()
    name_builder.add_field("name", builder.char_class_abbrev(cc_abbrev=abbrev, **config))
    return name_builder.build()
def _geo_lat_spec(**config):
    """Build a spec with one field, 'lat', defined by ``builder.geo_lat(**config)``."""
    lat_builder = builder.spec_builder()
    lat_builder.add_field('lat', builder.geo_lat(**config))
    return lat_builder.build()