def test_sanity_check(self):
    # Build exactly one fake record from the generator's faker schema.
    faker_schema = self.data_gen.get_faker_schema()
    record = FakerSchema().generate_fake(faker_schema, 1)

    date_field = u'lo_orderdate'
    int_field = u'lo_linenumber'

    # Before the sanity check the order date is still a datetime.date
    # (the original note says BigQuery expects a string instead).
    self.assertIsInstance(record[date_field], datetime.date)

    record = self.fakerowgen.sanity_check(record=record,
                                          fieldname=date_field)

    # After the sanity check the date must be a unicode string ...
    self.assertIsInstance(record[date_field], unicode)
    # ... that parses as YYYY-MM-DD (strptime raises if it does not).
    datetime.datetime.strptime(record[date_field], '%Y-%m-%d')

    # An oversized integer must come back no larger than max_int
    # (the original note says max_int is 10**11).
    record[int_field] = 10**12
    record = self.fakerowgen.sanity_check(record=record,
                                          fieldname=int_field)
    self.assertLessEqual(record[int_field], self.data_gen.max_int)
def generate_fake(self, fschema=None, key_dict=None):
    """
    Create one fake record and return it serialized as a JSON string.

    The record is generated from ``fschema``, every generated column is
    passed through ``self.sanity_check``, and, when ``key_dict`` is given,
    the converted key values are merged into the record.

    Arguments:
        fschema (dict): Contains a faker_schema (this should be generated
            by DataGenerator.get_faker_schema() ). It is not modified.
        key_dict (dict): Optional key columns whose values are supplied by
            the caller instead of being randomly generated.

    Returns:
        str: The record encoded with ``json.dumps``.
    """
    schema_faker = FakerSchema()

    if key_dict:
        # The key columns do not need to be randomly generated. Drop them
        # from a shallow copy so the caller's schema object is left intact
        # and can be reused for later calls.
        fschema = fschema.copy()
        for key in key_dict:
            fschema.pop(key, None)

    # Generate one fake record.
    data = schema_faker.generate_fake(fschema, 1)

    # Sanity-check datatypes and parameterized constraints column by column.
    for col_name in data:
        data = self.sanity_check(data, col_name)

    if key_dict:
        # Join the keys with the rest of the generated data.
        data.update(self.convert_key_types(key_dict))
        # NOTE(review): 'frequency' presumably arrives via key_dict and is
        # bookkeeping rather than an output column -- confirm with caller.
        data.pop('frequency')

    return json.dumps(data)
class TestFakerSchema(unittest.TestCase):
    # Exercises FakerSchema.generate_fake against a MockFaker backend and
    # checks only the container types of the generated output.

    def setUp(self):
        self.faker_schema = FakerSchema(faker=MockFaker())

    def test_generate_fake_flat_schema(self):
        # A flat schema with the default iteration count yields a dict.
        flat_schema = {
            'Full Name': 'name',
            'Address': 'address',
            'Email': 'email',
        }
        result = self.faker_schema.generate_fake(flat_schema)
        self.assertIsInstance(result, dict)

    def test_generate_fake_flat_schema_4_iterations(self):
        # Asking for four iterations yields a list with four entries.
        flat_schema = {
            'Full Name': 'name',
            'Address': 'address',
            'Email': 'email',
        }
        results = self.faker_schema.generate_fake(flat_schema, iterations=4)
        self.assertIsInstance(results, list)
        self.assertEqual(len(results), 4)

    def test_generate_fake_nested_schema(self):
        # A nested dict in the schema stays a dict in the output.
        location_schema = {
            'Address': 'street_address',
            'City': 'city',
            'Country': 'country',
            'Postal Code': 'postalcode',
        }
        nested_schema = {'Full Name': 'name', 'Location': location_schema}
        result = self.faker_schema.generate_fake(nested_schema)
        self.assertIsInstance(result, dict)
        self.assertIsInstance(result['Location'], dict)

    def test_generate_fake_schema_with_list(self):
        # A list in the schema stays a list in the output.
        employees = [{'Employee1': 'name'}, {'Employee2': 'name'}]
        list_schema = {'Employer': 'name', 'EmployeeList': employees}
        result = self.faker_schema.generate_fake(list_schema)
        self.assertIsInstance(result, dict)
        self.assertIsInstance(result['EmployeeList'], list)
def generate_fake(self, fschema):
    """
    Build one fake record from a faker schema and sanity-check it.

    Args:
        fschema: A dictionary containing a faker_schema (this should be
            generated by DataGenerator.get_faker_schema() )

    Returns:
        The generated record after every column has been passed through
        self.sanity_check.
    """
    # Ask a fresh FakerSchema for exactly one record.
    record = FakerSchema().generate_fake(fschema, 1)

    # Each column is checked in turn for datatype and parameterized
    # constraints; sanity_check hands back the (possibly adjusted) record.
    for column in record:
        record = self.sanity_check(record, column)

    return record
def fake_data(json_file, seed=0):
    """Return faked json data for every (year, month) combination

    Args:
        json_file: example json file with data to simulate
        seed (int): starting seed; it is incremented once per generated
            month and the incremented value is passed to ``_update_data``

    Returns:
        dict: maps each ``(year, month)`` pair to the value returned by
            ``_update_data`` for that month
    """
    # Pool of visited places, sized for the year that needs the most.
    places = _create_places(total=max(NPLACES.values()))

    # Derive the json schema from the example file.
    with open(json_file) as file_object:
        example = json.load(file_object)
    json_schema = get_json_schema(example)

    dutch_faker = Faker('nl_NL')
    dutch_faker.add_provider(geo)
    schema_faker = FakerSchema(faker=dutch_faker, locale='nl_NL')

    result = {}
    for year in YEARS:
        for month in MONTHS:
            month_schema = get_faker_schema(
                json_schema["properties"],
                custom=SCHEMA_TYPES,
                iterations={"timelineObjects": NACTIVITIES[year]},
            )
            generated = schema_faker.generate_fake(month_schema)

            # The first three characters of the month are parsed as an
            # abbreviated month name to find the first day of that month.
            month_start = datetime(
                year, datetime.strptime(month[:3], '%b').month, 1)

            # Every month gets its own seed value.
            seed += 1

            # Only the first NPLACES[year] places are used for this year.
            year_places = dict(
                itertools.islice(places.items(), NPLACES[year]))

            result[(year, month)] = _update_data(
                generated, month_start, year_places, seed=seed)
    return result
def generate(schema):
    """Generate fake data for the schema loaded from *schema*.

    *schema* is handed to ``load_json_from_file``; the loaded schema is then
    passed to a ``FakerSchema`` whose Faker instance has ``BornProvider``
    registered, and the generated data is returned.
    """
    provider_faker = Faker()
    provider_faker.add_provider(BornProvider)
    schema_faker = FakerSchema(faker=provider_faker)

    loaded_schema = load_json_from_file(schema)
    return schema_faker.generate_fake(loaded_schema)
class TestFakerSchema(unittest.TestCase):
    # Exercises FakerSchema.generate_fake against a MockFaker backend and
    # checks the types of the generated output.

    def setUp(self):
        self.faker_schema = FakerSchema(faker=MockFaker())

    def test_generate_fake_flat_schema(self):
        # A flat schema with the default iteration count yields a dict.
        flat_schema = {
            'Full Name': 'name',
            'Address': 'address',
            'Email': 'email',
        }
        result = self.faker_schema.generate_fake(flat_schema)
        self.assertIsInstance(result, dict)

    def test_generate_fake_flat_schema_4_iterations(self):
        # Asking for four iterations yields a list with four entries.
        flat_schema = {
            'Full Name': 'name',
            'Address': 'address',
            'Email': 'email',
        }
        results = self.faker_schema.generate_fake(flat_schema, iterations=4)
        self.assertIsInstance(results, list)
        self.assertEqual(len(results), 4)

    def test_generate_fake_nested_schema(self):
        # A nested dict in the schema stays a dict in the output.
        location_schema = {
            'Address': 'street_address',
            'City': 'city',
            'Country': 'country',
            'Postal Code': 'postalcode',
        }
        nested_schema = {'Full Name': 'name', 'Location': location_schema}
        result = self.faker_schema.generate_fake(nested_schema)
        self.assertIsInstance(result, dict)
        self.assertIsInstance(result['Location'], dict)

    def test_generate_fake_schema_with_list(self):
        # A list in the schema stays a list in the output.
        employees = [{'Employee1': 'name'}, {'Employee2': 'name'}]
        list_schema = {'Employer': 'name', 'EmployeeList': employees}
        result = self.faker_schema.generate_fake(list_schema)
        self.assertIsInstance(result, dict)
        self.assertIsInstance(result['EmployeeList'], list)

    def test_generate_fake_schema_with_choices(self):
        # A parenthesised, comma-separated value (noted in the original as
        # similar to fuzzy choices) must produce a str.
        choice_schema = {
            'Person': 'name',
            'Gender': '(Male,Female)',
        }
        result = self.faker_schema.generate_fake(choice_schema)
        self.assertIsInstance(result, dict)
        self.assertIsInstance(result['Gender'], str)

    def test_generate_fake_schema_with_date(self):
        # The date_of_birth value must come back as a str (the original
        # note says it is a date in isoformat).
        date_schema = {
            'Person': 'name',
            'Gender': '(Male,Female)',
            'BirthDay': 'date_of_birth',
        }
        result = self.faker_schema.generate_fake(date_schema)
        self.assertIsInstance(result, dict)
        self.assertIsInstance(result['BirthDay'], str)

    def test_generate_fake_schema_with_fake_args(self):
        # Positional arguments in the schema value: age between 18 and 80.
        args_schema = {
            'Person': 'name',
            'Gender': '(Male,Female)',
            'BirthDay': 'date_of_birth',
            'Age': 'random_int(18,80)',
        }
        result = self.faker_schema.generate_fake(args_schema)
        self.assertIsInstance(result, dict)
        self.assertIsInstance(result['Age'], int)

    def test_generate_fake_schema_with_fake_kargs(self):
        # Named arguments in the schema value: age between min=18 and
        # max=80.
        kwargs_schema = {
            'Person': 'name',
            'Gender': '(Male,Female)',
            'BirthDay': 'date_of_birth',
            'Age': 'random_int(min=18,max=80)',
        }
        result = self.faker_schema.generate_fake(kwargs_schema)
        self.assertIsInstance(result, dict)
        self.assertIsInstance(result['Age'], int)