def test_string_datum_good(self, mock1):
    """Generate 500 records from 'char_gen_good.json' and check key counts.

    The test passes when exactly four distinct keys are seen and each of
    them occurs once per generated record.

    mock1 is not used in the body; presumably it is injected by a patch
    decorator outside this view.
    """
    test_num = 500
    generator = DataGenerator('char_gen_good.json', seed='1234567890')
    records = [generator.generate() for _ in range(test_num)]

    # Tally how often each key shows up across all generated records.
    key_counts = {}
    keys_seen = 0
    for record in records:
        for field in record:
            keys_seen += 1
            key_counts[field] = key_counts.get(field, 0) + 1

    assert len(key_counts) == 4
    # Each key must have been seen exactly once per record.
    for field in key_counts:
        assert key_counts[field] == test_num
    assert keys_seen == 4 * test_num
def test_boolean_dat(self, mock1):
    """Check boolean generation and BooleanDatum's validation of 'values'.

    First generates one record from 'boolean_gen.json' and checks that
    'field1' compares equal to True or False. Then, 20 times over, builds
    a field definition by hand and checks that BooleanDatum:

    * raises AssertionError when 'values' is a list,
    * raises AssertionError when only the 'True' probability is given,
    * constructs and generates without AssertionError once both 'True'
      and 'False' probabilities are present.

    mock1 is not used in the body; presumably it is injected by a patch
    decorator outside this view.
    """
    gen = DataGenerator('boolean_gen.json')
    data = gen.generate()
    assert data['field1'] in (True, False)

    for _ in range(20):
        bool_dat = {'type': 'boolean', 'fieldName': 'f1', 'values': []}

        # NOTE: self.fail() raises AssertionError itself, so it must live in
        # the ``else`` branch; inside the ``try`` it would be swallowed by
        # ``except AssertionError`` and the check could never fail.
        try:
            BooleanDatum(bool_dat)
        except AssertionError:
            pass
        else:
            self.fail('value was not of correct type')

        # Only one of the two probabilities is supplied.
        bool_dat['values'] = {'True': 0.1}
        try:
            BooleanDatum(bool_dat)
        except AssertionError:
            pass
        else:
            self.fail('Probabilities were not all present')

        # Both probabilities present: construction and generation must work.
        bool_dat['values']['False'] = 0.9
        try:
            datum = BooleanDatum(bool_dat)
            datum.generate(random)
        except AssertionError:
            self.fail('Should not have thrown error here')
def test_string_datum_good(self, mock1):
    """Count the keys of 500 records generated from 'char_gen_good.json'.

    Expects four distinct keys overall, each occurring exactly 500 times,
    for a grand total of 2000 keys.

    mock1 is not used in the body; presumably it is injected by a patch
    decorator outside this view.
    """
    gen = DataGenerator('char_gen_good.json', seed='1234567890')
    test_num = 500

    samples = []
    for _ in range(test_num):
        samples.append(gen.generate())

    # Per-key occurrence counts plus a running total of all keys seen.
    occurrences = {}
    grand_total = 0
    for sample in samples:
        for name in sample:
            occurrences.setdefault(name, 0)
            occurrences[name] += 1
            grand_total += 1

    assert len(occurrences) == 4
    for name in occurrences:
        assert occurrences[name] == test_num
    assert grand_total == 4 * test_num
def test_threaded_hdfs_write(self, mock1):
    """Check that export_hdfs leaves the generator's HDFS data pool empty.

    mock1 is not used in the body; presumably it is injected by a patch
    decorator outside this view.
    """
    source = DataGenerator(test_config)
    pending = [source.generate() for _ in range(11)]

    threaded = ThreadedGenerator(test_config, 10, ["HDFS"], 10)
    # Pre-load the pool with the 11 records generated above.
    threaded.hdfs_data_pool = pending
    threaded.export_hdfs({})

    assert threaded.hdfs_data_pool == [], 'Data pool should be empty'
def test_json_string_config(self):
    """Build a generator from an inline JSON schema string.

    Generates 50 records and expects exactly three distinct values for
    'test_field'. A ValueError during construction or generation fails
    the test.
    """
    schema = """[{"fieldName": "test_field", "type":"string", "values":["a", "b", "c"]}]"""
    distinct = []
    try:
        generator = DataGenerator(schema)
        for _ in range(50):
            value = generator.generate()['test_field']
            if value not in distinct:
                distinct.append(value)
    except ValueError:
        self.fail("Shouldn't have failed when generating data")
    assert len(distinct) == 3, 'Should have at least 3 different values generated'
def test_json_string_config(self):
    """Pass the schema as a JSON string instead of a file name.

    Collects the distinct 'test_field' values seen over 50 generated
    records and expects exactly three of them. Any ValueError raised
    along the way is reported as a test failure.
    """
    json_config = """[{"fieldName": "test_field", "type":"string", "values":["a", "b", "c"]}]"""
    seen_values = []
    try:
        gen = DataGenerator(json_config)
        for _ in range(50):
            record = gen.generate()
            if record['test_field'] in seen_values:
                continue
            seen_values.append(record['test_field'])
    except ValueError:
        self.fail("Shouldn't have failed when generating data")
    assert len(seen_values) == 3, 'Should have at least 3 different values generated'
def test_gen_key_check_type(self, mock1):
    """Expect a KeyError about a missing 'type' for 'char_gen_bad-02.json'.

    The test fails if constructing the generator raises nothing, and
    otherwise checks the KeyError text.

    mock1 is not used in the body; presumably it is injected by a patch
    decorator outside this view.
    """
    try:
        DataGenerator('char_gen_bad-02.json', seed='1234567890')
    except KeyError as e:
        # Parenthesised so the line parses on Python 3 as well; with a
        # single argument it prints the same thing on Python 2.
        print(str(e))
        assert 'Could not find \'type\' in field of schema' in str(e)
    else:
        self.fail('Should have failed with KeyError on type')
def test_gen_check_field_type(self, mock1):
    """Expect a RuntimeError for the field type in 'char_gen_bad-04.json'.

    The test fails if constructing the generator raises nothing, and
    otherwise checks the RuntimeError text.

    mock1 is not used in the body; presumably it is injected by a patch
    decorator outside this view.
    """
    try:
        DataGenerator('char_gen_bad-04.json', seed='1234567890')
    except RuntimeError as e:
        assert (
            'was not found. Please change the field type or implement a new datum'
            in str(e))
    else:
        # The message previously named TypeError, which did not match the
        # RuntimeError this test actually expects.
        self.fail('Should have failed with RuntimeError')
def test_map_gen_good(self, mock1):
    """Check the field1 -> field2 mapping produced from 'map_gen-01.json'.

    For each of 50 generated records: 'y' must map to '', 'a' to 'vowel'
    and any other field1 value to 'consonant'.

    mock1 is not used in the body; presumably it is injected by a patch
    decorator outside this view.
    """
    # NOTE(review): a ValueError from construction or generation is only
    # printed, so the test still passes in that case. This mirrors the
    # original behaviour; confirm whether it should fail instead.
    try:
        gen = DataGenerator('map_gen-01.json')
        for _ in range(50):
            data = gen.generate()
            if data['field1'] == 'y':
                assert data['field2'] == ''
            elif data['field1'] == 'a':
                assert data['field2'] == 'vowel'
            else:
                assert data['field2'] == 'consonant'
    except ValueError as e:
        # Parenthesised so the line parses on Python 3 as well; with a
        # single argument it prints the same thing on Python 2.
        print(e)
def test_num_datum_good(self, mock1):
    """Check types and rough means of 500 records from 'num_gen_good.json'.

    'field1' must be an int and 'field5' a float in every record, and each
    record must have 11 entries. The mean of 'field1' must lie within 10
    of 50 and the mean of 'field5' within 10 of 100.

    mock1 is not used in the body; presumably it is injected by a patch
    decorator outside this view.
    """
    generator = DataGenerator('num_gen_good.json')
    test_num = 500
    records = []
    mean_field1 = 0
    mean_field5 = 0
    for _ in range(test_num):
        record = generator.generate()
        assert type(record['field1']) is int
        assert type(record['field5']) is float
        # Build the means incrementally, one weighted sample at a time.
        mean_field1 += float(record['field1']) / test_num
        mean_field5 += record['field5'] / test_num
        records.append(record)
        assert len(record) == 11

    # ensure they are at least in the right general range
    assert abs(mean_field1 - 50) < 10
    assert abs(mean_field5 - 100) < 10
    assert len(records) == test_num
def test_dup_fields(self, mock1):
    """Expect a ValueError about duplicate field names for 'dup_fields.json'.

    The test fails if constructing the generator raises nothing, and
    otherwise checks the ValueError text.

    mock1 is not used in the body; presumably it is injected by a patch
    decorator outside this view.
    """
    try:
        DataGenerator('dup_fields.json')
    except ValueError as e:
        assert 'Cannot have duplicate field names' in str(e)
    else:
        # Without this branch the test would pass vacuously whenever the
        # duplicate fields were silently accepted.
        self.fail('Should have failed with ValueError on duplicate field names')
def test_num_datum_good(self, mock1):
    """Generate 500 numeric records and sanity-check types and averages.

    Each record must hold an int 'field1', a float 'field5' and 11 entries
    in total. The average of 'field1' has to be within 10 of 50, and the
    average of 'field5' within 10 of 100.

    mock1 is not used in the body; presumably it is injected by a patch
    decorator outside this view.
    """
    gen = DataGenerator('num_gen_good.json')
    test_num = 500
    generated = []
    avg1 = 0
    avg2 = 0
    for _ in range(test_num):
        row = gen.generate()
        first, fifth = row['field1'], row['field5']
        assert type(first) is int
        assert type(fifth) is float
        # Each sample contributes 1/test_num of its value to the average.
        avg1 += float(first) / test_num
        avg2 += fifth / test_num
        generated.append(row)
        assert len(row) == 11

    # ensure they are at least in the right general range
    assert abs(avg1 - 50) < 10
    assert abs(avg2 - 100) < 10
    assert len(generated) == test_num
def test_gen_check_values(self, mock1):
    """Expect a KeyError about missing 'values' for 'char_gen_bad-05.json'.

    The test fails if constructing the generator raises nothing, and
    otherwise checks the KeyError text.

    mock1 is not used in the body; presumably it is injected by a patch
    decorator outside this view.
    """
    try:
        DataGenerator('char_gen_bad-05.json', seed='1234567890')
    except KeyError as err:
        message = str(err)
        assert 'Missing key: values in field3' in message
    else:
        self.fail('Should have failed with KeyError')
def test_gen_key_check_root(self, mock1):
    """Expect a TypeError about a non-list root for 'char_gen_bad-03.json'.

    The test fails if constructing the generator raises nothing, and
    otherwise checks the TypeError text.

    mock1 is not used in the body; presumably it is injected by a patch
    decorator outside this view.
    """
    try:
        DataGenerator('char_gen_bad-03.json', seed='1234567890')
    except TypeError as err:
        message = str(err)
        assert 'Root of JSON Schema is not a list' in message
    else:
        self.fail('Should have failed with TypeError')
def test_gen_key_check_field(self, mock1):
    """Expect a KeyError about a missing 'fieldName' for 'char_gen_bad-01.json'.

    The test fails if constructing the generator raises nothing, and
    otherwise checks the KeyError text.

    mock1 is not used in the body; presumably it is injected by a patch
    decorator outside this view.
    """
    try:
        DataGenerator('char_gen_bad-01.json', seed='1234567890')
    except KeyError as err:
        message = str(err)
        assert "Could not find 'fieldName' in field of schema" in message
    else:
        self.fail('Should have failed with KeyError on fieldName')
def test_bad_distribution(self, mock1):
    """Expect a ValueError about the distribution name for 'bad_dist.json'.

    The test fails if constructing the generator raises nothing, and
    otherwise checks the ValueError text.

    mock1 is not used in the body; presumably it is injected by a patch
    decorator outside this view.
    """
    try:
        DataGenerator('bad_dist.json')
    except ValueError as e:
        assert ('Distribution can only be one of: uniform, exponential, gaussian, or gamma'
                in str(e))
    else:
        # Without this branch the test would pass vacuously whenever the
        # bad distribution was silently accepted.
        self.fail('Should have failed with ValueError on bad distribution')