def test_generate(model, **unused):
    """Exercise loom.runner.generate over a grid of sizes and densities.

    For each (row_count, density) combination this dumps a generate
    config, runs the generator against ``model``, and asserts that the
    rows, model, and groups outputs were all produced.
    """
    for row_count in [0, 1, 100]:
        for density in [0.0, 0.5, 1.0]:
            with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
                config_in = os.path.abspath('config.pb.gz')
                config = {
                    'generate': {
                        'row_count': row_count,
                        'density': density,
                    },
                }
                loom.config.config_dump(config, config_in)
                assert_found(config_in)
                rows_out = os.path.abspath('rows.pbs.gz')
                model_out = os.path.abspath('model.pb.gz')
                groups_out = os.path.abspath('groups')
                loom.runner.generate(
                    config_in=config_in,
                    model_in=model,
                    rows_out=rows_out,
                    model_out=model_out,
                    groups_out=groups_out,
                    debug=True)
                assert_found(rows_out, model_out, groups_out)
                group_counts = get_group_counts(groups_out)
                # Single-argument print() prints identically under
                # Python 2 and is also valid Python 3 syntax.
                print('group_counts: {}'.format(
                    ' '.join(map(str, group_counts))))
def test_tare(rows, schema_row, **unused):
    """Run the tare step and verify its output file is written."""
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        tares_path = os.path.abspath('tares.pbs.gz')
        loom.runner.tare(
            schema_row_in=schema_row,
            rows_in=rows,
            tares_out=tares_path)
        assert_found(tares_path)
def test_make_schema(model, **unused):
    """Derive a schema from a model and check the output exists."""
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        out_path = os.path.abspath('schema.json.gz')
        loom.format.make_schema(model_in=model, schema_out=out_path)
        assert_found(out_path)
def test_posterior_enum(name, tares, diffs, init, **unused):
    """Run posterior enumeration and verify the emitted sample count."""
    sample_count = 7
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        config_path = os.path.abspath('config.pb.gz')
        config = {
            'posterior_enum': {'sample_count': sample_count},
            'kernels': {
                'kind': {
                    'row_queue_capacity': 0,
                    'score_parallel': False,
                },
            },
        }
        loom.config.config_dump(config, config_path)
        assert_found(config_path)
        samples_path = os.path.abspath('samples.pbs.gz')
        loom.runner.posterior_enum(
            config_in=config_path,
            model_in=init,
            tares_in=tares,
            rows_in=diffs,
            samples_out=samples_path,
            debug=True)
        assert_found(samples_path)
        # Every requested sample must appear in the output stream.
        observed = sum(1 for _ in protobuf_stream_load(samples_path))
        assert_equal(observed, sample_count)
def test_generate_init(encoding, **unused):
    """Generate an initial model from an encoding and check the output."""
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        model_path = os.path.abspath('init.pb.gz')
        loom.generate.generate_init(
            encoding_in=encoding,
            model_out=model_path)
        assert_found(model_path)
def test_make_fake_encoding(schema, model, **unused):
    """Build a fake encoding from a schema/model pair; check the output."""
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        out_path = os.path.abspath('encoding.json.gz')
        loom.format.make_fake_encoding(
            schema_in=schema,
            model_in=model,
            encoding_out=out_path)
        assert_found(out_path)
def test_generate(model, **unused):
    """Sweep loom.runner.generate over row counts and densities.

    Dumps a generate config per combination, runs the generator against
    ``model``, and asserts the rows, model, and groups outputs exist.
    """
    for row_count in [0, 1, 100]:
        for density in [0.0, 0.5, 1.0]:
            with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
                config_in = os.path.abspath('config.pb.gz')
                config = {
                    'generate': {
                        'row_count': row_count,
                        'density': density,
                    },
                }
                loom.config.config_dump(config, config_in)
                assert_found(config_in)
                rows_out = os.path.abspath('rows.pbs.gz')
                model_out = os.path.abspath('model.pb.gz')
                groups_out = os.path.abspath('groups')
                loom.runner.generate(
                    config_in=config_in,
                    model_in=model,
                    rows_out=rows_out,
                    model_out=model_out,
                    groups_out=groups_out,
                    debug=True)
                assert_found(rows_out, model_out, groups_out)
                group_counts = get_group_counts(groups_out)
                # print() with one argument behaves the same under
                # Python 2's print statement and is Python 3 compatible.
                print('group_counts: {}'.format(
                    ' '.join(map(str, group_counts))))
def test_posterior_enum(name, tares, diffs, init, **unused):
    """Enumerate posterior samples and confirm the configured count."""
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        config_path = os.path.abspath('config.pb.gz')
        kind_kernel = {'row_queue_capacity': 0, 'score_parallel': False}
        config = {
            'posterior_enum': {'sample_count': 7},
            'kernels': {'kind': kind_kernel},
        }
        loom.config.config_dump(config, config_path)
        assert_found(config_path)
        samples_path = os.path.abspath('samples.pbs.gz')
        loom.runner.posterior_enum(
            config_in=config_path,
            model_in=init,
            tares_in=tares,
            rows_in=diffs,
            samples_out=samples_path,
            debug=True)
        assert_found(samples_path)
        sample_total = sum(1 for _ in protobuf_stream_load(samples_path))
        assert_equal(sample_total, config['posterior_enum']['sample_count'])
def test_tare(rows, schema_row, **unused):
    """Tare the rows against the schema row; check that output appears."""
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        out_path = os.path.abspath('tares.pbs.gz')
        tare_kwargs = {
            'schema_row_in': schema_row,
            'rows_in': rows,
            'tares_out': out_path,
        }
        loom.runner.tare(**tare_kwargs)
        assert_found(out_path)
def test_sparsify(rows, schema_row, **unused):
    """Tare then sparsify the rows, checking each output file."""
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        tares_path = os.path.abspath("tares.pbs.gz")
        diffs_path = os.path.abspath("diffs.pbs.gz")
        loom.runner.tare(
            schema_row_in=schema_row,
            rows_in=rows,
            tares_out=tares_path)
        assert_found(tares_path)
        loom.runner.sparsify(
            schema_row_in=schema_row,
            tares_in=tares_path,
            rows_in=rows,
            rows_out=diffs_path,
            debug=True)
        assert_found(diffs_path)
def test_make_fake_encoding(schema, model, **unused):
    """Produce a fake encoding and assert the file was written."""
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        encoding_path = os.path.abspath('encoding.json.gz')
        fake_kwargs = {
            'schema_in': schema,
            'model_in': model,
            'encoding_out': encoding_path,
        }
        loom.format.make_fake_encoding(**fake_kwargs)
        assert_found(encoding_path)
def test_shuffle(diffs, **unused):
    """Shuffle diffs with a fixed seed and verify the output exists."""
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        out_path = os.path.abspath('shuffled.pbs.gz')
        # Fixed seed keeps the shuffle deterministic across runs.
        loom.runner.shuffle(rows_in=diffs, rows_out=out_path, seed=12345)
        assert_found(out_path)
def test_import_rows(encoding, rows, rows_csv, **unused):
    """Import CSV rows and check the row count matches the reference."""
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        imported_path = os.path.abspath('rows.pbs.gz')
        loom.format.import_rows(
            encoding_in=encoding,
            rows_csv_in=rows_csv,
            rows_out=imported_path)
        assert_found(imported_path)
        reference_count = sum(1 for _ in protobuf_stream_load(rows))
        imported_count = sum(1 for _ in protobuf_stream_load(imported_path))
        assert_equal(imported_count, reference_count)
def test_import_rows(encoding, rows, rows_csv, **unused):
    """Importing CSV rows must preserve the number of rows."""
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        pbs_path = os.path.abspath('rows.pbs.gz')
        loom.format.import_rows(
            encoding_in=encoding,
            rows_csv_in=rows_csv,
            rows_out=pbs_path)
        assert_found(pbs_path)

        def count_rows(path):
            # Stream the protobuf file and count its messages.
            return sum(1 for _ in protobuf_stream_load(path))

        assert_equal(count_rows(pbs_path), count_rows(rows))
def test_make_encoding(schema, rows_csv, **unused):
    """Build an encoding from CSV rows, then import rows through it."""
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        encoding_path = os.path.abspath('encoding.json.gz')
        rows_path = os.path.abspath('rows.pbs.gz')
        loom.format.make_encoding(
            schema_in=schema,
            rows_in=rows_csv,
            encoding_out=encoding_path)
        assert_found(encoding_path)
        loom.format.import_rows(
            encoding_in=encoding_path,
            rows_csv_in=rows_csv,
            rows_out=rows_path)
        assert_found(rows_path)
def test_make_encoding(schema, rows_csv, **unused):
    """Encoding built from CSV should round-trip through import_rows."""
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        enc_out = os.path.abspath('encoding.json.gz')
        pbs_out = os.path.abspath('rows.pbs.gz')
        # First derive the encoding from the raw CSV.
        loom.format.make_encoding(
            schema_in=schema, rows_in=rows_csv, encoding_out=enc_out)
        assert_found(enc_out)
        # Then use that encoding to import the same CSV.
        loom.format.import_rows(
            encoding_in=enc_out, rows_csv_in=rows_csv, rows_out=pbs_out)
        assert_found(pbs_out)
def test_sparsify(rows, schema_row, **unused):
    """Sparsify rows against freshly-computed tares."""
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        tares_out = os.path.abspath('tares.pbs.gz')
        diffs_out = os.path.abspath('diffs.pbs.gz')
        # The tare step must succeed before sparsify can consume it.
        loom.runner.tare(
            schema_row_in=schema_row, rows_in=rows, tares_out=tares_out)
        assert_found(tares_out)
        loom.runner.sparsify(
            schema_row_in=schema_row,
            tares_in=tares_out,
            rows_in=rows,
            rows_out=diffs_out,
            debug=True)
        assert_found(diffs_out)
def test_one_to_one(rows, **unused):
    """Shuffling must permute rows without adding or dropping any."""
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        shuffled_path = os.path.abspath('rows_out.pbs.gz')
        loom.runner.shuffle(
            rows_in=rows, rows_out=shuffled_path, seed=12345)
        assert_found(shuffled_path)
        before = load_rows(rows)
        after = load_rows(shuffled_path)
        assert_equal(len(after), len(before))
        # Order should change, but the set of rows (by id) must not.
        assert_not_equal(after, before)
        after_sorted = sorted(after, key=lambda row: row.id)
        before_sorted = sorted(before, key=lambda row: row.id)
        assert_list_equal(before_sorted, after_sorted)
def test_posterior_enum(name, tares, diffs, init, **unused):
    """Posterior enumeration emits exactly the requested sample count."""
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        config_file = os.path.abspath("config.pb.gz")
        config = {
            "posterior_enum": {"sample_count": 7},
            "kernels": {
                "kind": {
                    "row_queue_capacity": 0,
                    "score_parallel": False,
                },
            },
        }
        loom.config.config_dump(config, config_file)
        assert_found(config_file)
        samples_file = os.path.abspath("samples.pbs.gz")
        loom.runner.posterior_enum(
            config_in=config_file,
            model_in=init,
            tares_in=tares,
            rows_in=diffs,
            samples_out=samples_file,
            debug=True)
        assert_found(samples_file)
        n_samples = sum(1 for _ in protobuf_stream_load(samples_file))
        assert_equal(n_samples, config["posterior_enum"]["sample_count"])
def test_one_to_one(rows, **unused):
    """Verify shuffle is a bijection: same rows, different order."""
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        out_path = os.path.abspath('rows_out.pbs.gz')
        loom.runner.shuffle(rows_in=rows, rows_out=out_path, seed=12345)
        assert_found(out_path)
        originals = load_rows(rows)
        permuted = load_rows(out_path)
        assert_equal(len(permuted), len(originals))
        assert_not_equal(permuted, originals)
        row_id = lambda row: row.id
        # Sorting by id must recover exactly the original rows.
        assert_list_equal(
            sorted(originals, key=row_id),
            sorted(permuted, key=row_id))
def test_generate(model, **unused):
    """Run the generator for every (row_count, density) combination.

    Each run dumps a config, invokes loom.runner.generate against
    ``model``, and asserts that all three output artifacts exist.
    """
    for row_count in [0, 1, 100]:
        for density in [0.0, 0.5, 1.0]:
            with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
                config_in = os.path.abspath("config.pb.gz")
                config = {
                    "generate": {
                        "row_count": row_count,
                        "density": density,
                    },
                }
                loom.config.config_dump(config, config_in)
                assert_found(config_in)
                rows_out = os.path.abspath("rows.pbs.gz")
                model_out = os.path.abspath("model.pb.gz")
                groups_out = os.path.abspath("groups")
                loom.runner.generate(
                    config_in=config_in,
                    model_in=model,
                    rows_out=rows_out,
                    model_out=model_out,
                    groups_out=groups_out,
                    debug=True,
                )
                assert_found(rows_out, model_out, groups_out)
                group_counts = get_group_counts(groups_out)
                # Call-form print with one argument is equivalent under
                # Python 2 and valid under Python 3.
                print("group_counts: {}".format(
                    " ".join(map(str, group_counts))))
def test_export_rows(encoding, rows, **unused):
    """Round-trip rows through CSV export/import and compare diffs."""
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        csv_dir = os.path.abspath('rows_csv')
        pbs_path = os.path.abspath('rows.pbs.gz')
        loom.format.export_rows(
            encoding_in=encoding,
            rows_in=rows,
            rows_csv_out=csv_dir,
            chunk_size=51)
        assert_found(csv_dir)
        # chunk_size=51 forces chunking; at least the first chunk exists.
        assert_found(os.path.join(csv_dir, 'rows.0.csv.gz'))
        loom.format.import_rows(
            encoding_in=encoding,
            rows_csv_in=csv_dir,
            rows_out=pbs_path)
        assert_found(pbs_path)
        expected_rows = load_rows(rows)
        actual_rows = load_rows(pbs_path)
        assert_equal(len(actual_rows), len(expected_rows))
        key = lambda row: row.id
        actual_rows.sort(key=key)
        expected_rows.sort(key=key)
        assert_close(
            [row.diff for row in actual_rows],
            [row.diff for row in expected_rows])
def test_export_rows(encoding, rows, **unused):
    """Export rows to chunked CSV, re-import, and compare row diffs."""
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        csv_out = os.path.abspath('rows_csv')
        reimported = os.path.abspath('rows.pbs.gz')
        loom.format.export_rows(
            encoding_in=encoding,
            rows_in=rows,
            rows_csv_out=csv_out,
            chunk_size=51)
        assert_found(csv_out)
        assert_found(os.path.join(csv_out, 'rows.0.csv.gz'))
        loom.format.import_rows(
            encoding_in=encoding,
            rows_csv_in=csv_out,
            rows_out=reimported)
        assert_found(reimported)
        original = load_rows(rows)
        round_tripped = load_rows(reimported)
        assert_equal(len(round_tripped), len(original))
        # Align both sides by row id before comparing diffs.
        original.sort(key=lambda row: row.id)
        round_tripped.sort(key=lambda row: row.id)
        assert_close(
            [row.diff for row in round_tripped],
            [row.diff for row in original])
def test_shuffle(diffs, **unused):
    """Deterministic shuffle of diffs should produce an output file."""
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        shuffle_seed = 12345
        shuffled = os.path.abspath('shuffled.pbs.gz')
        loom.runner.shuffle(
            rows_in=diffs,
            rows_out=shuffled,
            seed=shuffle_seed)
        assert_found(shuffled)