def main():
    """Build LODS_mul, validate it, then validate it again after a disk round trip.

    The structure is created from the flattened test workloads, checked with
    the shared/unshared configuration tests, serialized to
    ``lods_mul_var.bin`` inside ``OUTPUT_FOLDER`` and finally reloaded so the
    very same checks can be repeated on the deserialized object.
    """

    def _run_checks(struct):
        # The same three checks are applied before and after serialization.
        check_shared_configs_consistency(struct)
        assert_no_shared_in_test(struct)
        assert_no_shared_in_test(struct, 'shared_traincomplement')

    # LODATAStruct_mul
    # Collapse the per-template workload lists into a single flat list.
    flat_test_workloads = []
    for template in TEST_WORKLOADS:
        flat_test_workloads.extend(TEST_WORKLOADS[template])

    lods = make_LODatastruct_mul(
        flat_test_workloads,
        DATA_FOLDER,
        CONFIG_PATH,
        with_intensive=False,
        si=True,
        destroy=False,
    )

    print("Running tests on newly created lods object....")
    _run_checks(lods)

    bin_path = os.path.join(OUTPUT_FOLDER, "lods_mul_var.bin")
    lods.serialize(bin_path, destroy=True)

    # Now reads the serialized object and repeat the tests
    print("\nRepeating tests after reading serialized lods...")
    # Drop the reference to the in-memory object before loading the copy.
    lods = None
    lods = LODataStruct.load_from_file(bin_path, autobuild=True)
    _run_checks(lods)
def main():
    """Load each ``.bin`` file found in ``IN_FOLDER`` and re-serialize it.

    ``OUT_FOLDER`` is created when it does not exist yet. Every directory
    entry of ``IN_FOLDER`` whose name contains ``".bin"`` is loaded without
    autobuild and written under the same file name into ``OUT_FOLDER`` with
    ``destroy=True``.
    """
    if not os.path.exists(OUT_FOLDER):
        os.makedirs(OUT_FOLDER)

    # Substring match (not a suffix match): any name containing ".bin" counts.
    bin_names = [name for name in os.listdir(IN_FOLDER) if ".bin" in name]

    for name in bin_names:
        source_path = os.path.join(IN_FOLDER, name)
        target_path = os.path.join(OUT_FOLDER, name)
        lods = LODataStruct.load_from_file(source_path, autobuild=False)
        lods.serialize(target_path, destroy=True)
def main():
    """Load LODS_mul and every LODS_X from disk and run the consistency tests.

    The loaded structures are gathered in a dict keyed by ``"mul"`` and by
    each template name of ``TEMPLATES``; that dict is then handed to the
    evaluation and training test helpers.
    """

    def _load(fname):
        # All structures live in OUTPUT_FOLDER and are autobuilt on load.
        return LODataStruct.load_from_file(
            os.path.join(OUTPUT_FOLDER, fname), autobuild=True)

    print("Loading LODS...")
    lods = {}
    lods["mul"] = _load("lods_mul.bin")
    for template in TEMPLATES:
        lods[template] = _load("lods_{}.bin".format(template))
    print("LODS loaded, starting tests...")

    # 1) Make sure test data the same between LODS_X and LODS_mul
    # 2) Make sure observation data is the same between LODS_X and LODS_mul
    for split_name in ('test', 'traincomplement'):
        test_eval_jobs(lods, split_name)
        test_eval_splits(lods, split_name)

    # 3) Make sure training data points in LODS_X also appear in LODS_mul
    test_training_jobs(lods, "trainval")
    test_training_splits(lods, "trainval")
def read_lods(describe=False):
    """Load the persisted LODS, point it at the tpcx-bb CSV folder and build it.

    Args:
        describe: when truthy, call ``describe()`` on the structure before
            returning it.

    Returns:
        The loaded and autobuilt structure. No min-max scaling is applied.
    """
    lods_path = os.path.join(LODS_FOLDER_PATH, LODS_FNAME)

    # Autobuild is set to false because persisted as built object.
    lods = LODataStruct.load_from_file(lods_path, autobuild=False)

    # Overwrite folder containing csvs and autobuild
    lods.folder = "../../../../datasets/tpcx-bb/"
    lods._autobuild()

    # Notice that I didn't call minmaxscale, because Ottertune code does that.
    if describe:
        lods.describe()

    return lods
def get_lods(describe=False):
    """Load the persisted LODS for tpcx-bb, build it and min-max scale X and Y.

    Args:
        describe: when truthy, call ``describe()`` on the structure before
            returning it.

    Returns:
        The loaded, autobuilt and scaled structure.
    """
    lods_path = os.path.join(LODS_FOLDER_PATH, LODS_FNAME)

    # autobuild is set to false because persisted as built object.
    lods = LODataStruct.load_from_file(lods_path, autobuild=False)

    # Overwrite folder containing csvs and autobuild
    lods.folder = "../../../../datasets/tpcx-bb/"
    lods._autobuild()

    # Scale "X" first, then "Y".
    for target in ("X", "Y"):
        lods.minmaxscale(target)

    if describe:
        lods.describe()

    return lods
def get_lods(describe=False):
    """Load the persisted LODS for streaming, build it and min-max scale X and Y.

    Args:
        describe: when truthy, call ``describe()`` on the structure before
            returning it.

    Returns:
        The loaded, autobuilt and scaled structure.
    """
    lods_path = os.path.join(LODS_FOLDER_PATH, LODS_FNAME)

    # autobuild is set to false because persisted as built object.
    lods = LODataStruct.load_from_file(lods_path, autobuild=False)

    # Overwrite folder containing csvs and autobuild
    lods.id_to_fname = None  # backward compatibility for streaming
    lods.folder = "../../../../datasets/streaming/"
    lods._autobuild()

    # Scale "X" first, then "Y".
    for target in ("X", "Y"):
        lods.minmaxscale(target)

    if describe:
        lods.describe()

    return lods
def main():
    """Build LODS_mul and every LODS_X, reload them from disk and test them.

    Steps, in order:

    1. Create ``OUTPUT_FOLDER`` when it does not exist.
    2. Build LODS_mul from the flattened test workloads and take its split
       definitions.
    3. Build one LODS_X per template, reusing those split definitions.
    4. Load ``lods_mul.bin`` and ``lods_<template>.bin`` from
       ``OUTPUT_FOLDER`` and min-max scale ``"X"`` and ``"Y"`` on each.
    5. Run the LODS_mul / LODS_X consistency tests and, when
       ``SEPARATE_INTERSECTIONS_MUL`` is set, the shared/unshared tests on
       LODS_mul.

    NOTE(review): the ``make_LODatastruct_*`` helpers presumably write the
    ``.bin`` files that step 4 reads back -- confirm against their
    implementation.
    """
    if not os.path.exists(OUTPUT_FOLDER):
        os.makedirs(OUTPUT_FOLDER)

    config_dict = get_config_dict()

    # Flatten the per-template workload lists into one list for LODS_mul.
    flat_test_workloads = [
        workload
        for template in TEST_WORKLOADS
        for workload in TEST_WORKLOADS[template]
    ]

    lods = make_LODatastruct_mul(
        flat_test_workloads,
        DATA_FOLDER,
        CONFIG_PATH,
        with_intensive=WITH_INTENSIVE,
        si=SEPARATE_INTERSECTIONS_MUL,
        config_dict=config_dict,
        shared_within_templates=SHARED_WITHIN_TEMPLATES)

    # Every LODS_X is built with the split definitions taken from LODS_mul.
    sd = lods.get_split_definitions()

    for temp in TEMPLATES:
        print("[making LODS_{}]".format(temp))
        make_LODatastruct_X(
            TEMPLATES[temp],
            TEST_WORKLOADS[temp],
            DATA_FOLDER,
            CONFIG_PATH,
            with_intensive=WITH_INTENSIVE,
            split_definitions=sd,
            X=temp,
            separate_intersections=SEPARATE_INTERSECTIONS_X,
            config_dict=config_dict,
            shared_within_templates=SHARED_WITHIN_TEMPLATES)

    # The autobuild flag follows DESTROY_ON_SERIALIZE. The original bound this
    # local but never used it; it now drives both load calls below.
    # NOTE(review): presumably a structure destroyed on serialization has to
    # be rebuilt on load -- confirm.
    autobuild = DESTROY_ON_SERIALIZE

    print("Loading LODS...")
    # Rebinding ``lods`` drops the reference to the freshly built LODS_mul
    # before the serialized copies are loaded.
    lods = {}
    lods_mul = LODataStruct.load_from_file(
        os.path.join(OUTPUT_FOLDER, "lods_mul.bin"), autobuild=autobuild)
    lods["mul"] = lods_mul
    print("LODS_mul loaded...")

    for t in TEMPLATES:
        print("Loading LODS_{}".format(t))
        lods[t] = LODataStruct.load_from_file(
            os.path.join(OUTPUT_FOLDER, "lods_{}.bin".format(t)),
            autobuild=autobuild)

    for lod_name in lods:
        lods[lod_name].minmaxscale("X")
        lods[lod_name].minmaxscale("Y")

    print("LODS loaded, starting tests...")
    print("**** CONSISTENCY TESTS (LODS_mul & LODS_X) *****")
    test_eval_jobs(lods, 'test')
    test_eval_splits(lods, 'test')
    test_eval_jobs(lods, 'traincomplement')
    test_eval_splits(lods, 'traincomplement')
    test_training_jobs(lods, "trainval")
    test_training_splits(lods, "trainval")
    print("**** ***** *****")

    if SEPARATE_INTERSECTIONS_MUL:
        print("***** SHARED/UNSHARED TESTS ON LODS_mul *****")
        check_shared_configs_consistency(lods["mul"])
        assert_no_shared_in_test(lods["mul"])
        assert_no_shared_in_test(lods["mul"], 'shared_traincomplement')