def _save_results(self, suffix: str, info_path: str,
                  results: List[Dict], k_values: List[List]):
    """Persist the problem-set result dictionaries and the candidate-k
    values, using the non-local `train_phase` and `constraints_fraction`
    settings to decide the output variant."""
    path = Tools.splice_save_problemsets_dictionaries(
        results,
        metadata_fpath=info_path,
        suffix=suffix,
        test_data=not train_phase)
    Tools.save_k_vals_as_df(
        k_vals=k_values,
        suffix=suffix,
        test_data=not train_phase,
        cop_kmeans_frac=constraints_fraction)
    return path
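# A minimal usage sketch (assumptions: a driver object exposing this method,
# here called `runner`, plus `train_phase` and `constraints_fraction` being
# set; none of these names are defined in this excerpt). The helper bundles
# the two Tools.* calls that appear inline in the training script below:
#
#     out_path = runner._save_results(
#         suffix="_training_neutral_common",
#         info_path=r"..\..\Datasets\pan17_train\info.json",
#         results=problemsets_results,
#         k_values=k_vals)
#
# `out_path` is whatever Tools.splice_save_problemsets_dictionaries returns.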
            # (comparing different runs of HDP on a problem set)
            seed=max(33, 70 * (ps == 41)) + (3 * (ps in problematics)),
            infer_lss=False,
            verbose=False,
            configuration=config_neutral,
            drop_uncommon=True)
        problemsets_results.append(ps_result)
        ks = clu.cand_k.copy()
        ks.append(1 + max(clu.true_labels))
        k_vals.append(ks)

    my_suffix = "_training_neutral_common"
    info_json = r"..\..\Datasets\pan17_train\info.json"
    Tools.splice_save_problemsets_dictionaries(problemsets_results,
                                               metadata_fpath=info_json,
                                               suffix=my_suffix)
    Tools.save_k_vals_as_df(k_vals=k_vals, suffix=my_suffix)

    print("==================== SPARSE ====================")
    problemsets_results = []
    k_vals = []
    for ps in range(1, 61):
        print(f"Executing on problem set ► {ps:03d} ◄ ..")
        ps_result, l, lss, plain, clu = problem_set_run(
            problem_set_id=ps,
            n_clusters=None,
            # Empirically specify a random seed that's compatible with
            # hyper sampling and certain problem sets, due to what seems
            # to be a bug in HDP. However, the seeds would be consistent
            # across runs and yield comparable results for our experiments
            # (comparing different runs of HDP on a problem set)
            seed=13712 * ps,