def read_partition_performance(self, version_stype, input_stype, debug=False):
    """
    Load partition results for one version and record their performance numbers.

    For every test in self.tests whose name contains both "partition" and
    <input_stype>, the cluster path is read from
    self.dirs[<version_stype>]/<test name>.csv. For simulation tests the adj mi
    and the under/over ccf values of the best partition are stored in
    self.perf_info[<version_stype>]. Tests with "data" in their name are not
    supported at the moment and raise an exception.

    Does nothing if we are in quick mode and the simulation partition test for
    <input_stype> is not one of the quick tests.
    """
    simu_test = "partition-" + input_stype + "-simu"
    if args.quick and simu_test not in self.quick_tests:
        return

    if debug:
        print(" version %s input %s partitioning" % (version_stype, input_stype))
        print(" adj mi ccf under/over test description")

    for test_name in self.tests.keys():
        # only partition tests for this input type are of interest
        if "partition" not in test_name or input_stype not in test_name:
            continue
        if args.quick and test_name not in self.quick_tests:
            continue

        cpath = ClusterPath(-1)
        cpath.readfile(self.dirs[version_stype] + "/" + test_name + ".csv")

        if "data" in test_name:
            raise Exception("needs fixing")
            # NOTE everything below in this branch is unreachable until the data comparison gets fixed
            ref_cpath = ClusterPath(-1)
            ref_cpath.readfile(self.dirs["xxxref"] + "/" + test_name + ".csv")
            # adj mi between the reference and the new data partitions
            self.perf_info["xxx"][test_name] = utils.adjusted_mutual_information(
                cpath.partitions[cpath.i_best], ref_cpath.partitions[ref_cpath.i_best]
            )
            if debug:
                print(" %5.2f %-28s to reference partition" % (self.perf_info["xxx"][test_name], test_name))
            continue

        # simulation: compare the best partition to the true partition
        version_perf = self.perf_info[version_stype]
        version_perf[test_name + "-adj_mi"] = cpath.adj_mis[cpath.i_best]  # adj mi to true partition
        ccf_under, ccf_over = cpath.ccfs[cpath.i_best]
        version_perf[test_name + "-ccf_under"] = ccf_under
        version_perf[test_name + "-ccf_over"] = ccf_over
        if debug:
            summary = (
                version_perf[test_name + "-adj_mi"],
                version_perf[test_name + "-ccf_under"],
                version_perf[test_name + "-ccf_over"],
                test_name,
            )
            print(" %5.2f %5.2f %5.2f %-28s to true partition" % summary)
def calculate_missing_values(self, reco_info, only_ip=None):
    """
    Fill in the adj mi and ccf values for partitions that don't have them yet.

    reco_info: passed through to the utils functions that build the true
        partition and compute the correct cluster fractions.
    only_ip: if not None, only the partition at this index is considered;
        all other indices are skipped.

    Partitions that already have an adj mi are left alone (we only check that
    both of their ccf values are set as well). Whenever a value gets computed,
    self.we_have_an_adj_mi is set to True.
    """
    for index, partition in enumerate(self.partitions):
        if only_ip is not None and index != only_ip:
            continue

        if self.adj_mis[index] is not None:  # already have it/them
            assert self.ccfs[index][0] is not None and self.ccfs[index][1] is not None
            continue

        # flatten the clusters to get every uid in this partition
        member_ids = [uid for cluster in partition for uid in cluster]
        true_partition = utils.get_true_partition(reco_info, ids=member_ids)
        self.adj_mis[index] = utils.adjusted_mutual_information(partition, true_partition)

        # no adj mi means the ccfs shouldn't have been filled yet either
        assert self.ccfs[index] == [None, None]
        self.ccfs[index] = utils.correct_cluster_fractions(partition, reco_info)
        self.we_have_an_adj_mi = True