for k, v in word_gts.iteritems(): base_name = k split_base_name = base_name.split('-') folder_and_base = "{}/{}-{}/{}".format(split_base_name[0], split_base_name[0], split_base_name[1], base_name) image_file = "{}.png".format(folder_and_base) compare_line = "-".join(base_name.split("-")[:3]) if not compare_line in in_set: continue data_set.append({ "gt": v['gt'], "image_path": image_file, "err": v['err'] }) return data_set def get_gt(in_set): author_mapping, lines_gts, word_gts = prep_iam_writer_map.get_mapping('xml') data_set = prep_data_set(word_gts, in_set) return data_set if "__main__" == __name__: training_set, val1_set, val2_set, test_set = load_set.load() data_set = get_gt(training_set) print len(data_set)
class hashabledict(dict):
    """A ``dict`` that can be placed in a ``set`` or used as a dict key.

    Hash and equality are both derived from the ``(key, value)`` pairs taken
    in sorted-key order, so two instances with the same contents are equal
    and hash alike regardless of insertion order.

    Keys must be mutually orderable (they are passed to ``sorted``) and the
    values must be hashable.  The hash is computed from the current contents
    on every call, so an instance must not be mutated while it is stored in
    a set or used as a key.
    """

    def __key(self):
        """Return the contents as a tuple of ``(key, value)`` sorted by key."""
        return tuple((k, self[k]) for k in sorted(self))

    def __hash__(self):
        """Hash the sorted ``(key, value)`` tuple."""
        return hash(self.__key())

    def __eq__(self, other):
        """Compare by sorted contents with another ``hashabledict``.

        Any other operand yields ``NotImplemented`` so Python falls back to
        the reflected comparison instead of failing on the missing private
        ``__key`` method.
        """
        if not isinstance(other, hashabledict):
            return NotImplemented
        return self.__key() == other.__key()

    def __ne__(self, other):
        """Inverse of ``__eq__`` (Python 2 does not derive it automatically)."""
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result


def main():
    """Compare the XML- and TXT-derived ground truth for every data split.

    For each split returned by ``load_set.load()`` this prints the number of
    distinct entries produced by each generator, followed by the size of
    their symmetric difference, which is expected to be zero.
    """
    # Imported here rather than at module level so that importing this file
    # for `hashabledict` alone does not require the data-set modules.
    import generate_gt_from_txt_w
    import generate_gt_from_xml_w
    import load_set

    for subset in load_set.load():
        xml_data_set = generate_gt_from_xml_w.get_gt(subset)
        txt_data_set = generate_gt_from_txt_w.get_gt(subset)

        # Entries are dicts, which are unhashable; wrap them so that set
        # algebra can be used for the comparison.
        xml_set = {hashabledict(entry) for entry in xml_data_set}
        txt_set = {hashabledict(entry) for entry in txt_data_set}

        # Single-argument call form prints the same on Python 2 and 3.
        print(len(xml_set))
        print(len(txt_set))
        print("Sym Diff (should be zero): {}".format(len(xml_set ^ txt_set)))


if __name__ == "__main__":
    main()