# lines = [s for s in f.readlines()]
#lines = [s.strip() for s in lines if not s.startswith('#')]


def _read_id_set(path):
    """Return the set of whitespace-stripped lines of the text file at *path*."""
    with open(path) as f:
        # Iterating the file object yields the same lines as readlines()
        # without materialising an intermediate list.
        return {s.strip() for s in f}


# Each stripped line of a split file becomes one element of the matching set.
training_set = _read_id_set("task/trainset.txt")
val1_set = _read_id_set("task/validationset1.txt")
val2_set = _read_id_set("task/validationset2.txt")
test_set = _read_id_set("task/testset.txt")

# Evaluated once, at import time.
author_mapping, lines_gts, word_gts = prep_iam_writer_map.get_mapping('xml')


def prep_set(data_set, lines_gts, author_mapping):
    """Try to load the line image belonging to every id in *data_set*.

    Each id is split on '-'; its first two fields select the image path
    ``lines/<f0>/<f0>-<f1>/<id>.png``, which is read with ``cv2.imread``
    using flag 0 (``cv2.IMREAD_GRAYSCALE``). Ids whose image cannot be read
    are reported on stdout and skipped.

    Args:
        data_set: iterable of id strings, each containing at least one '-'
            (an id without one raises ``IndexError``).
        lines_gts: not used by the visible body.
        author_mapping: not used by the visible body.

    Returns:
        None.

    NOTE(review): the accumulators and ``line_id`` below are built but never
    filled, read or returned in the visible code. The function looks
    truncated -- confirm against the original source before relying on it.
    """
    all_authors = defaultdict(list)
    all_authors_baseline = defaultdict(list)

    for d in data_set:
        split_name = d.split('-')
        folder_and_base = "{}/{}-{}/{}".format(
            split_name[0], split_name[0], split_name[1], d)
        image_file = "lines/{}.png".format(folder_and_base)
        line_id = "{}-{}".format(split_name[0], split_name[1])

        img = cv2.imread(image_file, 0)
        if img is None:
            # Single-argument call form prints identically on Python 2 and 3.
            # The two spaces reproduce the output of the original statement
            # `print "There was an issue with ", image_file`.
            print("There was an issue with  {}".format(image_file))
            continue
def get_gt(in_set):
    """Build and return the data set for *in_set* from the word-level mapping.

    Loads the mapping through ``prep_iam_writer_map.get_mapping('xml')``,
    keeps only its third element, and returns the result of
    ``prep_data_set(word_gts, in_set)``.

    Args:
        in_set: forwarded unchanged as the second argument of
            ``prep_data_set`` (presumably one of the split id sets --
            verify against callers).
    """
    # get_mapping returns three values; only the last one is needed here.
    _, _, words = prep_iam_writer_map.get_mapping('xml')
    return prep_data_set(words, in_set)