Пример #1
0
#    lines = [s for s in f.readlines()]
#lines = [s.strip() for s in lines if not s.startswith('#')]

# Read the four split files. Each file contributes one entry per text line;
# surrounding whitespace is stripped and duplicates collapse into the set.
with open("task/trainset.txt") as f:
    training_set = {row.strip() for row in f}

with open("task/validationset1.txt") as f:
    val1_set = {row.strip() for row in f}

with open("task/validationset2.txt") as f:
    val2_set = {row.strip() for row in f}

with open("task/testset.txt") as f:
    test_set = {row.strip() for row in f}

# get_mapping('xml') yields a 3-tuple that is unpacked into module globals.
# NOTE(review): presumably 'xml' is the directory holding the annotation
# files -- confirm against prep_iam_writer_map.
author_mapping, lines_gts, word_gts = prep_iam_writer_map.get_mapping('xml')

def prep_set(data_set, lines_gts, author_mapping):
    """Try to load the line image belonging to every id in ``data_set``.

    Each id is split on ``'-'``.  Its first two fields select the directory
    ``lines/<f0>/<f0>-<f1>/`` and the full id names the ``.png`` file inside
    it.  The file is read with ``cv2.imread(path, 0)``; when that returns
    ``None`` a message is printed and the id is skipped.

    Args:
        data_set: iterable of id strings; each must contain at least one
            ``'-'`` (the second field is indexed unconditionally).
        lines_gts: not used by the visible body.
        author_mapping: not used by the visible body.

    Returns:
        None.

    NOTE(review): the two per-author accumulators, ``line_id`` and the
    loaded image are never consumed and nothing is returned, so this body
    looks truncated.  They are kept here so the remaining logic can be
    restored without guessing at names.
    """
    all_authors = defaultdict(list)
    all_authors_baseline = defaultdict(list)
    for d in data_set:
        split_name = d.split('-')
        folder_and_base = "{}/{}-{}/{}".format(split_name[0], split_name[0], split_name[1], d)
        image_file = "lines/{}.png".format(folder_and_base)
        line_id = "{}-{}".format(split_name[0], split_name[1])

        img = cv2.imread(image_file, 0)
        if img is None:
            # Single-argument print() behaves the same on Python 2 and 3.
            # The doubled space reproduces the output of the original
            # two-item print statement (trailing space + item separator).
            print("There was an issue with  {}".format(image_file))
            continue
def get_gt(in_set):
    """Build the data set for ``in_set`` via ``prep_data_set``.

    Calls ``prep_iam_writer_map.get_mapping('xml')``, which must yield
    exactly three values, and forwards the third one together with
    ``in_set`` to ``prep_data_set``.

    Args:
        in_set: passed through unchanged as the second argument of
            ``prep_data_set``.

    Returns:
        Whatever ``prep_data_set`` returns.
    """
    mapping_parts = prep_iam_writer_map.get_mapping('xml')
    # Only the last element is needed; the first two are discarded.
    _authors, _line_gts, word_level_gts = mapping_parts
    return prep_data_set(word_level_gts, in_set)