Example #1
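# Benchmark loop: runs every pipeline (compatibility algorithm + its solvers) over
# each shuffled document and dumps per-solver solutions, timings, and accuracies to
# results/default.json. Assumes the enclosing script already defined results, docs,
# strips, and the pipelines list (the snippet starts mid-script).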
    results[doc] = dict(algorithms=dict())  # initializer inferred: snippet starts mid-statement
    for pipeline in pipelines:
        algo_id = pipeline.algorithm.id()
        print('[{:.2f}%] algorithm={} doc={} ::'.format(
            100 * processed / total, algo_id, doc), end='')
        processed += 1
        d = 2 if algo_id == 'marques' else 0
        pipeline.run(strips, d)
        results[doc]['algorithms'][algo_id] = dict(
            time=pipeline.t_algorithm,
            compatibilities=pipeline.algorithm.compatibilities.tolist(),
            solvers=dict())

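        # score each solver on the compatibilities just computed by the algorithm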
        acc_str = ''
        for solver, t_solver in zip(pipeline.solvers, pipeline.t_solvers):
            results[doc]['algorithms'][algo_id]['solvers'][solver.id()] = dict(
                solution=solver.solution, time=t_solver)
            acc_str += ' {}={:.2f}%'.format(
                solver.id(), 100 * accuracy(solver.solution, init_permutation))
        print(acc_str)

os.makedirs('results', exist_ok=True)
json.dump(results, open('results/default.json', 'w'))
print('Elapsed time={:.2f} sec.'.format(time.time() - t0_glob))
Example #2
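# Single-document demo: shuffles one document's strips, reconstructs it, prints
# accuracy and the Qc compatibility score, and displays the stitched image with
# matplotlib. Assumes args, algorithm, and the project imports (Strips, Pipeline,
# SolverLS, accuracy, Qc, plt) are already in scope.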
solver = SolverLS(maximize=True)
pipeline = Pipeline(algorithm, [solver])

# load the strips and shuffle them
print('1) Load strips')
strips = Strips(path=args.doc, filter_blanks=True)
strips.shuffle()
init_permutation = strips.permutation()
print('Shuffled order: ' + str(init_permutation))

print('2) Results')
pipeline.run(strips)
# matrix -> list (displacements between neighboring strips in the solution)
compatibilities = pipeline.algorithm.compatibilities
displacements = pipeline.algorithm.displacements
solution = pipeline.solvers[0].solution
displacements = [
    displacements[prev][curr]
    for prev, curr in zip(solution[:-1], solution[1:])
]
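# map solver positions back to the original strip labels via the shuffle permutation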
corrected = [init_permutation[idx] for idx in solution]
print('Solution: ' + str(solution))
print('Correct order: ' + str(corrected))
print('Accuracy={:.2f}%'.format(100 * accuracy(solution, init_permutation)))
print('Qc={:.2f}%'.format(
    100 *
    Qc(compatibilities, init_permutation, pre_process=True, normalized=True)))
reconstruction = strips.image(order=solution, displacements=displacements)
plt.imshow(reconstruction, cmap='gray')
plt.axis('off')
plt.show()
Example #3
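# Fair comparison: each algorithm's compatibility matrix is computed once per
# document and every solver is scored on that same matrix; the records end up
# in results/fair.json.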
processed = 1
total = len(docs) * len(algorithms)
records = []
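# evaluate every algorithm on every document, then every solver on each matrix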
for algorithm in algorithms:
    d = 2 if algorithm.id() == 'marques' else 0
    for doc, strips in strips_all.items():
        print('[{:.2f}%] algorithm={} doc={}'.format(100 * processed / total,
                                                     algorithm.id(), doc))
        processed += 1
        init_permutation = strips.permutation()
        compatibilities = algorithm(strips=strips, d=d).compatibilities
        qc = Qc(compatibilities,
                init_permutation,
                pre_process=False,
                normalized=True)
        for solver in solvers:
            solution = solver(instance=compatibilities).solution
            if solution is not None:  # a solver may return None when the compatibility evaluation is too poor
                acc = accuracy(solution, init_permutation)
                print('     => {} - acc={:.2f}% qc={:.2f}%'.format(
                    solver.id(), 100 * acc, 100 * qc))
                records.append([
                    algorithm.id(),
                    solver.id(), doc, acc, qc, init_permutation, solution,
                    compatibilities.tolist()
                ])

os.makedirs('results', exist_ok=True)
json.dump(records, open('results/fair.json', 'w'))
print('Elapsed time={:.2f} sec.'.format(time.time() - t0_glob))
Example #4
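# Batch reconstruction with the Proposed compatibility network and a Concorde-based
# TSP solver; processes every document of the selected dataset and writes one
# result record per document.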
def reconstruct():

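    # TF1-style session; allow_growth keeps TensorFlow from reserving all GPU memory up front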
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    # parameters processing
    parser = argparse.ArgumentParser(description='Single reconstruction :: Proposed.')
    parser.add_argument(
        '-d', '--dataset', action='store', dest='dataset', required=False, type=str,
        default='cdip', help='Dataset [D1, D2, or cdip].'
    )
    parser.add_argument(
        '-t', '--thresh', action='store', dest='thresh', required=False, type=str,
        default='sauvola', help='Thresholding method [otsu or sauvola].'
    )
    parser.add_argument(
        '-m', '--model-id', action='store', dest='model_id', required=False, type=str,
        default=None, help='Model identifier (tag).'
    )
    parser.add_argument(
        '-i', '--input-size', action='store', dest='input_size', required=False, nargs=2, type=int,
        default=[3000, 32], help='Network input size (H x W).'
    )
    parser.add_argument(
        '-v', '--vshift', action='store', dest='vshift', required=False, type=int,
        default=10, help='Vertical shift range.'
    )
    parser.add_argument(
        '-r', '--results-id', action='store', dest='results_id', required=False, type=str,
        default=None, help='Identifier of the results file.'
    )
    parser.add_argument(
        '-fd', '--feat-dim', action='store', dest='feat_dim', required=False, type=int,
        default=64, help='Features dimensionality.'
    )
    parser.add_argument(
        '-fl', '--feat-layer', action='store', dest='feat_layer', required=False, type=str,
        default='drop9', help='Features layer.'
    )
    parser.add_argument(
        '-a', '--activation', action='store', dest='activation', required=False, type=str,
        default='sigmoid', help='Activation function (final net layer).'
    )

    args = parser.parse_args()

    input_size = tuple(args.input_size)

    assert args.dataset in ['D1', 'D2', 'cdip']
    assert args.thresh in ['otsu', 'sauvola']
    assert args.results_id is not None
    assert args.vshift >= 0
    assert input_size in [(3000, 32), (3000, 48), (3000, 64)]

    # algorithm definition
    info = json.load(open('traindata/{}/info.json'.format(args.model_id), 'r'))
    weights_path_left = info['best_model_left']
    weights_path_right = info['best_model_right']
    sample_height = info['sample_height']
    algorithm = Proposed(
        weights_path_left, weights_path_right, args.vshift,
        input_size, feat_dim=args.feat_dim, feat_layer=args.feat_layer,
        activation=args.activation, sample_height=sample_height,
        thresh_method=args.thresh, sess=sess
    )
    solver = SolverConcorde(maximize=False, max_precision=2)
    pipeline = Pipeline(algorithm, solver)

    # reconstruction instances
    if args.dataset == 'D1':
        docs = ['datasets/D1/mechanical/D{:03}'.format(i) for i in range(1, 62) if i != 3]
    elif args.dataset == 'D2':
        docs = ['datasets/D2/mechanical/D{:03}'.format(i) for i in range(1, 21)]
    else:
        docs = ['datasets/D3/mechanical/D{:03}'.format(i) for i in range(1, 101)] # cdip

    # results / initial configuration
    dir_name = 'results/proposed'
    os.makedirs(dir_name, exist_ok=True)
    results = {
        'model_id': args.model_id,
        'data': [] # experimental results data
    }
    results_fname = '{}/{}.json'.format(dir_name, args.results_id)

    total = len(docs)
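    # reconstruct each document independently, timing the loading and pipeline stages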
    for it, doc in enumerate(docs, 1):
        # load strips
        t0 = time.time()
        strips = Strips(path=doc, filter_blanks=True)
        strips.shuffle()
        init_permutation = strips.permutation()
        load_time = time.time() - t0

        # run the pipeline
        solution, _, displacements = pipeline.run(strips)

        # results
        acc = accuracy(solution, init_permutation)
        print('[{:.2f}%] doc={}, accuracy={:.2f}% inf_time={:.2f}s comp_time={:.2f}s opt_time={:.2f}s'.format(
            100 * it / total, doc.split('/')[-1], 100 * acc, algorithm.inference_time,
            pipeline.comp_time, pipeline.opt_time
        ))
        sys.stdout.flush()
        results['data'].append({
            'doc': doc,
            'solution': solution,
            'accuracy': acc,
            'init_perm': init_permutation,
            'load_time': load_time,
            'comp_time': pipeline.comp_time,
            'opt_time': pipeline.opt_time,
            'displacements': displacements.tolist(),
            'inf_time': algorithm.inference_time,
            'prep_time': algorithm.preparation_time,
            'pw_time': algorithm.pairwise_time
        })
    sess.close()

    # dump results
    json.dump(results, open(results_fname, 'w'))
Example #5
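# Same batch-reconstruction protocol as the previous example, but with the Sib18
# compatibility network (fixed 3000x32 input, Sauvola thresholding).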
def reconstruct():

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    # parameters processing
    parser = argparse.ArgumentParser(
        description='Single reconstruction :: Sib18.')
    parser.add_argument('-d',
                        '--dataset',
                        action='store',
                        dest='dataset',
                        required=False,
                        type=str,
                        default='cdip',
                        help='Dataset [D1, D2, D1+D2, or cdip].')
    parser.add_argument('-r',
                        '--results-id',
                        action='store',
                        dest='results_id',
                        required=False,
                        type=str,
                        default=None,
                        help='Identifier of the results file.')
    parser.add_argument('-v',
                        '--vshift',
                        action='store',
                        dest='vshift',
                        required=False,
                        type=int,
                        default=10,
                        help='Vertical shift range.')

    args = parser.parse_args()

    assert args.dataset in ['D1', 'D2', 'D1+D2', 'cdip']
    assert args.results_id is not None

    # algorithm definition
    weights_path = json.load(open('traindata/sib18/info.json',
                                  'r'))['best_model']
    algorithm = Sib18('sn',
                      weights_path,
                      args.vshift, (3000, 32),
                      num_classes=2,
                      thresh_method='sauvola',
                      seed=SEED,
                      sess=sess)
    solver = SolverConcorde(maximize=True, max_precision=2)
    pipeline = Pipeline(algorithm, solver)

    # reconstruction instances
    docs1 = [
        'datasets/D1/mechanical/D{:03}'.format(i) for i in range(1, 62)
        if i != 3
    ]
    docs2 = ['datasets/D2/mechanical/D{:03}'.format(i) for i in range(1, 21)]
    docs3 = ['datasets/D3/mechanical/D{:03}'.format(i)
             for i in range(1, 101)]  # cdip
    if args.dataset == 'D1+D2':
        docs = docs1 + docs2
    elif args.dataset == 'D1':
        docs = docs1
    elif args.dataset == 'D2':
        docs = docs2
    else:
        docs = docs3

    # results / initial configuration
    dir_name = 'results/sib18'
    os.makedirs(dir_name, exist_ok=True)
    results = {
        'model_id': 'sib18',
        'data': []  # experimental results data
    }
    results_fname = '{}/{}.json'.format(dir_name, args.results_id)

    total = len(docs)
    for it, doc in enumerate(docs, 1):
        # load strips
        t0 = time.time()
        strips = Strips(path=doc, filter_blanks=True)
        strips.shuffle()
        init_permutation = strips.permutation()
        load_time = time.time() - t0
        # run the pipeline
        solution, _, displacements = pipeline.run(strips)
        # results
        acc = accuracy(solution, init_permutation)
        print(
            '[{:.2f}%] doc={}, accuracy={:.2f}% inf_time={:.2f}s comp_time={:.2f}s opt_time={:.2f}s'
            .format(100 * it / total,
                    doc.split('/')[-1], 100 * acc, algorithm.inference_time,
                    pipeline.comp_time, pipeline.opt_time))
        sys.stdout.flush()
        results['data'].append({
            'doc': doc,
            'solution': solution,
            'accuracy': acc,
            'init_perm': init_permutation,
            'load_time': load_time,
            'comp_time': pipeline.comp_time,
            'opt_time': pipeline.opt_time,
            'displacements': displacements.tolist(),
            'inf_time': algorithm.inference_time,
            'prep_time': algorithm.preparation_time,
            'pw_time': algorithm.pairwise_time
        })
    sess.close()

    # dump results
    json.dump(results, open(results_fname, 'w'))
Example #6
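# Mixed-document test: the strips of every document in the dataset are pooled and
# shuffled together, then reconstructed jointly; both the results and the full
# compatibility matrix (.npy) are saved.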
def reconstruct():

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    # parameters processing
    parser = argparse.ArgumentParser(
        description='Testing reconstruction of mixed documents.')
    parser.add_argument('-d',
                        '--dataset',
                        action='store',
                        dest='dataset',
                        required=False,
                        type=str,
                        default='cdip',
                        help='Dataset [D1, D2, D1+D2, or cdip].')
    parser.add_argument('-r',
                        '--results-id',
                        action='store',
                        dest='results_id',
                        required=False,
                        type=str,
                        default=None,
                        help='Identifier of the results file.')
    parser.add_argument('-v',
                        '--vshift',
                        action='store',
                        dest='vshift',
                        required=False,
                        type=int,
                        default=10,
                        help='Vertical shift range.')

    args = parser.parse_args()

    assert args.dataset in ['D1', 'D2', 'D1+D2', 'cdip']
    assert args.results_id is not None

    # algorithm definition
    weights_path = json.load(open('traindata/sib18/info.json',
                                  'r'))['best_model']
    algorithm = Sib18('sn',
                      weights_path,
                      args.vshift, (3000, 32),
                      num_classes=2,
                      thresh_method='sauvola',
                      seed=SEED,
                      sess=sess)
    solver = SolverConcorde(maximize=True, max_precision=2)
    pipeline = Pipeline(algorithm, solver)

    # reconstruction instances
    docs1 = [
        'datasets/D1/mechanical/D{:03}'.format(i) for i in range(1, 62)
        if i != 3
    ]
    docs2 = ['datasets/D2/mechanical/D{:03}'.format(i) for i in range(1, 21)]
    docs3 = ['datasets/D3/mechanical/D{:03}'.format(i)
             for i in range(1, 101)]  # cdip
    if args.dataset == 'D1':
        docs = docs1
    elif args.dataset == 'D2':
        docs = docs2
    elif args.dataset == 'D1+D2':
        docs = docs1 + docs2
    else:
        docs = docs3

    # results
    dir_name = 'results/sib18_multi'
    os.makedirs(dir_name, exist_ok=True)
    results_fname = '{}/{}.json'.format(dir_name, args.results_id)
    compatibility_matrix_fname = '{}/{}.npy'.format(dir_name, args.results_id)
    results = {
        'model_id': 'sib18',
        'data': []  # experimental results data
    }

    # load strips
    print('loading strips')
    t0 = time.time()
    strips = Strips()  # empty
    for doc in docs:  # join documents strips
        strips += Strips(path=doc, filter_blanks=True)
    strips.shuffle()
    load_time = time.time() - t0

    # run the pipeline
    print('running the pipeline')
    solution, compatibilities, displacements = pipeline.run(strips,
                                                            verbose=True)

    # results
    acc = accuracy(solution, strips.permutation(), strips.sizes())

    print(
        'accuracy={:.2f}% load_time={:.2f}s inf_time={:.2f}s comp_time={:.2f}s opt_time={:.2f}s'
        .format(100 * acc, load_time, algorithm.inference_time,
                pipeline.comp_time, pipeline.opt_time))

    sys.stdout.flush()
    results['data'].append({
        'solution': solution,
        'accuracy': acc,
        'init_perm': strips.permutation(),
        'sizes': strips.sizes(),
        'load_time': load_time,
        'comp_time': pipeline.comp_time,
        'opt_time': pipeline.opt_time,
        'displacements': displacements.tolist(),
        'inf_time': algorithm.inference_time,
        'prep_time': algorithm.preparation_time,
        'pw_time': algorithm.pairwise_time
    })
    sess.close()

    # dump results and comp. matrix
    json.dump(results, open(results_fname, 'w'))
    np.save(compatibility_matrix_fname, compatibilities)