Пример #1
0
def main(results):
    """Write test-set predictions to disk and log the resulting text scores.

    The configs and weights are read from ``results``; predictions are
    written below ``results + "formulas_test/"``, scored with
    ``score_files`` and the scores (plus perplexity) are sent to the model's
    logger.

    Args:
        results: run directory used as a plain string prefix. Paths are
            built by concatenation, so it presumably has to end with a
            path separator -- confirm against the caller.
    """
    out_dir = results

    # Restore the three configs stored with the run.
    data_cfg = Config(out_dir + "data.json")
    vocab_cfg = Config(out_dir + "vocab.json")
    model_cfg = Config(out_dir + "model.json")

    # Rebuild the prediction graph and load the trained weights.
    vocabulary = Vocab(vocab_cfg)
    predictor = Img2SeqModel(model_cfg, out_dir, vocabulary)
    predictor.build_pred()
    predictor.restore_session(out_dir + "model.weights/")

    # Test data generator.
    test_kwargs = dict(
        index_file=data_cfg.index_test,
        path_formulas=data_cfg.path_formulas_test,
        dir_images=data_cfg.dir_images_test,
        max_iter=data_cfg.max_iter,
        path_matching=data_cfg.path_matching_test,
        form_prepro=vocabulary.form_prepro,
    )
    test_data = DataGenerator(**test_kwargs)

    # Let the model write its predictions to files.
    eval_cfg = Config({
        "dir_answers": out_dir + "formulas_test/",
        "batch_size": 20,
    })
    files, perplexity = predictor.write_prediction(eval_cfg, test_data)
    ref_file = files[0]
    hyp_file = files[1]

    # Score reference against hypothesis and report everything in one line.
    scores = score_files(ref_file, hyp_file)
    scores["perplexity"] = perplexity
    parts = []
    for metric, value in scores.items():
        parts.append("{} {:04.2f}".format(metric, value))
    summary = " - ".join(parts)
    predictor.logger.info("- Test Txt: {}".format(summary))
Пример #2
0
def main(data, vocab, training, model, output):
    """Build the datasets, the learning-rate schedule and the model, then train.

    Args:
        data: configuration source, passed to ``Config`` first.
        vocab: configuration source, passed to ``Config`` second.
        training: configuration source, passed to ``Config`` third.
        model: configuration source, passed to ``Config`` fourth.
        output: directory handed to ``config.save`` and to the model.
    """
    out_dir = output

    # Merge all configuration sources and keep a copy in the output dir.
    config = Config([data, vocab, training, model])
    config.save(out_dir)
    vocabulary = Vocab(config)

    # Training and validation generators share everything but their paths
    # and bucket setting.
    train_kwargs = dict(
        path_formulas=config.path_formulas_train,
        dir_images=config.dir_images_train,
        img_prepro=greyscale,
        max_iter=config.max_iter,
        bucket=config.bucket_train,
        path_matching=config.path_matching_train,
        max_len=config.max_length_formula,
        form_prepro=vocabulary.form_prepro,
    )
    train_set = DataGenerator(**train_kwargs)

    val_kwargs = dict(
        path_formulas=config.path_formulas_val,
        dir_images=config.dir_images_val,
        img_prepro=greyscale,
        max_iter=config.max_iter,
        bucket=config.bucket_val,
        path_matching=config.path_matching_val,
        max_len=config.max_length_formula,
        form_prepro=vocabulary.form_prepro,
    )
    val_set = DataGenerator(**val_kwargs)

    # Ceil-divide the dataset size by the batch size to get the number of
    # batches per epoch; the schedule boundaries below are multiplied by it
    # (presumably converting epochs to steps -- confirm in LRSchedule).
    n_batches_epoch = (
        (len(train_set) + config.batch_size - 1) // config.batch_size
    )
    lr_schedule = LRSchedule(
        lr_init=config.lr_init,
        start_decay=config.start_decay * n_batches_epoch,
        end_decay=config.end_decay * n_batches_epoch,
        end_warm=config.end_warm * n_batches_epoch,
        lr_warm=config.lr_warm,
        lr_min=config.lr_min,
    )

    # Build the training graph and run training.
    seq_model = Img2SeqModel(config, out_dir, vocabulary)
    seq_model.build_train(config)
    seq_model.train(config, train_set, val_set, lr_schedule)
Пример #3
0
def main(results):
    """Render reference/hypothesis formulas to images and log image scores.

    Reads ``formulas_test/ref.txt`` and ``formulas_test/hyp_0.txt`` under
    ``results``, builds images from them into ``images_test/ref/`` and
    ``images_test/hyp_0/``, scores the two directories with ``score_dirs``
    and logs the scores through the restored model's logger.

    Args:
        results: run directory. Config and weight paths are built by string
            concatenation, so it presumably has to end with a path
            separator -- confirm against the caller.
    """
    out_dir = results

    # Restore configs, vocabulary and model weights.
    data_cfg = Config(out_dir + "data.json")
    vocab_cfg = Config(out_dir + "vocab.json")
    model_cfg = Config(out_dir + "model.json")

    vocabulary = Vocab(vocab_cfg)
    predictor = Img2SeqModel(model_cfg, out_dir, vocabulary)
    predictor.build_pred()
    predictor.restore_session(out_dir + "model.weights/")

    # The test generator is still constructed, although nothing below
    # reads it.
    test_kwargs = dict(
        path_formulas=data_cfg.path_formulas_test,
        dir_images=data_cfg.dir_images_test,
        img_prepro=greyscale,
        max_iter=data_cfg.max_iter,
        bucket=data_cfg.bucket_test,
        path_matching=data_cfg.path_matching_test,
        max_len=data_cfg.max_length_formula,
        form_prepro=vocabulary.form_prepro,
    )
    test_data = DataGenerator(**test_kwargs)

    # Locations of the formula files and of the image directories.
    ref_formulas = path.join(out_dir, "formulas_test/ref.txt")
    hyp_formulas = path.join(out_dir, "formulas_test/hyp_0.txt")
    ref_images = path.join(out_dir, "images_test/ref/")
    hyp_images = path.join(out_dir, "images_test/hyp_0/")

    # Build the images: reference first, then hypothesis.
    for formulas_file, images_dir in ((ref_formulas, ref_images),
                                      (hyp_formulas, hyp_images)):
        build_images(load_formulas(formulas_file), images_dir)

    # Score the two image directories and log a one-line summary.
    scores = score_dirs(ref_images, hyp_images, greyscale)
    parts = []
    for metric, value in scores.items():
        parts.append("{} {:04.2f}".format(metric, value))
    predictor.logger.info("- Eval Img: {}".format(" - ".join(parts)))
Пример #4
0
def main(image, vocab, model, output):
    """Build a prediction model and pass it to ``vis_img_with_attention``.

    Args:
        image: path of the image to process.
        vocab: configuration source, passed to ``Config`` first.
        model: configuration source, passed to ``Config`` second.
        output: output directory given to the model and to the
            visualisation helper.
    """
    # Load the configuration, then use it to initialise the vocabulary and
    # the model.
    settings = Config([vocab, model])
    vocabulary = Vocab(settings)
    predictor = Img2SeqModel(settings, output, vocabulary)
    predictor.build_pred()

    vis_img_with_attention(predictor, image, output)
Пример #5
0
def getModelForPrediction():
    """Return an ``Img2SeqModel`` with its prediction graph built.

    Configs are read from the hard-coded ``./results/full/`` directory.

    NOTE(review): no weights are restored here; the ``restore_session``
    call is disabled in this function -- confirm the caller restores them.
    """
    run_dir = "./results/full/"

    vocab_cfg = Config(run_dir + "vocab.json")
    model_cfg = Config(run_dir + "model.json")
    vocabulary = Vocab(vocab_cfg)

    predictor = Img2SeqModel(model_cfg, run_dir, vocabulary)
    predictor.build_pred()
    # model.restore_session(dir_output + "model_weights/model.cpkt")
    return predictor
Пример #6
0
def main(data, vocab, training, model, output):
    """Export the training minibatches as NumPy files.

    Iterates over the training set in minibatches, pads each batch of
    formulas, and saves the collected batches with ``np.save`` under the
    names ``np_formula`` and ``np_img`` (relative to the current working
    directory).

    Args:
        data: configuration source, passed to ``Config`` first.
        vocab: configuration source, passed to ``Config`` second.
        training: configuration source, passed to ``Config`` third.
        model: configuration source, passed to ``Config`` fourth.
        output: directory handed to ``config.save``.

    Returns:
        None.
    """
    # Load configs
    dir_output = output
    config = Config([data, vocab, training, model])
    config.save(dir_output)
    vocab = Vocab(config)

    # Load the training dataset
    train_set = DataGenerator(path_formulas=config.path_formulas_train,
            dir_images=config.dir_images_train, img_prepro=greyscale,
            max_iter=config.max_iter, bucket=config.bucket_train,
            path_matching=config.path_matching_train,
            max_len=config.max_length_formula,
            form_prepro=vocab.form_prepro)

    all_img = []
    all_formula = []
    # The batch size comes from the config; the bare name ``batch_size`` is
    # not defined in this function.
    for _img, _formula in minibatches(train_set, config.batch_size):
        all_img.append(_img)
        if _formula is not None:
            # Only the padded formulas are exported; the lengths returned
            # alongside them are not needed.
            _formula, _ = pad_batch_formulas(
                _formula,
                vocab.id_pad,
                vocab.id_end
            )
        # Appended even when None so both lists stay aligned batch by batch.
        all_formula.append(_formula)

    # NOTE(review): if batches differ in shape, np.array() cannot build a
    # regular array from them -- confirm batch shapes are uniform.
    np.save('np_formula', np.array(all_formula))
    np.save('np_img', np.array(all_img))

    print("DONE EXPORTING NUMPY FILES")
    # The function stops after the export; no training is performed.
    return None
Пример #7
0
def load_model(dir_output="results/full/",
               vocab_config='vocab.json',
               model_config='model.json',
               model_path='model.weights/'):
    """Build an ``Img2SeqModel`` for prediction and restore its weights.

    Args:
        dir_output: run directory that holds the configs and the weights.
        vocab_config: vocabulary config file name, relative to ``dir_output``.
        model_config: model config file name, relative to ``dir_output``.
        model_path: weights location, relative to ``dir_output``.

    Returns:
        The model after ``build_pred`` and ``restore_session``.
    """
    vocab_cfg_path = os.path.join(dir_output, vocab_config)
    model_cfg_path = os.path.join(dir_output, model_config)
    weights_path = os.path.join(dir_output, model_path)

    vocab_cfg = Config(vocab_cfg_path)
    model_cfg = Config(model_cfg_path)

    vocabulary = Vocab(vocab_cfg)
    predictor = Img2SeqModel(model_cfg, dir_output, vocabulary)
    predictor.build_pred()
    predictor.restore_session(weights_path)

    return predictor
Пример #8
0
def get_im2latex_model(weight_dir):
    """
    Build the prediction model and restore its weights from a directory.

    Also sets ``TF_CPP_MIN_LOG_LEVEL`` to "3" and the TensorFlow logging
    verbosity to ERROR before building the model.

    :param weight_dir: directory prefix holding ``vocab.json``,
        ``model.json`` and ``model.weights/``; paths are built by string
        concatenation, so it presumably has to end with a path separator
    :return: model with restored session
    """
    # Reduce TensorFlow log output.
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
    tf.logging.set_verbosity(tf.logging.ERROR)

    vocab_cfg = Config(weight_dir + "vocab.json")
    model_cfg = Config(weight_dir + "model.json")
    vocabulary = Vocab(vocab_cfg)

    predictor = Img2SeqModel(model_cfg, weight_dir, vocabulary)
    predictor.build_pred()
    predictor.restore_session(weight_dir + "model.weights/")

    return predictor
Пример #9
0
def img2latex_api(weight_dir, img_path, downsample_image_ratio, cropping,
                  padding, gray_scale):
    """Load the model from ``weight_dir`` and return post-processed LaTeX.

    The model is rebuilt and its weights restored on every call.

    Args:
        weight_dir: directory prefix holding ``vocab.json``, ``model.json``
            and ``model.weights/`` (joined by string concatenation).
        img_path: image location, forwarded to ``img2latex``.
        downsample_image_ratio: forwarded to ``img2latex``.
        cropping: forwarded to ``img2latex``.
        padding: forwarded to ``img2latex``.
        gray_scale: forwarded to ``img2latex``.

    Returns:
        The result of ``postprocess`` applied to the first value returned
        by ``img2latex``.
    """
    # Reduce TensorFlow log output.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    tf.logging.set_verbosity(tf.logging.ERROR)

    vocab_cfg = Config(weight_dir + "vocab.json")
    model_cfg = Config(weight_dir + "model.json")
    vocabulary = Vocab(vocab_cfg)

    predictor = Img2SeqModel(model_cfg, weight_dir, vocabulary)
    predictor.build_pred()
    predictor.restore_session(weight_dir + "model.weights/")

    # Image augmentation pipeline with a single gamma-contrast step.
    augmenter = iaa.Sequential([iaa.GammaContrast(2)])
    outputs = img2latex(
        predictor,
        img_path,
        downsample_image_ratio=downsample_image_ratio,
        cropping=cropping,
        padding=padding,
        img_augment=augmenter,
        gray_scale=gray_scale,
    )
    # Only the first of the three returned values is used.
    latex, _, _ = outputs
    return postprocess(latex)
Пример #10
0
def predict(trained_model):
    """Predict on the image named on the command line and log the result.

    The image path is taken from ``sys.argv[1]``. When no argument was
    given, a short usage message is printed and nothing else happens.

    Args:
        trained_model (Img2SeqModel): model providing ``predict`` and
            ``logger``.

    Returns:
        None.
    """
    if len(sys.argv) <= 1:
        # Function-call form of print: valid on Python 3 and prints the
        # same text on Python 2.
        print("Usage")
        print("python predict.py abs/path/to/image.png")
        return
    image_path = sys.argv[1]
    img = imread(image_path)
    img = greyscale(img)
    res = trained_model.predict(img)
    # Only the first element of the prediction result is logged.
    trained_model.logger.info(res[0])


if __name__ == "__main__":
    # Rebuild the prediction model from the files stored in the run
    # directory: vocabulary config first, then model config.
    dir_output = "results/full/"
    config_vocab, config_model = (
        Config(dir_output + file_name)
        for file_name in ("vocab.json", "model.json")
    )
    vocab = Vocab(config_vocab)

    model = Img2SeqModel(config_model, dir_output, vocab)
    model.build_pred()
    model.restore_session(dir_output + "model.weights/")

    # interactive_shell(model)
    # Run a single prediction on the image given on the command line.
    predict(model)
Пример #11
0
import os

from model.img2seq import Img2SeqModel
from model.utils.general import Config, run
from model.utils.text import Vocab

# Directory containing this module (refers to application_top); the model
# files are read from its "full" subfolder.
APP_ROOT = os.path.dirname(os.path.abspath(__file__))
MODEL_FOLDER = os.path.join(APP_ROOT, 'full')

print("loading model ...")

# Vocabulary config first, then model config.
config_vocab, config_model = (
    Config(os.path.join(MODEL_FOLDER, file_name))
    for file_name in ("vocab.json", "model.json")
)
vocab = Vocab(config_vocab)

# The model is built at import time; "/tmp/mxlatexfull" is passed as its
# output directory.
model = Img2SeqModel(config_model, "/tmp/mxlatexfull", vocab)
model.build_pred()
model.restore_session(os.path.join(MODEL_FOLDER, "model.weights/"))