Example #1
def _pre_process(self):
    # Start from the default training configuration, then overwrite it
    # with the options pickled at training time and force batch size 1.
    self.options = train.config()
    with open('./out/options.pkl', 'rb') as f:
        opt = pickle.load(f)
        self.options.__dict__.update(opt)
        self.options.batch_size = 1
    # Data pipeline and tokenizer built from the restored options.
    vocab_file = './data/vocab.txt'
    self.data_tools = data_process.Data(vocab_file, None, self.options,
                                        logging)
    self.tokenizer = utils.Tokenizer(logging)
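
Example #1 merges a pickled dict of option values back into a fresh config object via `__dict__.update`. As a labeled assumption (this is not taken from the original project), the counterpart file it reads could have been written at training time roughly like this:

# Illustrative sketch (not from the original project): one way the
# './out/options.pkl' file read above could have been produced.
import pickle

import train  # assumed: the same module that provides config()

options = train.config()
options.batch_size = 32  # hypothetical attribute tweaked during training

with open('./out/options.pkl', 'wb') as f:
    # vars(options) is the same __dict__ that _pre_process() merges back in.
    pickle.dump(vars(options), f)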
Example #2
import importlib
import sys

from pysts.kerasts.objectives import ranknet, ranksvm, cicerons_1504
import pysts.kerasts.blocks as B
import pysts.embedding as emb  # assumed source of the emb.GloVe used below

from train import config  # as in Examples #4 and #5


if __name__ == "__main__":
    modelname, taskname, trainf, valf = sys.argv[1:5]
    params = sys.argv[5:]

    model_module = importlib.import_module('.'+modelname, 'models')
    task_module = importlib.import_module('.'+taskname, 'tasks')
    task = task_module.task()
    # Preliminary config:
    # (N.B. some conf values will be sets of candidate values, which we
    # can't use directly; we just assume that whatever we use directly
    # below wasn't specified as a tunable.)
    conf, ps, h = config(model_module.config, task.config, params)
    task.set_conf(conf)

    # TODO configurable embedding class
    if conf['embdim'] is not None:
        print('GloVe')
        task.emb = emb.GloVe(N=conf['embdim'])

    print('Dataset')
    if 'vocabf' in conf:
        task.load_vocab(conf['vocabf'])
    task.load_data(trainf, valf)

    tuneargs = dict()
    for p in params:
        k, v = p.split('=')
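
Example #2 is cut off in the middle of collecting `key=value` overrides from the command line. A self-contained sketch of that common pattern follows; the helper name and the literal-eval handling are assumptions, not the original script's code:

# Illustrative sketch of folding key=value CLI arguments into a dict;
# the variable names mirror Example #2, the value handling is assumed.
import ast

def parse_tuneargs(params):
    tuneargs = dict()
    for p in params:
        k, v = p.split('=', 1)
        try:
            # Interpret numbers, booleans, lists, ... literally when possible.
            tuneargs[k] = ast.literal_eval(v)
        except (ValueError, SyntaxError):
            tuneargs[k] = v  # fall back to the raw string
    return tuneargs

# e.g. parse_tuneargs(['epochs=10', 'dropout=0.5']) -> {'epochs': 10, 'dropout': 0.5}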
Example #3
        } for s1, sc in zip(s1texts, res)],
               key=lambda x: x['score'],
               reverse=True)[:int(request.json['k'])]
    }), 200


if __name__ == "__main__":
    modelname, taskname, vocabf, weightsf, s1f = sys.argv[1:6]
    params = sys.argv[6:]

    load_s1texts(s1f)

    model_module = importlib.import_module('.' + modelname, 'models')
    task_module = importlib.import_module('.' + taskname, 'tasks')
    task = task_module.task()
    conf, ps, h = config(model_module.config, task.config, params)
    task.set_conf(conf)
    print(ps)

    # TODO we should be able to get away with actually *not* loading
    # this at all!
    if conf['embdim'] is not None:
        print('GloVe')
        task.emb = emb.GloVe(N=conf['embdim'])
    else:
        task.emb = None

    print('Dataset')
    task.load_vocab(vocabf)

    print('Model')
Example #4
import tensorflow as tf

import helper  # provides load_vocab() and the SOURCE_CODES/TARGET_CODES constants used below
from train import config, get_batch

tf_config = tf.ConfigProto(allow_soft_placement=True)
tf_config.gpu_options.allow_growth = True

model_path = "checkpoint/model.ckpt"

if __name__ == "__main__":
    print("(1)load data......")
    docs_source = ['new jersey is usually hot during autumn , and it is never quiet in winter .\n']
    docs_target = ["new jersey est généralement chaud pendant l' automne , et il est jamais calme en hiver .\n"]
    w2i_source, i2w_source = helper.load_vocab('./data/small_vocab_en.txt', helper.SOURCE_CODES)
    w2i_target, i2w_target = helper.load_vocab('./data/small_vocab_fr.txt', helper.TARGET_CODES)

    print("(2) build model......")
    config = config()  # note: rebinds the name `config` from the imported function to the option object
    config.source_vocab_size = len(w2i_source)
    config.target_vocab_size = len(w2i_target)
    model = Seq2seq(config, w2i_target, useTeacherForcing=False)

    print("(3) run model......")
    print_every = 100
    max_target_len = 20

    with tf.Session(config=tf_config) as sess:
        saver = tf.train.Saver()
        saver.restore(sess, model_path)

        source_batch, source_lens, target_batch, target_lens = get_batch(docs_source, w2i_source, docs_target,
                                                                         w2i_target, config.batch_size)
Example #5
# Basic imports
import os,sys,time
import shutil,csv
from keras.layers import Input
from train import config
from net.parallel_model import ParallelModel

# Load configuration and check if it's good
cfg = config()
if not cfg.parse(sys.argv) or not cfg.sanity_check():
  sys.exit(1)

# Get the start iter number
start_iter=0
if cfg.LOAD_FILE:
  start_iter=int(cfg.LOAD_FILE.split('-')[1])

# Print configuration
print('\033[95mConfiguration\033[00m')
print(cfg)
time.sleep(0.5)

os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="4,5,6,7,8"
GPU_COUNT=5
# Import more libraries (after configuration is validated)
import numpy as np
import tensorflow as tf
from dataloader import larcv_data
Example #6

if __name__ == "__main__":
    modelname, task1name, vocab1f, weightsf, task2name, train2f, val2f = sys.argv[1:8]
    params = sys.argv[8:]

    model_module = importlib.import_module('.'+modelname, 'models')

    task1_module = importlib.import_module('.'+task1name, 'tasks')
    task1 = task1_module.task()
    task2_module = importlib.import_module('.'+task2name, 'tasks')
    task2 = task2_module.task()

    # set up conf with task2, because that's where we'll be doing
    # our training
    conf, ps, h = config(model_module.config, task2.config, params)
    task1c = dict(conf)
    if 'task1_conf' in conf:
        for k, v in conf.pop('task1_conf').items():
            task1c[k] = v
    task1.set_conf(task1c)
    task2.set_conf(conf)

    # TODO configurable embedding class
    if conf['embdim'] is not None:
        print('GloVe')
        task2.emb = emb.GloVe(N=conf['embdim'])
        task1.emb = task2.emb

    print('Dataset 1')
    task1.load_vocab(vocab1f)
Example #7
def __init__(self):
    self.options = train.config()
    self.build()
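
Every example above calls `train.config()` and then reads or overwrites plain attributes on the returned object (`batch_size`, `source_vocab_size`, ...). A minimal sketch of such an options holder, purely illustrative and not the real implementation:

# Hypothetical stand-in for train.config(); only the attributes touched
# by the examples above are shown, with made-up defaults.
class config(object):
    def __init__(self):
        self.batch_size = 32
        self.source_vocab_size = None
        self.target_vocab_size = None

opt = config()
opt.batch_size = 1        # overwritten the same way Example #1 does
print(vars(opt))          # the __dict__ that Example #1 pickles and merges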