예제 #1
0
def train_encoder(name_of_data, sentences, max_epochs=5, save_frequency=1000):
    """
    Build a vocabulary for ``sentences`` and run the encoder trainer on them.

    Writes ``data/<name_of_data>_dictionary.pkl`` and
    ``data/<name_of_data>_sen.p``, then generates ``training/train_temp.py``
    from ``training/train.py`` by textually replacing the default values of
    ``max_epochs``, ``saveto``, ``dictionary``, ``n_words`` and ``saveFreq``,
    and finally calls ``train_temp.trainer(sentences)``.

    :param name_of_data: prefix of every file written under ``data/``
    :param sentences: sentences handed to ``vocab.build_dictionary`` and to the trainer
    :param max_epochs: value substituted for ``max_epochs=5`` in the template
    :param save_frequency: value substituted for ``saveFreq=1000`` in the template
    """
    if not os.path.exists('data/'):
        os.makedirs('data')
    # Make the modules that live in training/ importable by bare name.
    sys.path.insert(0, 'training/')
    import vocab
    worddict, wordcount = vocab.build_dictionary(sentences)
    dictionary_path = 'data/' + name_of_data + '_dictionary.pkl'
    vocab.save_dictionary(worddict, wordcount, dictionary_path)

    # A pickle stream is binary data: open the file in 'wb' mode and let the
    # context manager close it even if dumping fails.
    with open('data/' + name_of_data + '_sen.p', 'wb') as sen_file:
        pickle.dump(sentences, sen_file)

    with open('training/train.py', 'r') as f:
        text = f.read()

    # Patch the trainer's hard-coded defaults by plain text substitution.
    text = text.replace('max_epochs=5', 'max_epochs=' + str(max_epochs))
    text = text.replace(
        'saveto=\'/u/rkiros/research/semhash/models/toy.npz\'',
        'saveto=\'data/' + name_of_data + '_encoder.npz\'')
    text = text.replace(
        'dictionary=\'/ais/gobi3/u/rkiros/bookgen/book_dictionary_large.pkl\'',
        'dictionary=\'' + dictionary_path + '\'')
    text = text.replace('n_words=20000', 'n_words=' + str(len(wordcount)))
    text = text.replace('saveFreq=1000', 'saveFreq=' + str(save_frequency))

    with open('training/train_temp.py', 'w') as g:
        g.write(text)

    # NOTE(review): Python caches imported modules, so a second call in the
    # same process reuses the first train_temp instead of the file just
    # written -- confirm whether a reload is wanted here.
    import train_temp
    train_temp.trainer(sentences)
예제 #2
0
def main(data_path, dict_path, save_path, batch_size, reload_, reload_path):
    os.environ["THEANO_FLAGS"] = "floatX=float32"

    file_names = get_file_list(data_path, ['txt'])
    train_sent = load_txt_sent(file_names)

    if not os.path.exists(dict_path):
        print "Dictionary not found, recreating"
        worddict, wordcount = vocab.build_dictionary(train_sent)
        print "Built. Saving to: {}".format(dict_path)
        vocab.save_dictionary(worddict, wordcount, dict_path)
    else:
        print "Found dictionary at {}... Loading...".format(dict_path)
        worddict = vocab.load_dictionary(dict_path)
   
    print "Beginning Training..." 
    train.trainer(train_sent, batch_size=batch_size,  reload_=reload_, dictionary=dict_path, saveto=save_path, reload_path=reload_path, saveFreq=10000)  
예제 #3
0
def create_dictionaries(data, load_test, langs, dictionaries):
    train, dev, test = [], [], []
    for d,t in zip(data, load_test):
        train_, dev_, test_ = [], [], []
        for lang in langs:
            train_.append("%s/train.%s"%(d,lang))
            dev_.append("%s/dev.%s"%(d,lang))
            test_.append("%s/test.%s"%(d,lang))
        train.append(train_)
        dev.append(dev_)
        if t:
            test.append(test_)

    # debugging: make sure data set files exist
    assert( all([os.path.isfile(t) for t_ in train for t in t_]) ), "Could not find train files.\n%s"%train[0][0]
    assert( all([os.path.isfile(d) for d_ in dev for d in d_]) ), "Could not find dev files.\n%s"%dev[0][0]
    assert( all([os.path.isfile(t) for t_ in test for t in t_]) ), "Could not find test files.\n%s"%test[0][0]
    
    # Load training and development sets, alternatively also test set
    print 'Loading dataset'
    wordslang = []
    for d,t in zip(data, load_test):
        train, dev, test = load_multilingual_dataset(path_to_data=d,
                                                     langs=langs, load_test=t, load_images=False)
        for lidx,lang in enumerate(langs):
            #print len(train[0][lidx]), len(dev[0][lidx]), len(test[0][lidx])
            wordslang.append( train[0][lidx]+dev[0][lidx]+test[0][lidx] )

    worddicts = []
    iworddicts = []
    # Create and save dictionaries
    print 'Creating and saving multilingual dictionaries %s ...'%(", ".join(langs))
    for lidx, (lang, saveto) in enumerate(zip(langs,dictionaries)):
        worddict = build_dictionary(wordslang[lidx])[0]
        n_words_dict = len(worddict)
        print '%s dictionary size: %s'%(lang,str(n_words_dict))
        with open('%s.dictionary-%s.pkl'%(saveto,lang), 'wb') as f:
            pkl.dump(worddict, f)

    print 'Done.'
예제 #4
0
def train_decoder(name_of_data,
                  sentences,
                  model,
                  p,
                  max_epochs=5,
                  save_frequency=1000,
                  n_words=20000,
                  maxlen_w=30,
                  reload_=False):
    """
    Build a vocabulary for ``sentences`` and run the decoder trainer on them.

    Writes ``data/<name_of_data>_dictionary.pkl``, generates
    ``decoding/train_temp.py`` from ``decoding/train.py`` by textually
    replacing the default values of ``max_epochs``, ``saveto``,
    ``dictionary``, ``n_words`` and ``saveFreq``, then (re)loads that module
    and calls its ``trainer``.

    :param name_of_data: prefix of every file written under ``data/``
    :param sentences: sentences; passed to the trainer as both of its first two arguments
    :param model: forwarded unchanged to ``train_temp.trainer``
    :param p: forwarded unchanged to ``train_temp.trainer``
    :param max_epochs: value substituted for ``max_epochs=5`` in the template
    :param save_frequency: value substituted for ``saveFreq=1000`` in the template
    :param n_words: second argument of ``vocab.build_dictionary``
    :param maxlen_w: forwarded to ``train_temp.trainer``
    :param reload_: forwarded to ``train_temp.trainer``
    :return: whatever ``train_temp.trainer`` returns
    """
    if not os.path.exists('data/'):
        os.makedirs('data')
    # Make the modules that live in decoding/ importable by bare name.
    sys.path.insert(1, 'decoding/')
    import vocab
    reload(vocab)
    worddict, wordcount = vocab.build_dictionary(sentences, n_words)
    dictionary_path = 'data/' + name_of_data + '_dictionary.pkl'
    vocab.save_dictionary(worddict, wordcount, dictionary_path)

    with open('decoding/train.py', 'r') as f:
        text = f.read()

    # Patch the trainer's hard-coded defaults by plain text substitution.
    text = text.replace('max_epochs=5', 'max_epochs=' + str(max_epochs))
    text = text.replace(
        'saveto=\'/u/rkiros/research/semhash/models/toy.npz\'',
        'saveto=\'data/' + name_of_data + '_decoder.npz\'')
    text = text.replace(
        'dictionary=\'/ais/gobi3/u/rkiros/bookgen/book_dictionary_large.pkl\'',
        'dictionary=\'' + dictionary_path + '\'')
    text = text.replace('n_words=40000', 'n_words=' + str(len(wordcount)))
    text = text.replace('saveFreq=1000', 'saveFreq=' + str(save_frequency))

    # The context manager closes the generated module even if writing fails.
    with open('decoding/train_temp.py', 'w') as g:
        g.write(text)

    import train_temp
    # Reload so that a module imported by an earlier call picks up the file
    # that was just regenerated.
    reload(train_temp)
    return train_temp.trainer(sentences,
                              sentences,
                              model,
                              p,
                              maxlen_w=maxlen_w,
                              reload_=reload_)
예제 #5
0
def main():
    parser = argparse.ArgumentParser(
        description='Pass target style genre to train decoder')
    parser.add_argument('-s',
                        '--style_genre',
                        help='the name of style corpus',
                        required='True',
                        default='localhost')
    flag = parser.parse_args()

    style_corpus_path = "/media/VSlab3/kuanchen_arxiv/artistic_style_corpora/{}".format(
        flag.style_genre)
    style_genre = flag.style_genre.split(".")[0]

    X = []
    with open(style_corpus_path, 'r') as handle:
        for line in handle.readlines():
            X.append(line.strip())
    C = X
    if not os.path.isfile("./vocab_save/{}.pkl".format(style_genre)):
        print "Get vocabulary..."
        worddict, wordcount = vocab.build_dictionary(X)
        vocab.save_dictionary(worddict=worddict,
                              wordcount=wordcount,
                              loc="vocab_save/{}.pkl".format(style_genre))
    else:
        pass
    savepath = "./logs_{}".format(style_genre)
    if not os.path.exists(savepath):
        os.mkdir(savepath)
    skmodel = skipthoughts.load_model()
    train.trainer(X,
                  C,
                  skmodel,
                  dictionary="vocab_save/{}.pkl".format(style_genre),
                  savepath=savepath,
                  saveto="model.npz")
예제 #6
0
def trainer(load_from=None, save_dir="snapshots", name="anon", **kwargs):
    """
    :param load_from: location to load parameters + options from
    :param name: name of model, used as location to save parameters + options
    """

    curr_model = dict()

    # load old model, including parameters, but overwrite with new options
    if load_from:
        print "reloading..." + load_from
        with open("%s.pkl" % load_from, "rb") as f:
            curr_model = pkl.load(f)
    else:
        curr_model["options"] = {}

    for k, v in kwargs.iteritems():
        curr_model["options"][k] = v

    model_options = curr_model["options"]

    # initialize logger
    import datetime

    timestampedName = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S") + "_" + name

    from logger import Log

    log = Log(
        name=timestampedName, hyperparams=model_options, saveDir="vis/training", xLabel="Examples Seen", saveFrequency=1
    )

    print curr_model["options"]

    # Load training and development sets
    print "Loading dataset"
    dataset = load_dataset(model_options["data"], cnn=model_options["cnn"], load_train=True)
    train = dataset["train"]
    dev = dataset["dev"]

    # Create dictionary
    print "Creating dictionary"
    worddict = build_dictionary(train["caps"] + dev["caps"])
    print "Dictionary size: " + str(len(worddict))
    curr_model["worddict"] = worddict
    curr_model["options"]["n_words"] = len(worddict) + 2

    # save model
    pkl.dump(curr_model, open("%s/%s.pkl" % (save_dir, name), "wb"))

    print "Loading data"
    train_iter = datasource.Datasource(train, batch_size=model_options["batch_size"], worddict=worddict)
    dev = datasource.Datasource(dev, worddict=worddict)
    dev_caps, dev_ims = dev.all()

    print "Building model"
    params = init_params(model_options)
    # reload parameters
    if load_from is not None and os.path.exists(load_from):
        params = load_params(load_from, params)

    tparams = init_tparams(params)

    inps, cost = build_model(tparams, model_options)

    print "Building sentence encoder"
    inps_se, sentences = build_sentence_encoder(tparams, model_options)
    f_senc = theano.function(inps_se, sentences, profile=False)

    print "Building image encoder"
    inps_ie, images = build_image_encoder(tparams, model_options)
    f_ienc = theano.function(inps_ie, images, profile=False)

    print "Building f_grad...",
    grads = tensor.grad(cost, wrt=itemlist(tparams))

    print "Building errors.."
    inps_err, errs = build_errors(model_options)
    f_err = theano.function(inps_err, errs, profile=False)

    curr_model["f_senc"] = f_senc
    curr_model["f_ienc"] = f_ienc
    curr_model["f_err"] = f_err

    if model_options["grad_clip"] > 0.0:
        grads = [maxnorm(g, model_options["grad_clip"]) for g in grads]

    lr = tensor.scalar(name="lr")
    print "Building optimizers...",
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(model_options["optimizer"])(lr, tparams, grads, inps, cost)

    print "Optimization"

    uidx = 0
    curr = 0
    n_samples = 0

    for eidx in xrange(model_options["max_epochs"]):

        print "Epoch ", eidx

        for x, mask, im in train_iter:
            n_samples += x.shape[1]
            uidx += 1

            # Update
            ud_start = time.time()
            cost = f_grad_shared(x, mask, im)
            f_update(model_options["lrate"])
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print "NaN detected"
                return 1.0, 1.0, 1.0

            if numpy.mod(uidx, model_options["dispFreq"]) == 0:
                print "Epoch ", eidx, "Update ", uidx, "Cost ", cost, "UD ", ud
                log.update({"Error": float(cost)}, n_samples)

            if numpy.mod(uidx, model_options["validFreq"]) == 0:

                print "Computing results..."

                # encode sentences efficiently
                dev_s = encode_sentences(curr_model, dev_caps, batch_size=model_options["batch_size"])
                dev_i = encode_images(curr_model, dev_ims)

                # compute errors
                dev_errs = compute_errors(curr_model, dev_s, dev_i)

                # compute ranking error
                (r1, r5, r10, medr, meanr), vis_details = t2i(dev_errs, vis_details=True)
                (r1i, r5i, r10i, medri, meanri) = i2t(dev_errs)
                print "Text to image (dev set): %.1f, %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr, meanr)
                log.update({"R@1": r1, "R@5": r5, "R@10": r10, "median_rank": medr, "mean_rank": meanr}, n_samples)
                print "Image to text (dev set): %.1f, %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri, meanri)
                log.update(
                    {
                        "Image2Caption_R@1": r1i,
                        "Image2Caption_R@5": r5i,
                        "Image2CaptionR@10": r10i,
                        "Image2Caption_median_rank": medri,
                        "Image2Caption_mean_rank": meanri,
                    },
                    n_samples,
                )

                tot = r1 + r5 + r10
                if tot > curr:
                    curr = tot
                    # Save parameters
                    print "Saving...",
                    numpy.savez("%s/%s" % (save_dir, name), **unzip(tparams))
                    print "Done"
                    vis_details["hyperparams"] = model_options
                    # Save visualization details
                    with open("vis/roc/%s/%s.json" % (model_options["data"], timestampedName), "w") as f:
                        json.dump(vis_details, f)
                    # Add the new model to the index
                    try:
                        index = json.load(open("vis/roc/index.json", "r"))
                    except IOError:
                        index = {model_options["data"]: []}

                    models = index[model_options["data"]]
                    if timestampedName not in models:
                        models.append(timestampedName)

                    with open("vis/roc/index.json", "w") as f:
                        json.dump(index, f)

        print "Seen %d samples" % n_samples
예제 #7
0
def main():
    """
    Train the embedding model, then evaluate the latest saved checkpoint.

    Loads the dataset named by ``FLAGS.input_dataset_name``, standardises the
    image feature columns from index 1536 onwards, builds and pickles the word
    dictionary to ``f8k.dictionary.pkl``, and trains for
    ``model_config.max_epochs`` epochs. Every 100 updates the test set is
    scored and a checkpoint is saved whenever the summed recall improves.
    Afterwards the most recent checkpoint is restored and scored once more.
    """
    # Directory used both for saving and for locating checkpoints.
    checkpoint_dir = "checkpoint_files/"

    model_config = configuration.ModelConfig()
    model_config.data = FLAGS.input_dataset_name

    #loading dataset
    print('Loading dataset ...')
    (train_caps,
     train_ims), (test_caps,
                  test_ims), _ = load_dataset(name=model_config.data,
                                              load_train=True)
    # Only the columns from 1536 onwards are rescaled; the leading 1536
    # columns are left as loaded.
    train_nic_ims = train_ims[:, 1536:]
    test_nic_ims = test_ims[:, 1536:]
    train_ims[:, 1536:] = preprocessing.scale(train_nic_ims)
    test_ims[:, 1536:] = preprocessing.scale(test_nic_ims)

    test_vgg_feature = test_ims[:, :1536]
    test_NIC_feature = test_ims[:, 1536:]

    #create and save dictionary
    print('creating dictionary')
    worddict = build_dictionary(train_caps + test_caps)[0]
    n_words = len(worddict)
    model_config.n_words = n_words
    model_config.worddict = worddict
    print('dictionary size: ' + str(n_words))
    with open('f8k.dictionary.pkl', 'wb') as f:
        pkl.dump(worddict, f)

    #Building the model
    print('Building the model ...')
    model = LTS(model_config)
    model.build()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    saver = tf.train.Saver(max_to_keep=model_config.max_checkpoints_to_keep)

    print('start embedding training')
    curr = 0.  # best summed recall seen so far
    uidx = 0.  # number of updates performed
    train_iter = homogeneous_data.HomogeneousData(
        data=[train_caps, train_ims],
        batch_size=model_config.batch_size,
        maxlen=model_config.maxlen_w)
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(model_config.max_epochs):
            # Train G
            print('Epoch ', epoch)

            # One-off learning-rate decay at the start of epoch 15.
            if epoch == 15:
                model_config.lrate = model_config.lrate / 10

            for x, im in train_iter:
                uidx += 1

                ls, mask, im = homogeneous_data.prepare_data(
                    caps=x,
                    features=im,
                    worddict=worddict,
                    maxlen=model_config.maxlen_w,
                    n_words=model_config.n_words)

                vgg_feature = im[:, :1536]
                NIC_feature = im[:, 1536:]

                #embedding training
                _, cost = sess.run(
                    [model.updates, model.embedding_loss],
                    feed_dict={
                        model.VGG_pred_data: vgg_feature,
                        model.NIC_pred_data: NIC_feature,
                        model.ls_pred_data: ls.T,
                        model.input_mask: mask.T,
                        model.keep_prob: 0.5,
                        model.phase: 1,
                        model.learning_rate: model_config.lrate
                    })

                if np.mod(uidx, 10) == 0:
                    print('Epoch ', epoch, 'Update ', uidx, 'Cost ', cost)

                if np.mod(uidx, 100) == 0:
                    print('test ...')

                    # encode images into the text embedding space
                    images = getTestImageFeature(sess, model, test_vgg_feature,
                                                 test_NIC_feature)
                    features = getTestTextFeature(sess, model, model_config,
                                                  test_caps)

                    (r1, r5, r10, medr) = recall.i2t(images, features)
                    print("Image to text: %.1f, %.1f, %.1f, %.1f" %
                          (r1, r5, r10, medr))
                    (r1i, r5i, r10i, medri) = recall.t2i(images, features)
                    print("Text to image: %.1f, %.1f, %.1f, %.1f" %
                          (r1i, r5i, r10i, medri))

                    currscore = r1 + r5 + r10 + r1i + r5i + r10i
                    if currscore > curr:
                        curr = currscore

                        # Save model
                        print('Saving...')
                        saver.save(sess,
                                   checkpoint_dir + "model.ckpt",
                                   global_step=int(uidx + 1))
                        print('done.')

    # Look for a checkpoint before opening a session so that nothing is
    # left open on the early return.
    model_path = tf.train.latest_checkpoint(checkpoint_dir)
    if not model_path:
        # Report the directory that was actually searched, and interpolate
        # it with % rather than passing it as a second print() argument.
        print("Skipping testing. No checkpoint found in: %s" % checkpoint_dir)
        return

    # The context manager closes the evaluation session when done.
    with tf.Session() as sess:
        print("Loading model from checkpoint: %s" % model_path)
        saver.restore(sess, model_path)
        print("Successfully loaded checkpoint: %s" % model_path)

        images = getTestImageFeature(sess, model, test_vgg_feature,
                                     test_NIC_feature)

        # encode sentences into the text embedding space
        features = getTestTextFeature(sess, model, model_config, test_caps)

        (r1, r5, r10, medr) = recall.i2t(images, features)
        print("Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr))
        (r1i, r5i, r10i, medri) = recall.t2i(images, features)
        print("Text to image: %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri))
예제 #8
0
                        default='./target_text/toy_story.txt')
    args = parser.parse_args()

    target_name = args.targe_text.split("/")[-1].split(".")[
        0]  # Get target text file name. eg. "speeches.txt"

    download_model()
    print("Loading Skip-Vector Model...")
    skmodel = skipthoughts.load_model()
    print("Done!")
    """
    Step 1: Generating dictionary for the target text.
    """
    print("Generating dictionary for the target text...")
    X = load_text(args.targe_text)
    worddict, wordcount = vocab.build_dictionary(X)
    #vocab.save_dictionary(worddict, wordcount, './target_dict/%s_dict.pkl'%target_name)
    #print("Done! Saved dictionary under ./target_dict/ as %s_dict.pkl"%target_name)
    vocab.save_dictionary(worddict, wordcount,
                          './%s/%s_dict.pkl' % (target_name, target_name))
    print("Done! Saved dictionary under ./%s/ as %s_dict.pkl" %
          (target_name, target_name))
    """
    Step 2: Generating style vector for the target text.
    """
    print("Generating style vector for the target text...")
    nltk.download(
        'punkt')  # Natural Language Toolkit for skipthoughts encoder.
    print("The lenth of X is:")
    print len(X)
    skip_vector = skipthoughts.encode(skmodel, X)
예제 #9
0
    """
    all_sent = []
    for txt_file in flist_txt:
        print "Reading file: {}".format(txt_file)
        with open(txt_file, 'r') as f:
            data = f.read()
        sent = data.split('\n')
        all_sent += sent
    print "File loading complete. Cleaning..."
    #all_sent = map(clean_string, all_sent)
    return all_sent


if __name__ == "__main__":
    os.environ["THEANO_FLAGS"] = "floatX=float32"

    file_names = get_file_list(data_path, ['txt'])
    train_sent = load_txt_sent(file_names)

    if not os.path.exists(dict_path):
        print "Dictionary not found, recreating"
        worddict, wordcount = vocab.build_dictionary(train_sent)
        print "Built. Saving to: {}".format(dict_path)
        vocab.save_dictionary(worddict, wordcount, dict_path)
    else:
        print "Found dictionary at {}... Loading...".format(dict_path)
        worddict = vocab.load_dictionary(dict_path)
   
    print "Beginning Training..." 
    train.trainer(train_sent, n_words=20000, dim=2400, batch_size=128,  reload_=False, dictionary=dict_path, saveto=save_path)  
예제 #10
0
def trainer(data='coco',
            margin=0.2,
            dim=1024,
            dim_image=4096,
            dim_word=300,
            encoder='gru',
            max_epochs=15,
            dispFreq=10,
            decay_c=0.0,
            grad_clip=2.0,
            maxlen_w=150,
            batch_size=128,
            saveto='vse/coco',
            validFreq=100,
            lrate=0.0002,
            concat=True,
            reload_=False):

    hyper_params = {
        'data': data,
        'encoder': encoder,
        'batch_size': batch_size,
        'time': cur_time,
        'lrate': lrate,
        'concat': concat,
    }

    i2t_r1 = dict([('i2t_recall', 'r1')] + hyper_params.items())
    i2t_r5 = dict([('i2t_recall', 'r5')] + hyper_params.items())
    i2t_r10 = dict([('i2t_recall', 'r10')] + hyper_params.items())
    t2i_r1 = dict([('t2i_recall', 'r1')] + hyper_params.items())
    t2i_r5 = dict([('t2i_recall', 'r5')] + hyper_params.items())
    t2i_r10 = dict([('t2i_recall', 'r10')] + hyper_params.items())

    i2t_med = dict([('i2t_med', 'i2t_med')] + hyper_params.items())
    t2i_med = dict([('t2i_med', 't2i_med')] + hyper_params.items())

    agent = Agent(port=5020)
    i2t_r1_agent = agent.register(i2t_r1, 'recall', overwrite=True)
    i2t_r5_agent = agent.register(i2t_r5, 'recall', overwrite=True)
    i2t_r10_agent = agent.register(i2t_r10, 'recall', overwrite=True)
    t2i_r1_agent = agent.register(t2i_r1, 'recall', overwrite=True)
    t2i_r5_agent = agent.register(t2i_r5, 'recall', overwrite=True)
    t2i_r10_agent = agent.register(t2i_r10, 'recall', overwrite=True)

    i2t_med_agent = agent.register(i2t_med, 'median', overwrite=True)
    t2i_med_agent = agent.register(t2i_med, 'median', overwrite=True)

    # Model options
    model_options = {}
    model_options['data'] = data
    model_options['margin'] = margin
    model_options['dim'] = dim
    model_options['dim_image'] = dim_image
    model_options['dim_word'] = dim_word
    model_options['encoder'] = encoder
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['maxlen_w'] = maxlen_w
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['validFreq'] = validFreq
    model_options['lrate'] = lrate
    model_options['reload_'] = reload_
    model_options['concat'] = concat

    print model_options

    # reload options
    if reload_ and os.path.exists(saveto):
        print 'reloading...' + saveto
        with open('%s.pkl' % saveto, 'rb') as f:
            model_options = pkl.load(f)

    # Load training and development sets
    print 'loading dataset'
    train, dev = load_dataset(data)[:2]

    # Create and save dictionary
    print 'Create dictionary'
    worddict = build_dictionary(train[0] + dev[0])[0]
    n_words = len(worddict)
    model_options['n_words'] = n_words
    print 'Dictionary size: ' + str(n_words)
    with open('%s.dictionary.pkl' % saveto, 'wb') as f:
        pkl.dump(worddict, f)

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    model_options['worddict'] = worddict
    model_options['word_idict'] = word_idict

    # Each sentence in the minibatch have same length (for encoder)
    train_iter = homogeneous_data.HomogeneousData([train[0], train[1]],
                                                  batch_size=batch_size,
                                                  maxlen=maxlen_w)

    img_sen_model = ImgSenRanking(model_options)
    img_sen_model = img_sen_model.cuda()

    loss_fn = PairwiseRankingLoss(margin=margin)
    loss_fn = loss_fn.cuda()

    params = filter(lambda p: p.requires_grad, img_sen_model.parameters())
    optimizer = torch.optim.Adam(params, lrate)

    uidx = 0
    curr = 0.0
    n_samples = 0

    for eidx in xrange(max_epochs):

        print 'Epoch ', eidx

        for x, im in train_iter:
            n_samples += len(x)
            uidx += 1

            x_id, im = homogeneous_data.prepare_data(x,
                                                     im,
                                                     worddict,
                                                     maxlen=maxlen_w,
                                                     n_words=n_words)

            if x_id is None:
                print 'Minibatch with zero sample under length ', maxlen_w
                uidx -= 1
                continue

            x_id = Variable(torch.from_numpy(x_id).cuda())
            im = Variable(torch.from_numpy(im).cuda())
            # Update
            ud_start = time.time()
            x, im = img_sen_model(x_id, im, x)
            cost = loss_fn(im, x)
            optimizer.zero_grad()
            cost.backward()
            torch.nn.utils.clip_grad_norm(params, grad_clip)
            optimizer.step()
            ud = time.time() - ud_start

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost.data.cpu(
                ).numpy()[0], 'UD ', ud

            if numpy.mod(uidx, validFreq) == 0:

                print 'Computing results...'
                curr_model = {}
                curr_model['options'] = model_options
                curr_model['worddict'] = worddict
                curr_model['word_idict'] = word_idict
                curr_model['img_sen_model'] = img_sen_model

                ls, lim = encode_sentences(curr_model, dev[0]), encode_images(
                    curr_model, dev[1])

                r1, r5, r10, medr = 0.0, 0.0, 0.0, 0
                r1i, r5i, r10i, medri = 0.0, 0.0, 0.0, 0
                r_time = time.time()
                if data == 'arch' or data == 'arch_small':
                    (r1, r5, r10, medr) = i2t_arch(lim, ls)
                    print "Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5,
                                                                     r10, medr)
                    (r1i, r5i, r10i, medri) = t2i_arch(lim, ls)
                    print "Text to image: %.1f, %.1f, %.1f, %.1f" % (
                        r1i, r5i, r10i, medri)
                else:
                    (r1, r5, r10, medr) = i2t(lim, ls)
                    print "Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5,
                                                                     r10, medr)
                    (r1i, r5i, r10i, medri) = t2i(lim, ls)
                    print "Text to image: %.1f, %.1f, %.1f, %.1f" % (
                        r1i, r5i, r10i, medri)

                print "Cal Recall@K using %ss" % (time.time() - r_time)

                record_num = uidx / validFreq
                agent.append(i2t_r1_agent, record_num, r1)
                agent.append(i2t_r5_agent, record_num, r5)
                agent.append(i2t_r10_agent, record_num, r10)
                agent.append(t2i_r1_agent, record_num, r1i)
                agent.append(t2i_r5_agent, record_num, r5i)
                agent.append(t2i_r10_agent, record_num, r10i)

                agent.append(i2t_med_agent, record_num, medr)
                agent.append(t2i_med_agent, record_num, medri)

                currscore = r1 + r5 + r10 + r1i + r5i + r10i
                if currscore > curr:
                    curr = currscore

                    # Save model
                    print 'Saving model...',
                    pkl.dump(
                        model_options,
                        open('%s_params_%s.pkl' % (saveto, encoder), 'wb'))
                    torch.save(img_sen_model.state_dict(),
                               '%s_model_%s.pkl' % (saveto, encoder))
                    print 'Done'

        print 'Seen %d samples' % n_samples
예제 #11
0
def trainer(
        data='f30k-comparable-full',
        path_to_data='./data/',
        langs=['en', 'de'],
        margin=0.2,
        dim=100,
        dim_multimodal=100,
        dim_image=4096,
        dim_word=100,
        encoders={
            'en': 'gru',
            'de': 'gru'
        },  # gru OR bow
        max_epochs=15,
        dispFreq=10,
        decay_c=0.,
        grad_clip=2.,
        maxlen_w=100,
        optimizer='adam',
        batch_size=128,
        saveto='./models/f30k-comparable-full.npz',
        validFreq=100,
        testFreq=100,
        lrate=0.0002,
        reload_=False,
        # new parameters
        max_words={
            'en': 0,
            'de': 0
        },  # integer, zero means unlimited
        debug=False,
        use_dropout=False,
        dropout_embedding=0.2,  # dropout for input embeddings (0: no dropout)
        dropout_hidden=0.2,  # dropout for hidden layers (0: no dropout)
        dropout_source=0.0,  # dropout source words (0: no dropout)
        #dropout_prob=0.5,
    load_test=False,
        lambda_img_sent=0.5,
        lambda_sent_sent=0.5,
        bidirectional_enc=False,
        n_enc_hidden_layers=1,
        use_all_costs=False,
        create_dictionaries=False,
        attention_type='dot',  # one of 'general', 'dot'
        decay_c_general_attention=0.0,  # L2 regularisation for the attention matrices
        dictionaries_min_freq=0):

    # Model options
    model_options = {}
    model_options['data'] = data
    model_options['langs'] = langs
    for lang in langs:
        model_options['encoder_%s' % lang] = encoders[lang]
        model_options['max_words_%s' % lang] = max_words[lang]
    model_options['margin'] = margin
    model_options['dim'] = dim
    model_options['dim_multimodal'] = dim_multimodal
    model_options['dim_image'] = dim_image
    model_options['dim_word'] = dim_word
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['maxlen_w'] = maxlen_w
    model_options['optimizer'] = optimizer
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['validFreq'] = validFreq
    model_options['testFreq'] = testFreq
    model_options['lrate'] = lrate
    model_options['reload_'] = reload_
    model_options['use_dropout'] = use_dropout
    model_options['dropout_embedding'] = dropout_embedding
    model_options['dropout_hidden'] = dropout_hidden
    model_options['dropout_source'] = dropout_source
    #model_options['dropout_prob'] = dropout_prob
    model_options['bidirectional_enc'] = bidirectional_enc
    model_options['n_enc_hidden_layers'] = n_enc_hidden_layers
    model_options['load_test'] = load_test
    model_options['lambda_img_sent'] = lambda_img_sent
    model_options['lambda_sent_sent'] = lambda_sent_sent
    model_options['use_all_costs'] = use_all_costs
    model_options['use_all_costs'] = use_all_costs
    model_options['create_dictionaries'] = create_dictionaries
    model_options['dictionaries_min_freq'] = dictionaries_min_freq
    model_options['attention_type'] = attention_type
    model_options['decay_c_general_attention'] = decay_c_general_attention

    assert (n_enc_hidden_layers >= 1)

    # reload options
    if reload_ and os.path.exists(saveto):
        print 'reloading...' + saveto
        with open('%s.pkl' % saveto, 'rb') as f:
            models_options = pkl.load(f)

    # Load training and development sets, alternatively also test set
    print 'Loading dataset'
    train, dev, test = load_multilingual_dataset(data,
                                                 langs,
                                                 load_test=load_test)

    worddicts = []
    iworddicts = []
    if create_dictionaries:
        # Create and save dictionaries
        print 'Creating and saving multilingual dictionaries %s' % (", ".join(
            model_options['langs']))
        for lang_idx, lang in enumerate(langs):
            if load_test:
                worddict = build_dictionary(
                    train[0][lang_idx] + dev[0][lang_idx] + test[0][lang_idx],
                    dictionaries_min_freq)[0]
            else:
                worddict = build_dictionary(
                    train[0][lang_idx] + dev[0][lang_idx],
                    dictionaries_min_freq)[0]
            n_words_dict = len(worddict)
            print 'minimum word frequency: %i' % dictionaries_min_freq
            print '%s dictionary size: %s' % (lang, str(n_words_dict))
            with open('%s.dictionary-%s.pkl' % (saveto, lang), 'wb') as f:
                pkl.dump(worddict, f)

            # Inverse dictionaries
            iworddict = dict()
            for kk, vv in worddict.iteritems():
                iworddict[vv] = kk
            iworddict[0] = '<eos>'
            iworddict[1] = 'UNK'

            worddicts.append(worddict)
            iworddicts.append(iworddict)

            model_options["n_words_%s" % lang] = n_words_dict if max_words[
                lang] == 0 else max_words[lang]
    else:
        # load dictionaries
        print 'Loading multilingual dictionaries %s' % (", ".join(
            model_options['langs']))
        for lang_idx, lang in enumerate(langs):
            with open('%s.dictionary-%s.pkl' % (saveto, lang), 'wb') as f:
                worddict = pkl.load(f)

            # Inverse dictionaries
            iworddict = dict()
            for kk, vv in worddict.iteritems():
                iworddict[vv] = kk
            iworddict[0] = '<eos>'
            iworddict[1] = 'UNK'

            worddicts.append(worddict)
            iworddicts.append(iworddict)

            model_options["n_words_%s" % lang] = n_words_dict if max_words[
                lang] == 0 else max_words[lang]

    # assert all max_words per language are equal
    assert (all(x == max_words.values()[0] for x in max_words.values()))

    print model_options

    print 'Building model'
    params = init_params(model_options)
    # reload parameters
    if reload_ and os.path.exists(saveto):
        params = load_params(saveto, params)

    tparams = init_tparams(params)

    trng, inps, cost = build_model(tparams, model_options)

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, cost, profile=False)
    print 'Done'

    # weight decay, if applicable
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            # all parameters but general attention, if any
            if not kk.endswith('mapping'):
                weight_decay += (vv**2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # weight decay for the general attention, if applicable
    if decay_c_general_attention > 0. and attention_type == 'general':
        decay_g = theano.shared(numpy.float32(decay_c_general_attention),
                                name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            if kk.endswith('mapping'):
                print 'Adding L2 for %s ...' % kk
                weight_decay += (vv**2).sum()
        weight_decay *= decay_g
        cost += weight_decay

    # after any regularizer
    print 'Building f_cost...',
    f_cost = theano.function(inps, cost, profile=False)
    print 'Done'

    print 'Building multilingual sentence encoders'
    trng, alls_se = build_sentence_encoders(tparams, model_options)
    f_sencs = []
    for inps_se in alls_se:
        #print "sentence encoder input", inps_se
        inp_se, sentences = inps_se
        f_senc = theano.function(inp_se, sentences, profile=False)
        f_sencs.append(f_senc)

    print 'Building image encoder'
    trng, inps_ie, images = build_image_encoder(tparams, model_options)
    f_ienc = theano.function(inps_ie, images, profile=False)

    print 'Building f_grad...',
    sys.stdout.flush()
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads],
                                  profile=False)
    f_weight_norm = theano.function([], [(t**2).sum()
                                         for k, t in tparams.iteritems()],
                                    profile=False)

    if grad_clip > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(
                tensor.switch(g2 > (grad_clip**2),
                              g / tensor.sqrt(g2) * grad_clip, g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    sys.stdout.flush()
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print 'Optimization'

    # create training set iterator where
    # a heuristic tries to make sure sentences in minibatch have a similar size
    train_iter = homogeneous_data_multilingual.HomogeneousDataMultilingual(
        train, batch_size=batch_size, maxlen=maxlen_w)

    uidx = 0

    curr_best_model = None
    best_model_changed = True
    curr_best_score = 0.
    curr_best_rank = 1e10
    curr_ranks_langs = [1e10] * len(model_options['langs'])
    curr_scores_langs = [0.] * len(model_options['langs'])
    n_samples = 0

    ep_start = time.time()
    ep_times = [ep_start]
    for eidx in xrange(max_epochs):
        print 'Epoch ', eidx

        for xs, im in train_iter:
            uidx += 1
            xs, masks, im = homogeneous_data_multilingual.prepare_data(xs, im, \
                                                                       worddicts, \
                                                                       model_options=model_options, \
                                                                       maxlen=maxlen_w)

            if xs[0] is None:
                print 'Minibatch with zero sample under length ', maxlen_w
                uidx -= 1
                continue

            # do not train on certain small sentences (less than 3 words)
            #if not x_src.shape[0]>=minlen_w and x_tgt.shape[0]>= minlen_w:
            #if not all( x.shape[0]>=minlen_w for x in xs ):
            #    print "At least one minibatch (in one of the languages in the model)",
            #    print "has less words than %i. Skipping..."%minlen_w
            #    skipped_samples += xs[0].shape[1]
            #    uidx -= 1
            #    continue

            n_samples += len(xs[0])

            # Update
            ud_start = time.time()
            # flatten inputs for theano function
            inps_ = []
            inps_.extend(xs)
            inps_.extend(masks)
            inps_.append(im)
            #cost = f_grad_shared(xs, masks, im)
            cost = f_grad_shared(*inps_)
            f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

            if numpy.mod(uidx, validFreq) == 0:

                print 'Computing results...'
                curr_model = {}
                curr_model['options'] = model_options
                # store model's language dependent parameters
                for lang, worddict, iworddict, f_senc in zip(
                        langs, worddicts, iworddicts, f_sencs):
                    curr_model['worddict_%s' % lang] = worddict
                    curr_model['wordidict_%s' % lang] = iworddict
                    curr_model['f_senc_%s' % lang] = f_senc
                curr_model['f_ienc'] = f_ienc

                # up-to-date model parameters
                params_ = unzip(tparams)

                # encode sentences
                lss = []
                for lang_idx, lang in enumerate(model_options['langs']):
                    ls = encode_multilingual_sentences(curr_model,
                                                       dev[0][lang_idx],
                                                       lang=lang)
                    lss.append(ls)

                # encode images
                n_sentences_per_image = int(dev[2]) if len(dev) == 3 else 1
                dev_img_feats = numpy.repeat(dev[1],
                                             n_sentences_per_image,
                                             axis=0).astype('float32')
                lim = encode_images(curr_model, dev_img_feats)

                # print scores for each language pair
                for lang_idx1, lang1 in enumerate(model_options['langs']):
                    for lang_idx2, lang2 in enumerate(model_options['langs']):
                        if lang_idx1 == lang_idx2 or lang_idx2 <= lang_idx1:
                            continue

                        sent_sent = None
                        # if attention type is general, pass on the mapping matrix
                        if attention_type == 'general':
                            sent_sent = params_[
                                'sentence_%i_sentence_%i_mapping' %
                                (lang_idx1, lang_idx2)]

                        # text en to text de and vice-versa
                        (r1, r5, r10, medr, medr_double) = \
                            t2t(lss[ lang_idx1 ], lss[ lang_idx2 ],
                                n_sentences_per_image=n_sentences_per_image,
                                attention_type=attention_type, sent_sent=sent_sent)
                        print "%s text to %s text: %.1f, %.1f, %.1f, %d (%.2f)" % (
                            lang1, lang2, r1, r5, r10, medr, medr_double)

                        (r1, r5, r10, medr, medr_double) = \
                            t2t(lss[ lang_idx2 ], lss[ lang_idx1 ],
                                n_sentences_per_image=n_sentences_per_image,
                                attention_type=attention_type,
                                sent_sent=(sent_sent.T if sent_sent is not None else sent_sent))

                        print "%s text to %s text: %.1f, %.1f, %.1f, %d (%.2f)" % (
                            lang2, lang1, r1, r5, r10, medr, medr_double)

                # compute scores
                currranks = 0.  # the lower the better
                currscore = 0.  # the higher the better
                for lang_idx1, lang1 in enumerate(model_options['langs']):
                    sent_img = None
                    # if attention type is general, pass on the mapping matrix
                    if attention_type == 'general':
                        sent_img = params_['image_sentence_%i_mapping' %
                                           lang_idx1]

                    (r1, r5, r10, medr, medr_double) = \
                            i2t(lim, lss[ lang_idx1 ],
                                n_sentences_per_image=n_sentences_per_image,
                                attention_type=attention_type, sent_img=sent_img)
                    print "Image to %s text: %.1f, %.1f, %.1f, %d (%.2f)" % (
                        lang1, r1, r5, r10, medr, medr_double)

                    (r1i, r5i, r10i, medri, medr_doublei) = \
                            t2i(lim, lss[ lang_idx1 ],
                                n_sentences_per_image=n_sentences_per_image,
                                attention_type=attention_type, sent_img=sent_img)
                    print "%s text to image: %.1f, %.1f, %.1f, %d (%.2f)" % (
                        lang1, r1i, r5i, r10i, medri, medr_doublei)

                    # adjust current overall score including all languages
                    currranks += medr_double + medr_doublei
                    currscore += r1 + r5 + r10 + r1i + r5i + r10i

                    # best current score for individual language/image pair
                    currranks_lang = medr_double + medr_doublei
                    currscore_lang = r1 + r5 + r10 + r1i + r5i + r10i

                    # first, we select the model that ranks best (median rank).
                    # second, if there is a tie, we select the model that has best sum of scores (recall@k).
                    if currranks_lang < curr_ranks_langs[lang_idx1]:
                        curr_ranks_langs[lang_idx1] = currranks_lang
                        curr_scores_langs[lang_idx1] = currscore_lang

                        # save model
                        print 'saving best %s...' % lang1,
                        #params = unzip(tparams)
                        numpy.savez('%s.best-%s' % (saveto, lang1), **params_)
                        pkl.dump(
                            model_options,
                            open('%s.best-%s.pkl' % (saveto, lang1), 'wb'))
                        print 'done'

                    elif currranks_lang == curr_ranks_langs[lang_idx1]:
                        print '%s ranks are equal the current best (=%i) ...' % (
                            lang1, currranks_lang)
                        if currscore_lang > curr_scores_langs[lang_idx1]:
                            curr_scores_langs[lang_idx1] = currscore_lang

                            # save model
                            print 'saving best %s...' % lang1,
                            #params = unzip(tparams)
                            numpy.savez('%s.best-%s' % (saveto, lang1),
                                        **params_)
                            pkl.dump(
                                model_options,
                                open('%s.best-%s.pkl' % (saveto, lang1), 'wb'))
                            print 'done'

                if currranks < curr_best_rank:
                    curr_best_rank = currranks
                    curr_best_score = currscore
                    best_model_changed = True

                    # Save model
                    print 'Saving best overall model (%s)...' % str("-".join(
                        model_options['langs'])),
                    params = unzip(tparams)
                    numpy.savez(saveto, **params)
                    pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'))
                    print 'Done'
                elif currranks == curr_best_rank:
                    print 'global ranks are equal to the current best (=%i)...' % int(
                        currranks)
                    if currscore > curr_best_score:
                        # adjust current best overall score if needed
                        curr_best_score = currscore
                        best_model_changed = True

                        # Save model
                        print 'Saving best overall model (%s)...' % str(
                            "-".join(model_options['langs'])),
                        params = unzip(tparams)
                        numpy.savez(saveto, **params)
                        pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'))
                        print 'Done'

            if numpy.mod(uidx, testFreq) == 0:

                if not best_model_changed:
                    print '.. Best model on valid set did not change from previous evaluation on test set...'
                    print ''
                else:
                    print '.. Computing results on test set...'

                    # update current best model (on the valid set)
                    best_model_dev = load_model(saveto, verbose=False)

                    # encode sentences
                    lss = []
                    for lang_idx, lang in enumerate(model_options['langs']):
                        ls = encode_multilingual_sentences(best_model_dev,
                                                           test[0][lang_idx],
                                                           lang=lang)
                        lss.append(ls)
                    #ls = encode_multilingual_sentences(best_model_dev, test[0])

                    n_sentences_per_image = test[2] if len(test) == 3 else 1
                    test_img_feats = numpy.repeat(test[1],
                                                  n_sentences_per_image,
                                                  axis=0).astype('float32')
                    lim = encode_images(best_model_dev, test_img_feats)

                    for lang_idx1, lang1 in enumerate(model_options['langs']):
                        for lang_idx2, lang2 in enumerate(
                                model_options['langs']):
                            if lang_idx1 == lang_idx2 or lang_idx2 <= lang_idx1:
                                continue

                            sent_sent = None
                            # if attention type is general, pass on the mapping matrix
                            if attention_type == 'general':
                                sent_sent = best_model_dev['tparams'][
                                    'sentence_%i_sentence_%i_mapping' %
                                    (lang_idx1, lang_idx2)]

                            (r1, r5, r10, medr, medr_double) = \
                                    t2t(lss[ lang_idx1 ], lss[ lang_idx2 ],
                                        n_sentences_per_image=n_sentences_per_image,
                                        attention_type=attention_type, sent_sent=sent_sent)
                            print ".. %s text to %s text: %.1f, %.1f, %.1f, %d (%.2f)" % (
                                lang1, lang2, r1, r5, r10, medr, medr_double)

                            (r1, r5, r10, medr, medr_double) = \
                                    t2t(lss[ lang_idx2 ], lss[ lang_idx1 ],
                                        n_sentences_per_image=n_sentences_per_image,
                                        attention_type=attention_type,
                                        sent_sent=(sent_sent.T if sent_sent is not None else sent_sent))
                            print ".. %s text to %s text: %.1f, %.1f, %.1f, %d (%.2f)" % (
                                lang2, lang1, r1, r5, r10, medr, medr_double)

                    #for i in range(len(lss)):
                    for lang_idx1, lang1 in enumerate(model_options['langs']):
                        sent_img = None
                        # if attention type is general, pass on the mapping matrix
                        if attention_type == 'general':
                            sent_img = best_model_dev['tparams'][
                                'image_sentence_%i_mapping' % lang_idx1]

                        (r1, r5, r10, medr, medr_double) = i2t(
                            lim,
                            lss[lang_idx1],
                            n_sentences_per_image=n_sentences_per_image,
                            attention_type=attention_type,
                            sent_img=sent_img)
                        print ".. Image to %s text: %.1f, %.1f, %.1f, %d (%.2f)" % (
                            lang1, r1, r5, r10, medr, medr_double)

                        (r1i, r5i, r10i, medri, medr_doublei) = t2i(
                            lim,
                            lss[lang_idx1],
                            n_sentences_per_image=n_sentences_per_image,
                            attention_type=attention_type,
                            sent_img=sent_img)
                        print ".. %s text to image: %.1f, %.1f, %.1f, %d (%.2f)" % (
                            lang1, r1i, r5i, r10i, medri, medr_doublei)

                    best_model_changed = False
                    print ''

        ep_end = time.time()
        ep_times.append(ep_end)
        #print 'Seen %d samples'%n_samples

        seconds = ep_times[-1] - ep_times[0]
        m, s = divmod(seconds, 60)
        h, m = divmod(m, 60)
        print "Seen %i epoch(s) (%i samples) in %d:%02d:%02d" % (
            eidx, n_samples, h, m, s)

    seconds = ep_times[-1] - ep_times[0]
    m, s = divmod(seconds, 60)
    h, m = divmod(m, 60)
    print "Finished execution in %d:%02d:%02d" % (h, m, s)
예제 #12
0
def train(margin=0.2,
          dim=300,
          dim_word=300,
          max_epochs=100,
          dispFreq=50,
          validFreq=200,
          grad_clip=2.0,
          maxlen_w=150,
          batch_size=300,
          early_stop=20,
          lrate=0.001,
          reload_=False,
          load_dict=False):
    # Model options
    model_options = {}
    model_options['UM_Corpus'] = data
    model_options['margin'] = margin
    model_options['dim'] = dim
    model_options['dim_word'] = dim_word
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['grad_clip'] = grad_clip
    model_options['maxlen_w'] = maxlen_w
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['validFreq'] = validFreq
    model_options['lrate'] = lrate
    model_options['reload_'] = reload_

    print(model_options)

    # reload options
    if reload_ and os.path.exists(saveto):
        print('reloading...' + saveto)
        with open('%s.pkl' % saveto, 'rb') as f:
            model_options = pkl.load(f)

    # Load training and development sets
    print('loading dataset')
    train, dev = load_dataset(data)
    test = load_dataset(data, load_test=True)
    if load_dict:
        with open('%s.dictionary.pkl' % saveto, 'rb') as f:
            worddict = pkl.load(f)
            n_words = len(worddict)
            model_options['n_words'] = len(worddict)
    else:
        # Create and save dictionary
        print('Create dictionary')
        worddict = build_dictionary(train[0] + train[1] + dev[0] + dev[1])
        n_words = len(worddict)
        model_options['n_words'] = n_words
        print('Dictionary size: ' + str(n_words))
        with open('%s.dictionary_%s.pkl' % (saveto, run), 'wb') as f:
            pkl.dump(worddict, f)

    # # Inverse dictionary
    # word_idict = dict()
    # for kk, vv in worddict.iteritems():
    #     word_idict[vv] = kk
    # word_idict[0] = '<eos>'
    # word_idict[1] = 'UNK'

    model_options['worddict'] = worddict
    # model_options['word_idict'] = word_idict

    # # Each sentence in the minibatch have same length (for encoder)
    # train_iter = HomogeneousData([train[0], train[1]], batch_size=batch_size, maxlen=maxlen_w)

    share_model = LIUMCVC_Encoder(model_options)
    # gpus = [0, 1, 2, 3]
    # share_model = torch.nn.DataParallel(share_model, device_ids=gpus)
    share_model = share_model.cuda()

    loss_fn = PairwiseRankingLoss(margin=margin)
    loss_fn = loss_fn.cuda()

    params = filter(lambda p: p.requires_grad, share_model.parameters())
    optimizer = torch.optim.Adam(params, lrate)

    # decrease learning rate
    scheduler = ReduceLROnPlateau(optimizer, factor=0.1, patience=10)

    uidx = 0
    curr = 1e10
    n_samples = 0

    # For Early-stopping
    best_step = 0

    for eidx in xrange(1, max_epochs + 1):

        print('Epoch ', eidx)

        train_data_index = prepare_data(train, worddict, n_words)
        for en, cn, en_lengths, cn_lengths, en_index, cn_index in data_generator(
                train_data_index, batch_size):
            uidx += 1
            n_samples += len(en)
            en, cn = share_model(en, en_lengths, en_index, cn, cn_lengths,
                                 cn_index)

            loss = loss_fn(en, cn)
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm(params, grad_clip)
            optimizer.step()

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, '\tUpdate ', uidx, '\tCost ', loss.data.cpu(
                ).numpy()[0]

            if numpy.mod(uidx, validFreq) == 0:

                print 'Computing results...'
                curr_model = {}
                curr_model['options'] = model_options
                curr_model['worddict'] = worddict
                # curr_model['word_idict'] = word_idict
                curr_model['en_cn_model'] = share_model

                r_time = time.time()
                fen, fcn = encode_sentences(curr_model, dev)
                score = devloss(fen, fcn, margin=margin)
                fen, fcn = encode_sentences(curr_model, test, test=True)
                test_score = devloss(fen, fcn, margin=margin)

                print "Cal Recall@K using %ss" % (time.time() - r_time)

                curr_step = uidx / validFreq

                #scheduler.step(score)

                currscore = score
                print 'loss on dev', score
                print 'loss on test', test_score
                if currscore < curr:
                    curr = currscore
                    # best_r1, best_r5, best_r10, best_medr = r1, r5, r10, medr
                    # best_r1i, best_r5i, best_r10i, best_medri = r1i, r5i, r10i, medri
                    best_step = curr_step

                    # Save model
                    print 'Saving model...',
                    pkl.dump(model_options,
                             open('%s_params_%s.pkl' % (saveto, run), 'wb'))
                    torch.save(share_model.state_dict(),
                               '%s_model_%s.pkl' % (saveto, run))
                    print 'Done'

                if curr_step - best_step > early_stop:
                    print 'Early stopping ...'
                    # print "cn to en: %.1f, %.1f, %.1f, %.1f" % (best_r1, best_r5, best_r10, best_medr)
                    # print "en to cn: %.1f, %.1f, %.1f, %.1f" % (best_r1i, best_r5i, best_r10i, best_medri)
                    return

        print 'Seen %d samples' % n_samples
예제 #13
0
def trainer(data='coco',  #f8k, f30k, coco
            margin=0.2,
            dim=1024,
            dim_image=4096,
            dim_word=300,
            encoder='gru',  # gru OR bow
            max_epochs=15,
            dispFreq=10,
            decay_c=0.,
            grad_clip=2.,
            maxlen_w=100,
            optimizer='adam',
            batch_size = 128,
            saveto='/ais/gobi3/u/rkiros/uvsmodels/coco.npz',
            validFreq=100,
            lrate=0.0002,
            reload_=False):

    # Model options
    model_options = {}
    model_options['data'] = data
    model_options['margin'] = margin
    model_options['dim'] = dim
    model_options['dim_image'] = dim_image
    model_options['dim_word'] = dim_word
    model_options['encoder'] = encoder
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['maxlen_w'] = maxlen_w
    model_options['optimizer'] = optimizer
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['validFreq'] = validFreq
    model_options['lrate'] = lrate
    model_options['reload_'] = reload_

    print model_options

    # reload options
    if reload_ and os.path.exists(saveto):
        print 'reloading...' + saveto
        with open('%s.pkl'%saveto, 'rb') as f:
            models_options = pkl.load(f)

    # Load training and development sets
    print 'Loading dataset'
    train, dev = load_dataset(data)[:2]

    # Create and save dictionary
    print 'Creating dictionary'
    worddict = build_dictionary(train[0]+dev[0])[0]
    n_words = len(worddict)
    model_options['n_words'] = n_words
    print 'Dictionary size: ' + str(n_words)
    with open('%s.dictionary.pkl'%saveto, 'wb') as f:
        pkl.dump(worddict, f)

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    print 'Building model'
    params = init_params(model_options)
    # reload parameters
    if reload_ and os.path.exists(saveto):
        params = load_params(saveto, params)

    tparams = init_tparams(params)

    trng, inps, cost = build_model(tparams, model_options)

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, cost, profile=False)
    print 'Done'

    # weight decay, if applicable
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv ** 2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print 'Building f_cost...',
    f_cost = theano.function(inps, cost, profile=False)
    print 'Done'

    print 'Building sentence encoder'
    trng, inps_se, sentences = build_sentence_encoder(tparams, model_options)
    f_senc = theano.function(inps_se, sentences, profile=False)

    print 'Building image encoder'
    trng, inps_ie, images = build_image_encoder(tparams, model_options)
    f_ienc = theano.function(inps_ie, images, profile=False)

    print 'Building f_grad...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads], profile=False)
    f_weight_norm = theano.function([], [(t**2).sum() for k,t in tparams.iteritems()], profile=False)

    if grad_clip > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(tensor.switch(g2 > (grad_clip**2),
                                           g / tensor.sqrt(g2) * grad_clip,
                                           g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print 'Optimization'

    # Each sentence in the minibatch have same length (for encoder)
    train_iter = homogeneous_data.HomogeneousData([train[0], train[1]], batch_size=batch_size, maxlen=maxlen_w)

    uidx = 0
    curr = 0.
    n_samples = 0
    
    for eidx in xrange(max_epochs):

        print 'Epoch ', eidx

        for x, im in train_iter:
            n_samples += len(x)
            uidx += 1

            x, mask, im = homogeneous_data.prepare_data(x, im, worddict, maxlen=maxlen_w, n_words=n_words)

            if x == None:
                print 'Minibatch with zero sample under length ', maxlen_w
                uidx -= 1
                continue

            # Update
            ud_start = time.time()
            cost = f_grad_shared(x, mask, im)
            f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

            if numpy.mod(uidx, validFreq) == 0:

                print 'Computing results...'
                curr_model = {}
                curr_model['options'] = model_options
                curr_model['worddict'] = worddict
                curr_model['word_idict'] = word_idict
                curr_model['f_senc'] = f_senc
                curr_model['f_ienc'] = f_ienc

                ls = encode_sentences(curr_model, dev[0])
                lim = encode_images(curr_model, dev[1])

                (r1, r5, r10, medr) = i2t(lim, ls)
                print "Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr)
                (r1i, r5i, r10i, medri) = t2i(lim, ls)
                print "Text to image: %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri)

                currscore = r1 + r5 + r10 + r1i + r5i + r10i
                if currscore > curr:
                    curr = currscore

                    # Save model
                    print 'Saving...',
                    params = unzip(tparams)
                    numpy.savez(saveto, **params)
                    pkl.dump(model_options, open('%s.pkl'%saveto, 'wb'))
                    print 'Done'

        print 'Seen %d samples'%n_samples
예제 #14
0
def trainer(load_from=None, save_dir='snapshots', name='anon', **kwargs):
    """
    :param load_from: location to load parameters + options from
    :param name: name of model, used as location to save parameters + options
    """

    curr_model = dict()

    # load old model, including parameters, but overwrite with new options
    if load_from:
        print 'reloading...' + load_from
        with open('%s.pkl' % load_from, 'rb') as f:
            curr_model = pkl.load(f)
    else:
        curr_model['options'] = {}

    for k, v in kwargs.iteritems():
        curr_model['options'][k] = v

    model_options = curr_model['options']

    # initialize logger
    import datetime
    timestampedName = datetime.datetime.now().strftime(
        '%Y_%m_%d_%H_%M_%S') + '_' + name

    from logger import Log
    log = Log(name=timestampedName,
              hyperparams=model_options,
              saveDir='vis/training',
              xLabel='Examples Seen',
              saveFrequency=1)

    print curr_model['options']

    # Load training and development sets
    print 'Loading dataset'
    dataset = load_dataset(model_options['data'],
                           cnn=model_options['cnn'],
                           load_train=True)
    train = dataset['train']
    dev = dataset['dev']

    # Create dictionary
    print 'Creating dictionary'
    worddict = build_dictionary(train['caps'] + dev['caps'])
    print 'Dictionary size: ' + str(len(worddict))
    curr_model['worddict'] = worddict
    curr_model['options']['n_words'] = len(worddict) + 2

    # save model
    pkl.dump(curr_model, open('%s/%s.pkl' % (save_dir, name), 'wb'))

    print 'Loading data'
    train_iter = datasource.Datasource(train,
                                       batch_size=model_options['batch_size'],
                                       worddict=worddict)
    dev = datasource.Datasource(dev, worddict=worddict)
    dev_caps, dev_ims = dev.all()

    print 'Building model'
    params = init_params(model_options)
    # reload parameters
    if load_from is not None and os.path.exists(load_from):
        params = load_params(load_from, params)

    tparams = init_tparams(params)

    inps, cost = build_model(tparams, model_options)

    print 'Building sentence encoder'
    inps_se, sentences = build_sentence_encoder(tparams, model_options)
    f_senc = theano.function(inps_se, sentences, profile=False)

    print 'Building image encoder'
    inps_ie, images = build_image_encoder(tparams, model_options)
    f_ienc = theano.function(inps_ie, images, profile=False)

    print 'Building f_grad...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))

    print 'Building errors..'
    inps_err, errs = build_errors(model_options)
    f_err = theano.function(inps_err, errs, profile=False)

    curr_model['f_senc'] = f_senc
    curr_model['f_ienc'] = f_ienc
    curr_model['f_err'] = f_err

    if model_options['grad_clip'] > 0.:
        grads = [maxnorm(g, model_options['grad_clip']) for g in grads]

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(model_options['optimizer'])(lr, tparams,
                                                               grads, inps,
                                                               cost)

    print 'Optimization'

    uidx = 0
    curr = 0
    n_samples = 0

    for eidx in xrange(model_options['max_epochs']):

        print 'Epoch ', eidx

        for x, mask, im in train_iter:
            n_samples += x.shape[1]
            uidx += 1

            # Update
            ud_start = time.time()
            cost = f_grad_shared(x, mask, im)
            f_update(model_options['lrate'])
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, model_options['dispFreq']) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud
                log.update({'Error': float(cost)}, n_samples)

            if numpy.mod(uidx, model_options['validFreq']) == 0:

                print 'Computing results...'

                # encode sentences efficiently
                dev_s = encode_sentences(
                    curr_model,
                    dev_caps,
                    batch_size=model_options['batch_size'])
                dev_i = encode_images(curr_model, dev_ims)

                # compute errors
                dev_errs = compute_errors(curr_model, dev_s, dev_i)

                # compute ranking error
                (r1, r5, r10, medr, meanr), vis_details = t2i(dev_errs,
                                                              vis_details=True)
                (r1i, r5i, r10i, medri, meanri) = i2t(dev_errs)
                print "Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % (
                    r1, r5, r10, medr, meanr)
                log.update(
                    {
                        'R@1': r1,
                        'R@5': r5,
                        'R@10': r10,
                        'median_rank': medr,
                        'mean_rank': meanr
                    }, n_samples)
                print "Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % (
                    r1i, r5i, r10i, medri, meanri)
                log.update(
                    {
                        'Image2Caption_R@1': r1i,
                        'Image2Caption_R@5': r5i,
                        'Image2CaptionR@10': r10i,
                        'Image2Caption_median_rank': medri,
                        'Image2Caption_mean_rank': meanri
                    }, n_samples)

                tot = r1 + r5 + r10
                if tot > curr:
                    curr = tot
                    # Save parameters
                    print 'Saving...',
                    numpy.savez('%s/%s' % (save_dir, name), **unzip(tparams))
                    print 'Done'
                    vis_details['hyperparams'] = model_options
                    # Save visualization details
                    with open(
                            'vis/roc/%s/%s.json' %
                        (model_options['data'], timestampedName), 'w') as f:
                        json.dump(vis_details, f)
                    # Add the new model to the index
                    try:
                        index = json.load(open('vis/roc/index.json', 'r'))
                    except IOError:
                        index = {model_options['data']: []}

                    models = index[model_options['data']]
                    if timestampedName not in models:
                        models.append(timestampedName)

                    with open('vis/roc/index.json', 'w') as f:
                        json.dump(index, f)

        print 'Seen %d samples' % n_samples
# coding: utf-8

import vocab
import train
import tools
import numpy as np

with open("../../wikipedia_txt/result_wakati.txt") as f:
    fdata = [line.rstrip() for i, line in enumerate(f)]
print '# lines: ', len(fdata)

worddict, wordcount = vocab.build_dictionary(fdata)
vocab.save_dictionary(worddict, wordcount, "word_dict")
print '# vocab: ', len(worddict)

train.trainer(fdata, dictionary="word_dict", saveFreq=100, saveto="model", reload_=True, n_words=40000)

model = tools.load_model()
vectors = tools.encode(model, fdata, use_norm=False)
np.savez('vecs.npz', vectors)

예제 #16
0
def trainer(load_from=None,
            save_dir='snapshots',
            name='anon',
            **kwargs):
    """
    :param load_from: location to load parameters + options from
    :param name: name of model, used as location to save parameters + options
    """

    curr_model = dict()

    # load old model, including parameters, but overwrite with new options
    if load_from:
        print 'reloading...' + load_from
        with open('%s.pkl'%load_from, 'rb') as f:
            curr_model = pkl.load(f)
    else:
        curr_model['options'] = {}

    for k, v in kwargs.iteritems():
        curr_model['options'][k] = v

    model_options = curr_model['options']

    # initialize logger
    import datetime
    timestampedName = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S') + '_' + name

    from logger import Log
    log = Log(name=timestampedName, hyperparams=model_options, saveDir='vis/training',
              xLabel='Examples Seen', saveFrequency=1)


    print curr_model['options']




    # Load training and development sets
    print 'Loading dataset'
    dataset = load_dataset(model_options['data'], cnn=model_options['cnn'], load_train=True)
    train = dataset['train']
    dev = dataset['dev']

    # Create dictionary
    print 'Creating dictionary'
    worddict = build_dictionary(train['caps']+dev['caps'])
    print 'Dictionary size: ' + str(len(worddict))
    curr_model['worddict'] = worddict
    curr_model['options']['n_words'] = len(worddict) + 2

    # save model
    pkl.dump(curr_model, open('%s/%s.pkl' % (save_dir, name), 'wb'))


    print 'Loading data'
    train_iter = datasource.Datasource(train, batch_size=model_options['batch_size'], worddict=worddict)
    dev = datasource.Datasource(dev, worddict=worddict)
    dev_caps, dev_ims = dev.all()

    print 'Building model'
    params = init_params(model_options)
    # reload parameters
    if load_from is not None and os.path.exists(load_from):
        params = load_params(load_from, params)

    tparams = init_tparams(params)

    inps, cost = build_model(tparams, model_options)

    print 'Building sentence encoder'
    inps_se, sentences = build_sentence_encoder(tparams, model_options)
    f_senc = theano.function(inps_se, sentences, profile=False)

    print 'Building image encoder'
    inps_ie, images = build_image_encoder(tparams, model_options)
    f_ienc = theano.function(inps_ie, images, profile=False)

    print 'Building f_grad...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))

    print 'Building errors..'
    inps_err, errs = build_errors(model_options)
    f_err = theano.function(inps_err, errs, profile=False)

    curr_model['f_senc'] = f_senc
    curr_model['f_ienc'] = f_ienc
    curr_model['f_err'] = f_err



    if model_options['grad_clip'] > 0.:
        grads = [maxnorm(g, model_options['grad_clip']) for g in grads]

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(model_options['optimizer'])(lr, tparams, grads, inps, cost)

    print 'Optimization'

    uidx = 0
    curr = 0
    n_samples = 0


    
    for eidx in xrange(model_options['max_epochs']):

        print 'Epoch ', eidx

        for x, mask, im in train_iter:
            n_samples += x.shape[1]
            uidx += 1

            # Update
            ud_start = time.time()
            cost = f_grad_shared(x, mask, im)
            f_update(model_options['lrate'])
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, model_options['dispFreq']) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud
                log.update({'Error': float(cost)}, n_samples)


            if numpy.mod(uidx, model_options['validFreq']) == 0:

                print 'Computing results...'

                # encode sentences efficiently
                dev_s = encode_sentences(curr_model, dev_caps, batch_size=model_options['batch_size'])
                dev_i = encode_images(curr_model, dev_ims)


                # compute errors
                dev_errs = compute_errors(curr_model, dev_s, dev_i)

                # compute ranking error
                (r1, r5, r10, medr, meanr), vis_details = t2i(dev_errs, vis_details=True)
                (r1i, r5i, r10i, medri, meanri) = i2t(dev_errs)
                print "Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr, meanr)
                log.update({'R@1': r1, 'R@5': r5, 'R@10': r10, 'median_rank': medr, 'mean_rank': meanr}, n_samples)
                print "Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri, meanri)
                log.update({'Image2Caption_R@1': r1i, 'Image2Caption_R@5': r5i, 'Image2CaptionR@10': r10i, 'Image2Caption_median_rank': medri, 'Image2Caption_mean_rank': meanri}, n_samples)

                tot = r1 + r5 + r10
                if tot > curr:
                    curr = tot
                    # Save parameters
                    print 'Saving...',
                    numpy.savez('%s/%s'%(save_dir, name), **unzip(tparams))
                    print 'Done'
                    vis_details['hyperparams'] = model_options
                    # Save visualization details
                    with open('vis/roc/%s/%s.json' % (model_options['data'], timestampedName), 'w') as f:
                        json.dump(vis_details, f)
                    # Add the new model to the index
                    index = json.load(open('vis/roc/index.json', 'r'))
                    models = index[model_options['data']]
                    if timestampedName not in models:
                        models.append(timestampedName)

                    with open('vis/roc/index.json', 'w') as f:
                        json.dump(index, f)






        print 'Seen %d samples'%n_samples
예제 #17
0
def trainer(data='f30k',
            margin=0.2,
            dim=1024,
            dim_image=4096,
            dim_word=300,
            max_epochs=15,
            encoder='lstm',
            dispFreq=10,
            grad_clip=2.0,
            maxlen_w=150,
            batch_size=128,
            saveto='vse/f30K',
            validFreq=100,
            early_stop=20,
            lrate=1e-3,
            reload_=False):
    """
    Train the ``Img_Sen_Artist_Ranking`` model with PyTorch.

    Loads titles, album images, artists and genres, builds (or reloads) the
    word dictionary, then optimises a pairwise ranking loss with Adam. Every
    ``validFreq`` updates the recall metrics of the current batch are logged
    and the model is saved when the combined score improves.

    :param data: dataset name handed to ``load_dataset``
    :param margin: margin of ``PairwiseRankingLoss``; also stored in the options
    :param dim: stored in the options handed to the model
    :param dim_image: stored in the options handed to the model
    :param dim_word: stored in the options handed to the model
    :param max_epochs: number of passes over the training iterator
    :param encoder: used only as a suffix of the checkpoint file names
    :param dispFreq: stored in the options; not otherwise used here
    :param grad_clip: maximum gradient norm for ``clip_grad_norm_``
    :param maxlen_w: ``maxlen`` passed to the data iterator and ``prepare_data``
    :param batch_size: minibatch size for the training iterator
    :param saveto: prefix of the option, dictionary and model files
    :param validFreq: run the validation branch every ``validFreq`` updates
    :param early_stop: number of validation steps without improvement after
        which the learning rate is reset to ``1e-4`` (training continues)
    :param lrate: initial learning rate for Adam
    :param reload_: when true and ``saveto`` exists, reload the pickled options
    :return: ``None``
    """
    # Model options
    model_options = {
        'data': data,
        'margin': margin,
        'dim': dim,
        'dim_image': dim_image,
        'dim_word': dim_word,
        'max_epochs': max_epochs,
        'dispFreq': dispFreq,
        'grad_clip': grad_clip,
        'maxlen_w': maxlen_w,
        'batch_size': batch_size,
        'saveto': saveto,
        'validFreq': validFreq,
        'lrate': lrate,
        'reload_': reload_,
    }

    logging.info(model_options)

    # reload options
    if reload_ and os.path.exists(saveto):
        logging.info('reloading...' + saveto)
        with open('%s.pkl' % saveto, 'rb') as f:
            model_options = pkl.load(f)

    # Load training and development sets
    logging.info('loading dataset')
    titles, album_ims, artist, genre = load_dataset(data)

    # Create and save dictionary
    if os.path.exists('%s.dictionary.pkl' % saveto):
        logging.info('loading dict from...' + saveto)
        with open('%s.dictionary.pkl' % saveto, 'rb') as wdict:
            worddict = pkl.load(wdict)
        n_words = len(worddict)
        model_options['n_words'] = n_words
        logging.info('Dictionary size: ' + str(n_words))
    else:

        logging.info('Create dictionary')
        worddict = build_dictionary(titles + artist + genre)[0]
        n_words = len(worddict)
        model_options['n_words'] = n_words
        logging.info('Dictionary words: ' + str(n_words))
        with open('%s.dictionary.pkl' % saveto, 'wb') as f:
            pkl.dump(worddict, f)

    # Inverse dictionary (index -> word); indices 0 and 1 are forced to the
    # end-of-sentence and unknown-word markers.
    word_idict = {vv: kk for kk, vv in worddict.items()}
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    model_options['worddict'] = worddict
    model_options['word_idict'] = word_idict

    # Each sentence in the minibatch have same length (for encoder)
    train_iter = homogeneous_data.HomogeneousData(
        [titles, album_ims, artist, genre],
        batch_size=batch_size,
        maxlen=maxlen_w)

    img_sen_model = Img_Sen_Artist_Ranking(model_options)
    # Resume from a previously saved state dict when one exists.
    if os.path.exists('%s_model_%s.pkl' % (saveto, encoder)):
        logging.info('Loading model...')
        img_sen_model.load_state_dict(
            torch.load('%s_model_%s.pkl' % (saveto, encoder)))
        logging.info('Done')
    img_sen_model = img_sen_model.cuda()

    loss_fn = PairwiseRankingLoss(margin=margin).cuda()

    # Must be a list: a filter() iterator would be exhausted by the optimizer
    # constructor and clip_grad_norm_ below would then clip nothing.
    params = [p for p in img_sen_model.parameters() if p.requires_grad]
    optimizer = torch.optim.Adam(params, lr=lrate)
    scheduler = ReduceLROnPlateau(optimizer,
                                  factor=0.1,
                                  patience=40,
                                  mode='min',
                                  verbose=True,
                                  threshold=1e-8)

    uidx = 0        # number of updates performed
    curr = 0.0      # best validation score so far
    n_samples = 0

    # For Early-stopping. The genre values are initialised too, so the
    # early-stop log below cannot hit an unbound name when no validation step
    # has improved the score yet.
    best_r1, best_r5, best_r10, best_medr = 0.0, 0.0, 0.0, 0
    best_r1g, best_r5g, best_r10g, best_medrg = 0.0, 0.0, 0.0, 0
    best_step = 0

    writer = SummaryWriter()
    for eidx in range(max_epochs):

        for x, im, artist, genre in train_iter:
            n_samples += len(x)
            uidx += 1

            x, im, artist, genre = homogeneous_data.prepare_data(
                x,
                im,
                artist,
                genre,
                worddict,
                maxlen=maxlen_w,
                n_words=n_words)

            if x is None:
                # The value needs a placeholder; without one the logging
                # module reports a formatting error instead of the message.
                logging.info('Minibatch with zero sample under length %s',
                             maxlen_w)
                uidx -= 1
                continue

            x = Variable(torch.from_numpy(x).cuda())
            im = Variable(torch.from_numpy(im).cuda())
            artist = Variable(torch.from_numpy(artist).cuda())
            genre = Variable(torch.from_numpy(genre).cuda())

            # Forward pass
            x1, im1, artist, genre = img_sen_model(x, im, artist, genre)

            # The loss is computed before the validation branch so the value
            # logged there belongs to this batch. Previously the branch read
            # `cost` left over from the preceding iteration, which is unbound
            # when validFreq == 1.
            cost = loss_fn(im1, x1, artist, genre)

            # Validate on the input before the trainer has seen it
            if numpy.mod(uidx, validFreq) == 0:
                img_sen_model.eval()
                with torch.no_grad():
                    print('Epoch ', eidx, '\tUpdate@ ', uidx, '\tCost ',
                          cost.data.item())
                    writer.add_scalar('Evaluation/Validation_Loss',
                                      cost.data.item(), uidx)
                    # NOTE(review): this ranks against the raw input `x`, not
                    # the model output `x1` — confirm that is intended.
                    (r1, r5, r10, medr) = i2t(im1, x)  #distances with l2norm
                    logging.info("Image to text: %.1f, %.1f, %.1f, %.1f" %
                                 (r1, r5, r10, medr))

                    (r1g, r5g, r10g, medrg) = i2t(im1, genre)
                    logging.info("Image to genre: %.1f, %.1f, %.1f, %.1f" %
                                 (r1g, r5g, r10g, medrg))

                    (r1a, r5a, r10a, medra) = i2t(im1, artist)
                    logging.info("Image to Artist: %.1f, %.1f, %.1f, %.1f" %
                                 (r1a, r5a, r10a, medra))

                    logging.info("Cal Recall@K ")
                    writer.add_scalars('Validation Recal/Image2Album', {
                        'r@1': r1,
                        'r@5': r5,
                        'r@10': r10
                    }, uidx)

                    writer.add_scalars('Validation Recal/Image2Genres', {
                        'r@1': r1g,
                        'r@5': r5g,
                        'r@10': r10g
                    }, uidx)

                    writer.add_scalars('Validation Recal/Image2Artist', {
                        'r@1': r1a,
                        'r@5': r5a,
                        'r@10': r10a
                    }, uidx)

                    curr_step = uidx / validFreq

                    # Combined score: all recalls minus the median ranks.
                    currscore = r1 + r5 + r10 + r1a + r5a + r10a + r1g + r5g + r10g - medr - medrg - medra
                    if currscore > curr:
                        curr = currscore
                        best_r1, best_r5, best_r10, best_medr = r1, r5, r10, medr
                        best_r1g, best_r5g, best_r10g, best_medrg = r1g, r5g, r10g, medrg
                        best_step = curr_step

                        # Save model
                        logging.info('Saving model...')
                        with open('%s_params_%s.pkl' % (saveto, encoder),
                                  'wb') as f:
                            pkl.dump(model_options, f)
                        torch.save(img_sen_model.state_dict(),
                                   '%s_model_%s.pkl' % (saveto, encoder))
                        logging.info('Done')

                    if curr_step - best_step > early_stop:
                        logging.info('early stopping, jumping now...')
                        logging.info("Image to text: %.1f, %.1f, %.1f, %.1f" %
                                     (best_r1, best_r5, best_r10, best_medr))
                        logging.info(
                            "Image to genre: %.1f, %.1f, %.1f, %.1f" %
                            (best_r1g, best_r5g, best_r10g, best_medrg))

                        # Training is not stopped; the learning rate is
                        # forced to 1e-4 and the loop continues.
                        lrate = 1e-4
                        for param_group in optimizer.param_groups:
                            param_group['lr'] = lrate

            img_sen_model.train()
            writer.add_scalar('Evaluation/training_Loss', cost, uidx)

            optimizer.zero_grad()
            cost.backward()
            torch.nn.utils.clip_grad_norm_(params, grad_clip)

            scheduler.step(cost.data.item())
            optimizer.step()

        logging.info('Seen %d samples' % n_samples)
예제 #18
0
def trainer(
        data='coco',  #f8k, f30k, coco
        margin=0.2,
        dim=1024,
        dim_image=4096,
        dim_word=300,
        encoder='gru',  # gru OR bow
        max_epochs=15,
        dispFreq=10,
        decay_c=0.,
        grad_clip=2.,
        maxlen_w=100,
        optimizer='adam',
        batch_size=128,
        saveto='/ais/gobi3/u/rkiros/uvsmodels/coco.npz',
        validFreq=100,
        lrate=0.0002,
        reload_=False):
    """
    Train an image-sentence model with Theano and checkpoint the best one.

    Builds a dictionary from the training and development captions, compiles
    the cost, encoder and optimizer functions, then loops over minibatches.
    Every ``validFreq`` updates the development set is encoded and ranked with
    ``i2t`` / ``t2i``; when the sum of the six recall values improves, the
    parameters are written to ``saveto`` and the options to ``saveto + '.pkl'``.

    :param data: dataset name handed to ``load_dataset``
    :param margin: stored in the options for the model-building helpers
    :param dim: stored in the options for the model-building helpers
    :param dim_image: stored in the options for the model-building helpers
    :param dim_word: stored in the options for the model-building helpers
    :param encoder: stored in the options for the model-building helpers
    :param max_epochs: number of passes over the training iterator
    :param dispFreq: print the cost every ``dispFreq`` updates
    :param decay_c: weight-decay coefficient; applied only when > 0
    :param grad_clip: gradient-norm threshold; clipping applied only when > 0
    :param maxlen_w: ``maxlen`` passed to the data iterator and ``prepare_data``
    :param optimizer: name of the optimizer factory, resolved with ``eval``
    :param batch_size: minibatch size for the training iterator
    :param saveto: path of the parameter file; also the prefix of the
        ``.pkl`` options file and the ``.dictionary.pkl`` file
    :param validFreq: validate every ``validFreq`` updates
    :param lrate: learning rate passed to ``f_update``
    :param reload_: when true and ``saveto`` exists, reload options and parameters
    :return: ``(1., 1., 1.)`` if the cost becomes NaN/Inf, otherwise ``None``
    """

    # Model options
    model_options = {}
    model_options['data'] = data
    model_options['margin'] = margin
    model_options['dim'] = dim
    model_options['dim_image'] = dim_image
    model_options['dim_word'] = dim_word
    model_options['encoder'] = encoder
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['maxlen_w'] = maxlen_w
    model_options['optimizer'] = optimizer
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['validFreq'] = validFreq
    model_options['lrate'] = lrate
    model_options['reload_'] = reload_

    print(model_options)

    # reload options
    if reload_ and os.path.exists(saveto):
        print('reloading...' + saveto)
        with open('%s.pkl' % saveto, 'rb') as f:
            # The loaded options used to be bound to a misspelled name
            # ('models_options') and were therefore never used.
            model_options = pkl.load(f)

    # Load training and development sets
    print('Loading dataset')
    train, dev = load_dataset(data)[:2]

    # Create and save dictionary
    print('Creating dictionary')
    worddict = build_dictionary(train[0] + dev[0])[0]
    n_words = len(worddict)
    model_options['n_words'] = n_words
    print('Dictionary size: ' + str(n_words))
    with open('%s.dictionary.pkl' % saveto, 'wb') as f:
        pkl.dump(worddict, f)

    # Inverse dictionary (index -> word); indices 0 and 1 are forced to the
    # end-of-sentence and unknown-word markers.
    # items()/range() are used instead of iteritems()/xrange() so the
    # function runs on both Python 2 and Python 3.
    word_idict = dict()
    for kk, vv in worddict.items():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    print('Building model')
    params = init_params(model_options)
    # reload parameters
    if reload_ and os.path.exists(saveto):
        params = load_params(saveto, params)

    tparams = init_tparams(params)

    trng, inps, cost = build_model(tparams, model_options)

    # before any regularizer
    print('Building f_log_probs...', )
    f_log_probs = theano.function(inps, cost, profile=False)
    print('Done')

    # weight decay, if applicable: add decay_c * sum of squared parameters
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.items():
            weight_decay += (vv**2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print('Building f_cost...', )
    f_cost = theano.function(inps, cost, profile=False)
    print('Done')

    print('Building sentence encoder')
    trng, inps_se, sentences = build_sentence_encoder(tparams, model_options)
    f_senc = theano.function(inps_se, sentences, profile=False)

    print('Building image encoder')
    trng, inps_ie, images = build_image_encoder(tparams, model_options)
    f_ienc = theano.function(inps_ie, images, profile=False)

    print('Building f_grad...', )
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads],
                                  profile=False)
    f_weight_norm = theano.function([], [(t**2).sum()
                                         for k, t in tparams.items()],
                                    profile=False)

    if grad_clip > 0.:
        # g2 is the squared norm over all gradients; when it exceeds
        # grad_clip**2 every gradient is scaled by grad_clip / sqrt(g2).
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(
                tensor.switch(g2 > (grad_clip**2),
                              g / tensor.sqrt(g2) * grad_clip, g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print('Building optimizers...', )
    # (compute gradients), (updates parameters)
    # NOTE: eval() executes the string it is given; only pass trusted
    # optimizer names.
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print('Optimization')

    # Each sentence in the minibatch have same length (for encoder)
    train_iter = homogeneous_data.HomogeneousData([train[0], train[1]],
                                                  batch_size=batch_size,
                                                  maxlen=maxlen_w)

    uidx = 0        # number of updates performed
    curr = 0.       # best validation score so far
    n_samples = 0

    for eidx in range(max_epochs):

        print('Epoch ', eidx)

        for x, im in train_iter:
            n_samples += len(x)
            uidx += 1

            x, mask, im = homogeneous_data.prepare_data(x,
                                                        im,
                                                        worddict,
                                                        maxlen=maxlen_w,
                                                        n_words=n_words)

            # Identity test: '== None' on a NumPy array is an element-wise
            # comparison, not a check for a missing batch.
            if x is None:
                print('Minibatch with zero sample under length ', maxlen_w)
                uidx -= 1
                continue

            # Update
            ud_start = time.time()
            cost = f_grad_shared(x, mask, im)
            f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print('NaN detected')
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print('Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ',
                      ud)

            if numpy.mod(uidx, validFreq) == 0:

                print('Computing results...')
                curr_model = {}
                curr_model['options'] = model_options
                curr_model['worddict'] = worddict
                curr_model['word_idict'] = word_idict
                curr_model['f_senc'] = f_senc
                curr_model['f_ienc'] = f_ienc

                ls = encode_sentences(curr_model, dev[0])
                lim = encode_images(curr_model, dev[1])

                (r1, r5, r10, medr) = i2t(lim, ls)
                print("Image to text: %.1f, %.1f, %.1f, %.1f" %
                      (r1, r5, r10, medr))
                (r1i, r5i, r10i, medri) = t2i(lim, ls)
                print("Text to image: %.1f, %.1f, %.1f, %.1f" %
                      (r1i, r5i, r10i, medri))

                # Model selection uses the sum of the six recall values.
                currscore = r1 + r5 + r10 + r1i + r5i + r10i
                if currscore > curr:
                    curr = currscore

                    # Save model
                    print('Saving...', )
                    params = unzip(tparams)
                    numpy.savez(saveto, **params)
                    with open('%s.pkl' % saveto, 'wb') as f:
                        pkl.dump(model_options, f)
                    print('Done')

        print('Seen %d samples' % n_samples)
def trainer(**kwargs):
    """
    Train the model according to input params.
    Info about input params is available in parameters.py

    Every keyword argument becomes a model option.  The options read
    directly by this function are ``reload_``, ``load_from``, ``data``,
    ``embedding``, ``data_path``, ``test_subset``, ``batch_size``,
    ``grad_clip``, ``optimizer``, ``max_epochs``, ``lrate``, ``dispFreq``
    and ``validFreq``; the whole option dict is also handed to the
    model-building and file-naming helpers.

    When ``reload_`` is true, the options, the training state (epoch,
    best validation score, samples seen) and the network weights of a
    previous run are reloaded; ``load_from`` selects the best or the last
    checkpoint.  Options given as keyword arguments always override the
    reloaded ones.

    After every epoch the "last" state and weights are written; every
    ``validFreq`` updates the dev set is scored and, on improvement, the
    "best" state and weights are written.

    Returns:
        The most recent solution ``OrderedDict`` (epoch, update, samples
        seen, best validation score, ...).  ``None`` when no epoch was run
        and nothing could be reloaded.  The tuple ``(1., 1., 1.)`` when the
        cost becomes NaN or infinite.
    """
    # Timing
    print('Starting time:', datetime.now())
    sys.stdout.flush()
    t_start_train = time.time()

    # Extract model options from arguments
    model_options = dict(kwargs)

    # Print input options
    print('PARAMETERS BEFORE LOADING:')
    for k, v in model_options.items():
        print('{:>26}: {}'.format(k, v))
    sys.stdout.flush()

    # Training state for a fresh run; overwritten below on a successful
    # reload.  `solution` stays None until a checkpoint is reloaded, a best
    # model is found, or the first epoch finishes.
    curr_model = dict()
    init_epoch = 0
    best_val_score = 0
    n_samples = 0
    solution = None

    # Reload options if required
    if model_options['reload_']:
        # Reload model parameters
        opt_filename_reload = get_opt_filename(model_options, previous=True)
        print('reloading...', opt_filename_reload)
        sys.stdout.flush()
        try:
            # NOTE: unpickling executes code from the file; only reload
            # checkpoints written by a trusted run.
            with open(opt_filename_reload, 'rb') as f:
                curr_model = pkl.load(f)
        except Exception:
            print(
                'Failed to reload parameters, try to use only feeded parameters'
            )
            curr_model['options'] = {}

        # Check if we reload from best model or last model
        if model_options['load_from'] in ['Best', 'best', 'B', 'b']:
            load_from_best = True
            print('Loading from Best saved model in validation results')
        elif model_options['load_from'] in ['Last', 'last', 'L', 'l']:
            load_from_best = False
            print('Loading from Last saved model')
        else:
            print('Unkown choice for "load_from" parameter',
                  model_options['load_from'])
            print('Please choose one of:', ['Best', 'best', 'B', 'b'],
                  ['Last', 'last', 'L', 'l'])
            print('Using Last as default')
            load_from_best = False

        # Reload end-point parameters
        state_filename = get_sol_filename(model_options,
                                          best=load_from_best,
                                          previous=True)
        print('reloading...', state_filename)
        sys.stdout.flush()
        try:
            with open(state_filename, 'rb') as f:
                state_params = pkl.load(f)
            if load_from_best:
                # The "best" file stores the solution dict itself.
                init_epoch = state_params['epoch']
                solution = state_params
            else:
                # The "last" file wraps the solution with the finished epoch.
                init_epoch = state_params['epoch_done'] + 1
                solution = state_params['solution']
            best_val_score = solution['best_val_score']
            n_samples = solution['samples_seen']
        except Exception:
            print('Failed to reload state parameters, starting from 0')
            init_epoch = 0
            best_val_score = 0
            n_samples = 0
            # Do not keep a partially read solution around.
            solution = None

    else:
        curr_model['options'] = {}

    # Overwrite loaded options with input options
    curr_model['options'].update(kwargs)
    model_options = curr_model['options']

    # Print final options loaded
    if model_options['reload_']:
        print('PARAMETERS AFTER LOADING:')
        for k, v in model_options.items():
            print('{:>26}: {}'.format(k, v))
        sys.stdout.flush()

    # Load training and development sets
    print('Loading dataset')
    sys.stdout.flush()

    dataset = load_dataset(dataset_name=model_options['data'],
                           embedding=model_options['embedding'],
                           path_to_data=model_options['data_path'],
                           test_subset=model_options['test_subset'],
                           load_train=True,
                           fold=0)
    train = dataset['train']
    dev = dataset['val']

    # Create word dictionary
    print('Creating dictionary')
    sys.stdout.flush()
    worddict = build_dictionary(train['caps'] + dev['caps'])
    print('Dictionary size: ' + str(len(worddict)))
    sys.stdout.flush()
    curr_model['worddict'] = worddict
    # model_options is the same dict object as curr_model['options'], so
    # n_words is visible through both names.
    curr_model['options']['n_words'] = len(worddict) + 2

    # save model
    opt_filename_save = get_opt_filename(model_options, previous=False)
    print('Saving model parameters in', opt_filename_save)
    sys.stdout.flush()
    try:
        os.makedirs(os.path.dirname(opt_filename_save))
    except OSError:
        # Directory already exists (or the filename has no directory part);
        # a genuinely unusable path fails loudly in open() below.
        pass
    with open(opt_filename_save, 'wb') as f:
        pkl.dump(curr_model, f)

    # Load data from dataset
    print('Loading data')
    sys.stdout.flush()
    train_iter = datasource.Datasource(train,
                                       batch_size=model_options['batch_size'],
                                       worddict=worddict)
    dev = datasource.Datasource(dev, worddict=worddict)
    dev_caps, dev_ims = dev.all()

    print('Building model')
    sys.stdout.flush()
    params = init_params(model_options)

    # reload network parameters, ie. weights
    if model_options['reload_']:
        params_filename = get_npz_filename(model_options,
                                           best=load_from_best,
                                           previous=True)
        params = load_params(params_filename, params)

    tparams = init_tparams(params)
    inps, cost = build_model(tparams, model_options)

    print('Building sentence encoder')
    sys.stdout.flush()
    inps_se, sentences = build_sentence_encoder(tparams, model_options)
    f_senc = theano.function(inps_se, sentences, profile=False)

    print('Building image encoder')
    sys.stdout.flush()
    inps_ie, images = build_image_encoder(tparams, model_options)
    f_ienc = theano.function(inps_ie, images, profile=False)

    print('Building f_grad...')
    sys.stdout.flush()
    grads = tensor.grad(cost, wrt=itemlist(tparams))

    print('Building errors...')
    sys.stdout.flush()
    inps_err, errs = build_errors(model_options)
    f_err = theano.function(inps_err, errs, profile=False)

    # The compiled functions are attached only after curr_model was pickled
    # above; they are used by the validation helpers below.
    curr_model['f_senc'] = f_senc
    curr_model['f_ienc'] = f_ienc
    curr_model['f_err'] = f_err

    if model_options['grad_clip'] > 0.:
        grads = [maxnorm(g, model_options['grad_clip']) for g in grads]

    lr = tensor.scalar(name='lr')
    print('Building optimizers...')
    sys.stdout.flush()
    # (compute gradients), (updates parameters)
    # NOTE(review): eval() on the 'optimizer' option executes arbitrary
    # code; the option must come from a trusted configuration.
    f_grad_shared, f_update = eval(model_options['optimizer'])(lr, tparams,
                                                               grads, inps,
                                                               cost)

    # Get names for the files to save model and solution
    sol_filename_best = get_sol_filename(model_options,
                                         best=True,
                                         previous=False)
    sol_filename_last = get_sol_filename(model_options,
                                         best=False,
                                         previous=False)
    params_filename_best = get_npz_filename(model_options,
                                            best=True,
                                            previous=False)
    params_filename_last = get_npz_filename(model_options,
                                            best=False,
                                            previous=False)

    print('PATHS TO MODELS:')
    for filename in [
            sol_filename_best, sol_filename_last, params_filename_best,
            params_filename_last
    ]:
        print(filename)
        sys.stdout.flush()
        try:
            os.makedirs(os.path.dirname(filename))
        except OSError:
            # Best effort: the directory usually exists already.
            pass

    # Start optimization
    print('Optimization')
    sys.stdout.flush()

    uidx = 0

    # Timing
    t_start = time.time()
    print('Starting time:', datetime.now())

    for eidx in range(init_epoch, model_options['max_epochs']):
        t_start_epoch = time.time()
        print('Epoch ', eidx)
        sys.stdout.flush()

        for x, mask, im in train_iter:
            n_samples += x.shape[1]
            uidx += 1

            # Update
            ud_start = time.time()
            cost = f_grad_shared(x, mask, im)
            f_update(model_options['lrate'])
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print('NaN detected')
                sys.stdout.flush()
                return 1., 1., 1.

            if numpy.mod(uidx, model_options['dispFreq']) == 0:
                print('Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ',
                      ud)
                sys.stdout.flush()

            if numpy.mod(uidx, model_options['validFreq']) == 0:
                print('Computing results...')
                sys.stdout.flush()

                # encode sentences efficiently
                dev_s = encode_sentences(
                    curr_model,
                    dev_caps,
                    batch_size=model_options['batch_size'])
                dev_i = encode_images(curr_model, dev_ims)

                # compute errors
                dev_errs = compute_errors(curr_model, dev_s, dev_i)

                # compute ranking error
                (r1, r5, r10, medr, meanr) = i2t(dev_errs)
                (r1i, r5i, r10i, medri, meanri) = t2i(dev_errs)
                print("Text to image (dev set): %.1f, %.1f, %.1f, %.1f, %.1f" %
                      (r1i, r5i, r10i, medri, meanri))
                sys.stdout.flush()
                print("Image to text (dev set): %.1f, %.1f, %.1f, %.1f, %.1f" %
                      (r1, r5, r10, medr, meanr))
                sys.stdout.flush()

                # Score
                val_score = r1 + r5 + r10 + r1i + r5i + r10i
                if val_score > best_val_score:

                    print('BEST MODEL FOUND')
                    print('Score:', val_score)
                    print('Previous best score:', best_val_score)
                    best_val_score = val_score
                    # Join in a results dict
                    results_dict = build_results_dict(r1, r5, r10, medr, r1i,
                                                      r5i, r10i, medri)

                    # Save parameters
                    print('Saving...', end=' ')
                    sys.stdout.flush()
                    numpy.savez(params_filename_best, **unzip(tparams))
                    print('Done')
                    sys.stdout.flush()

                    # Update solution
                    solution = OrderedDict([
                        ('epoch', eidx), ('update', uidx),
                        ('samples_seen', n_samples),
                        ('best_val_score', best_val_score),
                        ('best_val_res', results_dict),
                        ('time_until_results',
                         str(timedelta(seconds=(time.time() - t_start_train))))
                    ])
                    with open(sol_filename_best, 'wb') as f:
                        pkl.dump(solution, f)

        print('Seen %d samples' % n_samples)
        sys.stdout.flush()

        # Timing
        t_epoch = time.time() - t_start_epoch
        t_epoch_avg = (time.time() - t_start) / (eidx + 1 - (init_epoch))
        print('Time for this epoch:', str(timedelta(seconds=t_epoch)),
              'Average:', str(timedelta(seconds=t_epoch_avg)))
        t_2_complete = t_epoch_avg * (model_options['max_epochs'] - (eidx + 1))
        print('Time since start session:',
              str(timedelta(seconds=time.time() - t_start)),
              'Estimated time to complete training:',
              str(timedelta(seconds=t_2_complete)))
        print('Current time:', datetime.now())
        sys.stdout.flush()

        # Save current model
        if solution is None:
            # Nothing reloaded and no best model found yet: record the
            # current state without validation results.
            solution = OrderedDict([
                ('epoch', eidx), ('update', uidx), ('samples_seen', n_samples),
                ('best_val_score', best_val_score),
                ('time_until_results',
                 str(timedelta(seconds=(time.time() - t_start_train))))
            ])
        state_params = OrderedDict([('epoch_done', eidx),
                                    ('solution', solution)])
        with open(sol_filename_last, 'wb') as f:
            pkl.dump(state_params, f)

        # Save parameters
        print('Saving LAST npz...', end=' ')
        sys.stdout.flush()
        numpy.savez(params_filename_last, **unzip(tparams))
        print('Done')
        sys.stdout.flush()

    return solution
예제 #20
0
def trainer(data='coco',
            margin=0.2,
            dim=1024,
            dim_image=4096,
            dim_word=300,
            max_epochs=15,
            encoder='lstm',
            dispFreq=10,
            grad_clip=2.0,
            maxlen_w=150,
            batch_size=128,
            saveto='vse/coco',
            validFreq=100,
            early_stop=20,
            lrate=0.0002,
            reload_=False):

    # Model options
    model_options = {}
    model_options['data'] = data
    model_options['margin'] = margin
    model_options['dim'] = dim
    model_options['dim_image'] = dim_image
    model_options['dim_word'] = dim_word
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['grad_clip'] = grad_clip
    model_options['maxlen_w'] = maxlen_w
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['validFreq'] = validFreq
    model_options['lrate'] = lrate
    model_options['reload_'] = reload_

    print model_options

    # reload options
    if reload_ and os.path.exists(saveto):
        print 'reloading...' + saveto
        with open('%s.pkl' % saveto, 'rb') as f:
            model_options = pkl.load(f)

    # Load training and development sets
    print 'loading dataset'
    train, dev = load_dataset(data)

    # Create and save dictionary
    print 'Create dictionary'
    worddict = build_dictionary(train[0] + dev[0])[0]
    n_words = len(worddict)
    model_options['n_words'] = n_words
    print 'Dictionary size: ' + str(n_words)
    with open('%s.dictionary.pkl' % saveto, 'wb') as f:
        pkl.dump(worddict, f)

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    model_options['worddict'] = worddict
    model_options['word_idict'] = word_idict

    # Each sentence in the minibatch have same length (for encoder)
    train_iter = homogeneous_data.HomogeneousData([train[0], train[1]],
                                                  batch_size=batch_size,
                                                  maxlen=maxlen_w)

    img_sen_model = ImgSenRanking(model_options)
    img_sen_model = img_sen_model.cuda()

    loss_fn = PairwiseRankingLoss(margin=margin)
    loss_fn = loss_fn.cuda()

    params = filter(lambda p: p.requires_grad, img_sen_model.parameters())
    optimizer = torch.optim.Adam(params, lrate)

    uidx = 0
    curr = 0.0
    n_samples = 0

    # For Early-stopping
    best_r1, best_r5, best_r10, best_medr = 0.0, 0.0, 0.0, 0
    best_r1i, best_r5i, best_r10i, best_medri = 0.0, 0.0, 0.0, 0
    best_step = 0

    for eidx in xrange(max_epochs):

        print 'Epoch ', eidx

        for x, im in train_iter:
            n_samples += len(x)
            uidx += 1

            x, im = homogeneous_data.prepare_data(x,
                                                  im,
                                                  worddict,
                                                  maxlen=maxlen_w,
                                                  n_words=n_words)

            if x is None:
                print 'Minibatch with zero sample under length ', maxlen_w
                uidx -= 1
                continue

            x = Variable(torch.from_numpy(x).cuda())
            im = Variable(torch.from_numpy(im).cuda())
            # Update
            x, im = img_sen_model(x, im)
            cost = loss_fn(im, x)
            optimizer.zero_grad()
            cost.backward()
            torch.nn.utils.clip_grad_norm(params, grad_clip)
            optimizer.step()

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, '\tUpdate ', uidx, '\tCost ', cost.data.cpu(
                ).numpy()[0]

            if numpy.mod(uidx, validFreq) == 0:

                print 'Computing results...'
                curr_model = {}
                curr_model['options'] = model_options
                curr_model['worddict'] = worddict
                curr_model['word_idict'] = word_idict
                curr_model['img_sen_model'] = img_sen_model

                ls, lim = encode_sentences(curr_model, dev[0]), encode_images(
                    curr_model, dev[1])

                r_time = time.time()
                (r1, r5, r10, medr) = i2t(lim, ls)
                print "Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10,
                                                                 medr)
                (r1i, r5i, r10i, medri) = t2i(lim, ls)
                print "Text to image: %.1f, %.1f, %.1f, %.1f" % (r1i, r5i,
                                                                 r10i, medri)

                print "Cal Recall@K using %ss" % (time.time() - r_time)

                curr_step = uidx / validFreq

                currscore = r1 + r5 + r10 + r1i + r5i + r10i
                if currscore > curr:
                    curr = currscore
                    best_r1, best_r5, best_r10, best_medr = r1, r5, r10, medr
                    best_r1i, best_r5i, best_r10i, best_medri = r1i, r5i, r10i, medri
                    best_step = curr_step

                    # Save model
                    print 'Saving model...',
                    pkl.dump(
                        model_options,
                        open('%s_params_%s.pkl' % (saveto, encoder), 'wb'))
                    torch.save(img_sen_model.state_dict(),
                               '%s_model_%s.pkl' % (saveto, encoder))
                    print 'Done'

                if curr_step - best_step > early_stop:
                    print 'Early stopping ...'
                    print "Image to text: %.1f, %.1f, %.1f, %.1f" % (
                        best_r1, best_r5, best_r10, best_medr)
                    print "Text to image: %.1f, %.1f, %.1f, %.1f" % (
                        best_r1i, best_r5i, best_r10i, best_medri)
                    return 0

        print 'Seen %d samples' % n_samples
예제 #21
0
def trainer(
        data=['f30k-comparable', 'f30k-translational'],
        langs=['en', 'de'],
        margin=1,
        dim=1600,  # 800 forward, 800 backward
        dim_image=4096,
        dim_word=300,
        encoders={
            'en': 'gru',
            'de': 'gru'
        },  # gru OR bow
        max_epochs=80,
        dispFreq=50,
        decay_c=0,
        grad_clip=2.,
        maxlen_w=100,
        optimizer='adam',
        batch_size=128,
        saveto='./f30k-half-comparable-and-translational.npz',
        validFreq=100,
        lrate=0.0002,
        reload_=False,
        # new parameters
        minlen_w=10,
        max_words={
            'en': 0,
            'de': 0
        },  # integer, zero means unlimited
        debug=False,
        use_dropout=True,
        dropout_prob=0.3,
        load_test=False,
        lambda_img_sent=0.75,
        lambda_sent_sent=0.25,
        bidirectional_enc=True,
        n_enc_hidden_layers=1):
    #use_all_costs=True):

    # Model options
    model_options = {}
    model_options['data'] = data
    model_options['langs'] = langs
    for lang in langs:
        model_options['encoder_%s' % lang] = encoders[lang]
        model_options['max_words_%s' % lang] = max_words[lang]
    model_options['margin'] = margin
    model_options['dim'] = dim
    model_options['dim_image'] = dim_image
    model_options['dim_word'] = dim_word
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['maxlen_w'] = maxlen_w
    model_options['optimizer'] = optimizer
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['validFreq'] = validFreq
    model_options['lrate'] = lrate
    model_options['reload_'] = reload_
    model_options['minlen_w'] = minlen_w
    model_options['use_dropout'] = use_dropout
    model_options['dropout_prob'] = dropout_prob
    model_options['bidirectional_enc'] = bidirectional_enc
    model_options['n_enc_hidden_layers'] = n_enc_hidden_layers
    model_options['load_test'] = load_test
    model_options['lambda_img_sent'] = lambda_img_sent
    model_options['lambda_sent_sent'] = lambda_sent_sent
    #model_options['use_all_costs'] = use_all_costs

    assert (n_enc_hidden_layers >= 1)

    # reload options
    if reload_ and os.path.exists(saveto):
        print 'reloading...' + saveto
        with open('%s.pkl' % saveto, 'rb') as f:
            models_options = pkl.load(f)

    # Load training and development sets
    print 'Loading dataset'
    train, dev = load_multilingual_dataset(data, langs,
                                           load_test=load_test)[:2]

    # Create and save dictionaries
    print 'Creating and saving multilingual dictionaries %s' % (", ".join(
        model_options['langs']))
    worddicts = []
    iworddicts = []
    for lang_idx, lang in enumerate(langs):
        # built dictionaries including all comparable and translational vocab
        worddict = build_dictionary(train[0][0][lang_idx] +
                                    train[1][0][lang_idx] +
                                    dev[0][0][lang_idx] +
                                    dev[1][0][lang_idx])[0]
        n_words_dict = len(worddict)
        #print '%s dictionary size: %s'%(lang,str(n_words_dict))
        with open('%s.dictionary-%s.pkl' % (saveto, lang), 'wb') as f:
            pkl.dump(worddict, f)

        # Inverse dictionaries
        iworddict = dict()
        for kk, vv in worddict.iteritems():
            iworddict[vv] = kk
        iworddict[0] = '<eos>'
        iworddict[1] = 'UNK'

        worddicts.append(worddict)
        iworddicts.append(iworddict)

        model_options[
            "n_words_%s" %
            lang] = n_words_dict if max_words[lang] == 0 else max_words[lang]

    # assert all max_words per language are equal
    assert (all(x == max_words.values()[0] for x in max_words.values()))

    print model_options

    print 'Building model'
    params = init_params(model_options)
    # reload parameters
    if reload_ and os.path.exists(saveto):
        params = load_params(saveto, params)

    tparams = init_tparams(params)

    trng, inps, is_translational, cost = build_model(tparams, model_options)

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, cost, profile=False)
    print 'Done'

    # weight decay, if applicable
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv**2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print 'Building f_cost...',
    f_cost = theano.function(inps, cost, profile=False)
    print 'Done'

    print 'Building multilingual sentence encoders'
    trng, alls_se = build_sentence_encoders(tparams, model_options)
    f_sencs = []
    for inps_se in alls_se:
        #print "sentence encoder input", inps_se
        inp_se, sentences = inps_se
        f_senc = theano.function(inp_se, sentences, profile=False)
        f_sencs.append(f_senc)

    print 'Building image encoder'
    trng, inps_ie, images = build_image_encoder(tparams, model_options)
    f_ienc = theano.function(inps_ie, images, profile=False)

    print 'Building f_grad...',
    sys.stdout.flush()
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads],
                                  profile=False)
    f_weight_norm = theano.function([], [(t**2).sum()
                                         for k, t in tparams.iteritems()],
                                    profile=False)

    if grad_clip > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(
                tensor.switch(g2 > (grad_clip**2),
                              g / tensor.sqrt(g2) * grad_clip, g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    sys.stdout.flush()
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print 'Optimization'

    # train:
    # [(c_train_caps_list, c_train_ims, 0), (t_train_caps_list, t_train_ims, 1)]

    # create training set iterator where
    # a heuristic tries to make sure sentences in minibatch have a similar size
    train_comparable_iter = HomogeneousDataMultilingualWithTranslationalEvidence(
        train[0], batch_size=batch_size, maxlen=maxlen_w, minlen=minlen_w)

    train_translational_iter = HomogeneousDataMultilingualWithTranslationalEvidence(
        train[1], batch_size=batch_size, maxlen=maxlen_w, minlen=minlen_w)

    uidx = 0
    curr = 0.
    curr_langs = [0.] * len(model_options['langs'])
    n_samples = 0

    ep_start = time.time()
    ep_times = [ep_start]
    for eidx in xrange(max_epochs):
        print 'Epoch ', eidx

        for xs, im, is_translational_ in itertools.chain(
                train_comparable_iter, train_translational_iter):
            uidx += 1
            xs, masks, im = prepare_data(xs, im, is_translational_, \
                                         worddicts, \
                                         model_options=model_options, \
                                         maxlen=maxlen_w)

            is_translational.set_value(is_translational_)

            if xs[0] is None:
                print 'Minibatch with zero sample under length ', maxlen_w
                uidx -= 1
                continue

            # do not train on certain small sentences (less than 3 words)
            #if not x_src.shape[0]>=minlen_w and x_tgt.shape[0]>= minlen_w:
            if not all(x.shape[0] >= minlen_w for x in xs):
                print "At least one minibatch (in one of the languages in the model)",
                print "has less words than %i. Skipping..." % minlen_w
                skipped_samples += xs[0].shape[1]
                uidx -= 1
                continue

            n_samples += len(xs[0])

            # Update
            ud_start = time.time()
            # flatten inputs for theano function
            inps_ = []
            inps_.extend(xs)
            inps_.extend(masks)
            inps_.append(im)
            #inps_.append(is_translational_)
            #cost = f_grad_shared(xs, masks, im)
            cost = f_grad_shared(*inps_)
            f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'Translational ', is_translational_, 'UD ', ud

            if numpy.mod(uidx, validFreq) == 0:

                print 'Computing results...'
                curr_model = {}
                curr_model['options'] = model_options
                # store model's language dependent parameters
                for lang, worddict, iworddict, f_senc in zip(
                        langs, worddicts, iworddicts, f_sencs):
                    curr_model['worddict_%s' % lang] = worddict
                    curr_model['wordidict_%s' % lang] = iworddict
                    curr_model['f_senc_%s' % lang] = f_senc
                #curr_model['worddicts'] = worddicts
                #curr_model['wordidicts'] = iworddicts
                #curr_model['f_senc'] = f_senc
                curr_model['f_ienc'] = f_ienc

                # encode sentences
                lss = []
                for lang_idx, lang in enumerate(model_options['langs']):
                    # dev:
                    # ((c_dev_caps_list, c_dev_ims, 0), (t_dev_caps_list, t_dev_ims, 1))

                    dev_set = dev[is_translational_]
                    #ls  = encode_multilingual_sentences(curr_model, dev[0][lang_idx], lang=lang)
                    ls = encode_multilingual_sentences(curr_model,
                                                       dev_set[0][lang_idx],
                                                       lang=lang)
                    lss.append(ls)
                lim = encode_images(curr_model, dev_set[1].astype('float32'))
                #lim = encode_images(curr_model, dev[1].astype('float32'))

                # compute scores
                currscore = 0
                for i in range(len(lss)):
                    (r1, r5, r10, medr) = i2t(lim, lss[i])
                    print "Image to %s text: %.1f, %.1f, %.1f, %.1f" % (
                        model_options['langs'][i], r1, r5, r10, medr)
                    (r1i, r5i, r10i, medri) = t2i(lim, lss[i])
                    print "%s text to image: %.1f, %.1f, %.1f, %.1f" % (
                        model_options['langs'][i], r1i, r5i, r10i, medri)

                    # adjust current overall score
                    #currscore += r1 + r5 + r10 + r1i + r5i + r10i
                    currscore += r1 + (r5 / 1.5) + (r10 / 2) + r1i + (
                        r5i / 1.5) + (r10i / 2)

                    # best current score for individual language/image pair
                    #currscore_lang = r1 + r5 + r10 + r1i + r5i + r10i
                    currscore_lang = r1 + (r5 / 1.5) + (r10 / 2) + r1i + (
                        r5i / 1.5) + (r10i / 2)
                    if currscore_lang > curr_langs[i]:
                        curr_langs[i] = currscore_lang

                        # save model
                        print 'saving best %s...' % model_options['langs'][i],
                        params = unzip(tparams)
                        numpy.savez(
                            '%s.best-%s' % (saveto, model_options['langs'][i]),
                            **params)
                        pkl.dump(
                            model_options,
                            open(
                                '%s.best-%s.pkl' %
                                (saveto, model_options['langs'][i]), 'wb'))
                        print 'done'

                # adjust current best overall score if needed
                if currscore > curr:
                    curr = currscore

                    # Save model
                    print 'Saving best overall model (%s)...' % str("-".join(
                        model_options['langs'])),
                    params = unzip(tparams)
                    numpy.savez(saveto, **params)
                    pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'))
                    print 'Done'

        ep_end = time.time()
        ep_times.append(ep_end)

        print 'Seen %d samples' % n_samples

    seconds = ep_times[-1] - ep_times[0]
    m, s = divmod(seconds, 60)
    h, m = divmod(m, 60)
    print "Finished execution in %d:%02d:%02d" % (h, m, s)