Пример #1
0
def trainer(data='coco',
            margin=0.2,
            dim=1024,
            dim_image=4096,
            dim_word=300,
            max_epochs=15,
            encoder='lstm',
            dispFreq=10,
            grad_clip=2.0,
            maxlen_w=150,
            batch_size=128,
            saveto='vse/coco',
            validFreq=100,
            early_stop=20,
            lrate=0.0002,
            reload_=False):

    # Model options
    model_options = {}
    model_options['data'] = data
    model_options['margin'] = margin
    model_options['dim'] = dim
    model_options['dim_image'] = dim_image
    model_options['dim_word'] = dim_word
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['grad_clip'] = grad_clip
    model_options['maxlen_w'] = maxlen_w
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['validFreq'] = validFreq
    model_options['lrate'] = lrate
    model_options['reload_'] = reload_

    print model_options

    # reload options
    if reload_ and os.path.exists(saveto):
        print 'reloading...' + saveto
        with open('%s.pkl' % saveto, 'rb') as f:
            model_options = pkl.load(f)

    # Load training and development sets
    print 'loading dataset'
    train, dev = load_dataset(data)

    # Create and save dictionary
    print 'Create dictionary'
    worddict = build_dictionary(train[0] + dev[0])[0]
    n_words = len(worddict)
    model_options['n_words'] = n_words
    print 'Dictionary size: ' + str(n_words)
    with open('%s.dictionary.pkl' % saveto, 'wb') as f:
        pkl.dump(worddict, f)

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    model_options['worddict'] = worddict
    model_options['word_idict'] = word_idict

    # Each sentence in the minibatch have same length (for encoder)
    train_iter = homogeneous_data.HomogeneousData([train[0], train[1]],
                                                  batch_size=batch_size,
                                                  maxlen=maxlen_w)

    img_sen_model = ImgSenRanking(model_options)
    img_sen_model = img_sen_model.cuda()

    loss_fn = PairwiseRankingLoss(margin=margin)
    loss_fn = loss_fn.cuda()

    params = filter(lambda p: p.requires_grad, img_sen_model.parameters())
    optimizer = torch.optim.Adam(params, lrate)

    uidx = 0
    curr = 0.0
    n_samples = 0

    # For Early-stopping
    best_r1, best_r5, best_r10, best_medr = 0.0, 0.0, 0.0, 0
    best_r1i, best_r5i, best_r10i, best_medri = 0.0, 0.0, 0.0, 0
    best_step = 0

    for eidx in xrange(max_epochs):

        print 'Epoch ', eidx

        for x, im in train_iter:
            n_samples += len(x)
            uidx += 1

            x, im = homogeneous_data.prepare_data(x,
                                                  im,
                                                  worddict,
                                                  maxlen=maxlen_w,
                                                  n_words=n_words)

            if x is None:
                print 'Minibatch with zero sample under length ', maxlen_w
                uidx -= 1
                continue

            x = Variable(torch.from_numpy(x).cuda())
            im = Variable(torch.from_numpy(im).cuda())
            # Update
            x, im = img_sen_model(x, im)
            cost = loss_fn(im, x)
            optimizer.zero_grad()
            cost.backward()
            torch.nn.utils.clip_grad_norm(params, grad_clip)
            optimizer.step()

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, '\tUpdate ', uidx, '\tCost ', cost.data.cpu(
                ).numpy()[0]

            if numpy.mod(uidx, validFreq) == 0:

                print 'Computing results...'
                curr_model = {}
                curr_model['options'] = model_options
                curr_model['worddict'] = worddict
                curr_model['word_idict'] = word_idict
                curr_model['img_sen_model'] = img_sen_model

                ls, lim = encode_sentences(curr_model, dev[0]), encode_images(
                    curr_model, dev[1])

                r_time = time.time()
                (r1, r5, r10, medr) = i2t(lim, ls)
                print "Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10,
                                                                 medr)
                (r1i, r5i, r10i, medri) = t2i(lim, ls)
                print "Text to image: %.1f, %.1f, %.1f, %.1f" % (r1i, r5i,
                                                                 r10i, medri)

                print "Cal Recall@K using %ss" % (time.time() - r_time)

                curr_step = uidx / validFreq

                currscore = r1 + r5 + r10 + r1i + r5i + r10i
                if currscore > curr:
                    curr = currscore
                    best_r1, best_r5, best_r10, best_medr = r1, r5, r10, medr
                    best_r1i, best_r5i, best_r10i, best_medri = r1i, r5i, r10i, medri
                    best_step = curr_step

                    # Save model
                    print 'Saving model...',
                    pkl.dump(
                        model_options,
                        open('%s_params_%s.pkl' % (saveto, encoder), 'wb'))
                    torch.save(img_sen_model.state_dict(),
                               '%s_model_%s.pkl' % (saveto, encoder))
                    print 'Done'

                if curr_step - best_step > early_stop:
                    print 'Early stopping ...'
                    print "Image to text: %.1f, %.1f, %.1f, %.1f" % (
                        best_r1, best_r5, best_r10, best_medr)
                    print "Text to image: %.1f, %.1f, %.1f, %.1f" % (
                        best_r1i, best_r5i, best_r10i, best_medri)
                    return 0

        print 'Seen %d samples' % n_samples
Пример #2
0
def trainer(X, C, stmodel,
            dimctx=4800, #vector dimensionality
            dim_word=620, # word vector dimensionality
            dim=1600, # the number of GRU units
            encoder='gru',
            decoder='gru',
            doutput=False,
            max_epochs=5,
            dispFreq=1,
            decay_c=0.,
            grad_clip=5.,
            n_words=40000,
            maxlen_w=100,
            optimizer='adam',
            batch_size = 16,
            saveto='adventuremodel.npz',
            dictionary='/home/jm7432/tell-tall-tales/decoding/adventure_dict_final.pkl',
            embeddings=None,
            saveFreq=1000,
            sampleFreq=100,
            reload_=False):

    # Model options
    model_options = {}
    model_options['dimctx'] = dimctx
    model_options['dim_word'] = dim_word
    model_options['dim'] = dim
    model_options['encoder'] = encoder
    model_options['decoder'] = decoder
    model_options['doutput'] = doutput
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['n_words'] = n_words
    model_options['maxlen_w'] = maxlen_w
    model_options['optimizer'] = optimizer
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['dictionary'] = dictionary
    model_options['embeddings'] = embeddings
    model_options['saveFreq'] = saveFreq
    model_options['sampleFreq'] = sampleFreq
    model_options['reload_'] = reload_

    print model_options

    # reload options
    if reload_ and os.path.exists(saveto):
        print 'reloading...' + saveto
        with open('%s.pkl'%saveto, 'rb') as f:
            models_options = pkl.load(f)

    # load dictionary
    print 'Loading dictionary...'
    worddict = load_dictionary(dictionary)

    # Load pre-trained embeddings, if applicable
    if embeddings != None:
        print 'Loading embeddings...'
        with open(embeddings, 'rb') as f:
            embed_map = pkl.load(f)
        dim_word = len(embed_map.values()[0])
        model_options['dim_word'] = dim_word
        preemb = norm_weight(n_words, dim_word)
        pz = defaultdict(lambda : 0)
        for w in embed_map.keys():
            pz[w] = 1
        for w in worddict.keys()[:n_words-2]:
            if pz[w] > 0:
                preemb[worddict[w]] = embed_map[w]
    else:
        preemb = None

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    print 'Building model'
    params = init_params(model_options, preemb=preemb)
    # reload parameters
    if reload_ and os.path.exists(saveto):
        params = load_params(saveto, params)

    tparams = init_tparams(params)

    trng, inps, cost = build_model(tparams, model_options)

    print 'Building sampler'
    f_init, f_next = build_sampler(tparams, model_options, trng)

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, cost, profile=False)
    print 'Done'

    # weight decay, if applicable
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv ** 2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print 'Building f_cost...',
    f_cost = theano.function(inps, cost, profile=False)
    print 'Done'

    print 'Done'
    print 'Building f_grad...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads], profile=False)
    f_weight_norm = theano.function([], [(t**2).sum() for k,t in tparams.iteritems()], profile=False)

    if grad_clip > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(tensor.switch(g2 > (grad_clip**2),
                                           g / tensor.sqrt(g2) * grad_clip,
                                           g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print 'Optimization'

    # Each sentence in the minibatch have same length (for encoder)
    train_iter = homogeneous_data.HomogeneousData([X,C], batch_size=batch_size, maxlen=maxlen_w)

    uidx = 0
    lrate = 0.01
    for eidx in xrange(max_epochs):
        n_samples = 0

        print 'Epoch ', eidx

        for x, c in train_iter:
            n_samples += len(x)
            uidx += 1

            x, mask, ctx = homogeneous_data.prepare_data(x, c, worddict, stmodel, maxlen=maxlen_w, n_words=n_words)

            if x == None:
                print 'Minibatch with zero sample under length ', maxlen_w
                uidx -= 1
                continue

            ud_start = time.time()
            cost = f_grad_shared(x, mask, ctx)
            f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

            if numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...',

                params = unzip(tparams)
                numpy.savez(saveto, history_errs=[], **params)
                pkl.dump(model_options, open('%s.pkl'%saveto, 'wb'))
                print 'Done'

            if numpy.mod(uidx, sampleFreq) == 0:
                x_s = x
                mask_s = mask
                ctx_s = ctx
                for jj in xrange(numpy.minimum(10, len(ctx_s))):
                    sample, score = gen_sample(tparams, f_init, f_next, ctx_s[jj].reshape(1, model_options['dimctx']), model_options,
                                               trng=trng, k=1, maxlen=100, stochastic=False, use_unk=False)
                    print 'Truth ',jj,': ',
                    for vv in x_s[:,jj]:
                        if vv == 0:
                            break
                        if vv in word_idict:
                            print word_idict[vv],
                        else:
                            print 'UNK',
                    print
                    for kk, ss in enumerate([sample[0]]):
                        print 'Sample (', kk,') ', jj, ': ',
                        for vv in ss:
                            if vv == 0:
                                break
                            if vv in word_idict:
                                print word_idict[vv],
                            else:
                                print 'UNK',
                    print

        print 'Seen %d samples'%n_samples
Пример #3
0
def trainer(
        X,
        dim_word=620,  # word vector dimensionality
        dim=2400,  # the number of GRU units
        encoder='gru',
        decoder='gru',
        max_epochs=5,
        dispFreq=1,
        decay_c=0.,
        grad_clip=5.,
        n_words=474000,
        maxlen_w=30,
        optimizer='adam',
        batch_size=64,
        saveto='/data/embeddingModel.npz',
        dictionary='dictionary.pkl',
        saveFreq=1000,
        reload_=False):

    # Model options
    model_options = {}
    model_options['dim_word'] = dim_word
    model_options['dim'] = dim
    model_options['encoder'] = encoder
    model_options['decoder'] = decoder
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['n_words'] = n_words
    model_options['maxlen_w'] = maxlen_w
    model_options['optimizer'] = optimizer
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['dictionary'] = dictionary
    model_options['saveFreq'] = saveFreq
    model_options['reload_'] = reload_

    print(model_options)

    # reload options
    if reload_ and os.path.exists(saveto):
        print('reloading...' + saveto)
        with open('%s.pkl' % saveto, 'rb') as f:
            models_options = pkl.load(f)

    # load dictionary
    print('Loading dictionary...')
    worddict = load_dictionary(dictionary)

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.items():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    print('Building model')
    params = init_params(model_options)
    # reload parameters
    if reload_ and os.path.exists(saveto):
        params = load_params(saveto, params)

    tparams = init_tparams(params)

    trng, x, x_mask, y, y_mask, z, z_mask, \
          opt_ret, \
          cost = \
          build_model(tparams, model_options)
    inps = [x, x_mask, y, y_mask, z, z_mask]

    # before any regularizer
    print('Building f_log_probs...')
    f_log_probs = theano.function(inps, cost, profile=False)
    print('Done')

    # weight decay, if applicable
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.items():
            weight_decay += (vv**2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print('Building f_cost...')
    f_cost = theano.function(inps, cost, profile=False)
    print('Done')

    print('Done')
    print('Building f_grad...')
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads],
                                  profile=False)
    f_weight_norm = theano.function([],
                                    [(t**2).sum() for k, t in tparams.items()],
                                    profile=False)

    if grad_clip > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(
                tensor.switch(g2 > (grad_clip**2),
                              g / tensor.sqrt(g2) * grad_clip, g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print('Building optimizers...', end='')
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print('Optimization')

    # Each sentence in the minibatch have same length (for encoder)
    trainX = homogeneous_data.grouper(X)
    train_iter = homogeneous_data.HomogeneousData(trainX,
                                                  batch_size=batch_size,
                                                  maxlen=maxlen_w)

    uidx = 0
    lrate = 0.01
    for eidx in range(max_epochs):
        n_samples = 0

        print('Epoch ', eidx)

        for x, y, z in train_iter:
            n_samples += len(x)
            uidx += 1

            x, x_mask, y, y_mask, z, z_mask = homogeneous_data.prepare_data(
                x, y, z, worddict, maxlen=maxlen_w, n_words=n_words)

            if x is None:
                print('Minibatch with zero sample under length ', maxlen_w)
                uidx -= 1
                continue

            ud_start = time.time()
            cost = f_grad_shared(x, x_mask, y, y_mask, z, z_mask)
            f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print('NaN detected')
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print('Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ',
                      ud)

            if numpy.mod(uidx, saveFreq) == 0:
                print('Saving...', end='')

                params = unzip(tparams)
                numpy.savez(saveto, history_errs=[], **params)
                pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'))
                print('Done')

        print('Seen %d samples' % n_samples)
Пример #4
0
def trainer(data='coco',  #f8k, f30k, coco
            margin=0.2,
            dim=1024,
            dim_image=4096,
            dim_word=300,
            encoder='gru',  # gru OR bow
            max_epochs=15,
            dispFreq=10,
            decay_c=0.,
            grad_clip=2.,
            maxlen_w=100,
            optimizer='adam',
            batch_size = 128,
            saveto='/ais/gobi3/u/rkiros/uvsmodels/coco.npz',
            validFreq=100,
            lrate=0.0002,
            reload_=False):

    # Model options
    model_options = {}
    model_options['data'] = data
    model_options['margin'] = margin
    model_options['dim'] = dim
    model_options['dim_image'] = dim_image
    model_options['dim_word'] = dim_word
    model_options['encoder'] = encoder
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['maxlen_w'] = maxlen_w
    model_options['optimizer'] = optimizer
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['validFreq'] = validFreq
    model_options['lrate'] = lrate
    model_options['reload_'] = reload_

    print model_options

    # reload options
    if reload_ and os.path.exists(saveto):
        print 'reloading...' + saveto
        with open('%s.pkl'%saveto, 'rb') as f:
            models_options = pkl.load(f)

    # Load training and development sets
    print 'Loading dataset'
    train, dev = load_dataset(data)[:2]

    # Create and save dictionary
    print 'Creating dictionary'
    worddict = build_dictionary(train[0]+dev[0])[0]
    n_words = len(worddict)
    model_options['n_words'] = n_words
    print 'Dictionary size: ' + str(n_words)
    with open('%s.dictionary.pkl'%saveto, 'wb') as f:
        pkl.dump(worddict, f)

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    print 'Building model'
    params = init_params(model_options)
    # reload parameters
    if reload_ and os.path.exists(saveto):
        params = load_params(saveto, params)

    tparams = init_tparams(params)

    trng, inps, cost = build_model(tparams, model_options)

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, cost, profile=False)
    print 'Done'

    # weight decay, if applicable
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv ** 2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print 'Building f_cost...',
    f_cost = theano.function(inps, cost, profile=False)
    print 'Done'

    print 'Building sentence encoder'
    trng, inps_se, sentences = build_sentence_encoder(tparams, model_options)
    f_senc = theano.function(inps_se, sentences, profile=False)

    print 'Building image encoder'
    trng, inps_ie, images = build_image_encoder(tparams, model_options)
    f_ienc = theano.function(inps_ie, images, profile=False)

    print 'Building f_grad...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads], profile=False)
    f_weight_norm = theano.function([], [(t**2).sum() for k,t in tparams.iteritems()], profile=False)

    if grad_clip > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(tensor.switch(g2 > (grad_clip**2),
                                           g / tensor.sqrt(g2) * grad_clip,
                                           g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print 'Optimization'

    # Each sentence in the minibatch have same length (for encoder)
    train_iter = homogeneous_data.HomogeneousData([train[0], train[1]], batch_size=batch_size, maxlen=maxlen_w)

    uidx = 0
    curr = 0.
    n_samples = 0

    for eidx in xrange(max_epochs):

        print 'Epoch ', eidx

        for x, im in train_iter:
            n_samples += len(x)
            uidx += 1

            x, mask, im = homogeneous_data.prepare_data(x, im, worddict, maxlen=maxlen_w, n_words=n_words)

            if x == None:
                print 'Minibatch with zero sample under length ', maxlen_w
                uidx -= 1
                continue

            # Update
            ud_start = time.time()
            cost = f_grad_shared(x, mask, im)
            f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

            if numpy.mod(uidx, validFreq) == 0:

                print 'Computing results...'
                curr_model = {}
                curr_model['options'] = model_options
                curr_model['worddict'] = worddict
                curr_model['word_idict'] = word_idict
                curr_model['f_senc'] = f_senc
                curr_model['f_ienc'] = f_ienc

                ls = encode_sentences(curr_model, dev[0])
                lim = encode_images(curr_model, dev[1])

                (r1, r5, r10, medr) = i2t(lim, ls)
                print "Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr)
                (r1i, r5i, r10i, medri) = t2i(lim, ls)
                print "Text to image: %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri)

                currscore = r1 + r5 + r10 + r1i + r5i + r10i
                if currscore > curr:
                    curr = currscore

                    # Save model
                    print 'Saving...',
                    params = unzip(tparams)
                    numpy.savez(saveto, **params)
                    pkl.dump(model_options, open('%s.pkl'%saveto, 'wb'))
                    print 'Done'

        print 'Seen %d samples'%n_samples
def trainer(data='coco',  #f8k, f30k, coco
            margin=0.2,
            dim=1024,
            dim_image=4096,
            dim_word=300,
            encoder='gru',  # gru OR bow
            max_epochs=15,
            dispFreq=10,
            decay_c=0.,
            grad_clip=2.,
            maxlen_w=100,
            optimizer='adam',
            batch_size = 128,
            saveto='/ais/gobi3/u/rkiros/uvsmodels/coco.npz',
            validFreq=100,
            lrate=0.0002,
            reload_=False):

    # Model options
    model_options = {}
    model_options['data'] = data
    model_options['margin'] = margin
    model_options['dim'] = dim
    model_options['dim_image'] = dim_image
    model_options['dim_word'] = dim_word
    model_options['encoder'] = encoder
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['maxlen_w'] = maxlen_w
    model_options['optimizer'] = optimizer
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['validFreq'] = validFreq
    model_options['lrate'] = lrate
    model_options['reload_'] = reload_

    print model_options

    # reload options
    if reload_ and os.path.exists(saveto):
        print 'reloading...' + saveto
        with open('%s.pkl'%saveto, 'rb') as f:
            models_options = pkl.load(f)

    # Load training and development sets
    print 'Loading dataset'
    train, dev = load_dataset(data)[:2]

    # Create and save dictionary
    print 'Creating dictionary'
    worddict = build_dictionary(train[0]+dev[0])[0]
    n_words = len(worddict)
    model_options['n_words'] = n_words
    print 'Dictionary size: ' + str(n_words)
    with open('%s.dictionary.pkl'%saveto, 'wb') as f:
        pkl.dump(worddict, f)

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    print 'Building model'
    params = init_params(model_options)
    # reload parameters
    if reload_ and os.path.exists(saveto):
        params = load_params(saveto, params)

    tparams = init_tparams(params)

    trng, inps, cost = build_model(tparams, model_options)

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, cost, profile=False)
    print 'Done'

    # weight decay, if applicable
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv ** 2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print 'Building f_cost...',
    f_cost = theano.function(inps, cost, profile=False)
    print 'Done'

    print 'Building sentence encoder'
    trng, inps_se, sentences = build_sentence_encoder(tparams, model_options)
    f_senc = theano.function(inps_se, sentences, profile=False)

    print 'Building image encoder'
    trng, inps_ie, images = build_image_encoder(tparams, model_options)
    f_ienc = theano.function(inps_ie, images, profile=False)

    print 'Building f_grad...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads], profile=False)
    f_weight_norm = theano.function([], [(t**2).sum() for k,t in tparams.iteritems()], profile=False)

    if grad_clip > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(tensor.switch(g2 > (grad_clip**2),
                                           g / tensor.sqrt(g2) * grad_clip,
                                           g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print 'Optimization'

    # Each sentence in the minibatch have same length (for encoder)
    train_iter = homogeneous_data.HomogeneousData([train[0], train[1]], batch_size=batch_size, maxlen=maxlen_w)

    uidx = 0
    curr = 0.
    n_samples = 0
    
    for eidx in xrange(max_epochs):

        print 'Epoch ', eidx

        for x, im in train_iter:
            n_samples += len(x)
            uidx += 1

            x, mask, im = homogeneous_data.prepare_data(x, im, worddict, maxlen=maxlen_w, n_words=n_words)

            if x == None:
                print 'Minibatch with zero sample under length ', maxlen_w
                uidx -= 1
                continue

            # Update
            ud_start = time.time()
            cost = f_grad_shared(x, mask, im)
            f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

            if numpy.mod(uidx, validFreq) == 0:

                print 'Computing results...'
                curr_model = {}
                curr_model['options'] = model_options
                curr_model['worddict'] = worddict
                curr_model['word_idict'] = word_idict
                curr_model['f_senc'] = f_senc
                curr_model['f_ienc'] = f_ienc

                ls = encode_sentences(curr_model, dev[0])
                lim = encode_images(curr_model, dev[1])

                (r1, r5, r10, medr) = i2t(lim, ls)
                print "Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr)
                (r1i, r5i, r10i, medri) = t2i(lim, ls)
                print "Text to image: %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri)

                currscore = r1 + r5 + r10 + r1i + r5i + r10i
                if currscore > curr:
                    curr = currscore

                    # Save model
                    print 'Saving...',
                    params = unzip(tparams)
                    numpy.savez(saveto, **params)
                    pkl.dump(model_options, open('%s.pkl'%saveto, 'wb'))
                    print 'Done'

        print 'Seen %d samples'%n_samples
Пример #6
0
def trainer(Xs,
            Xs_val,
            dim_word=620, # word vector dimensionality
            dim=2400, # the number of GRU units
            encoder='gru',
            decoder='gru',
            max_epochs=5,
            dispFreq=1,
            decay_c=0.,
            grad_clip=5.,
            n_words=20000,
            maxlen_w=30,
            optimizer='adam',
            batch_size = 64,
            saveto='/u/rkiros/research/semhash/models/toy.npz',
            dictionary='/ais/gobi3/u/rkiros/bookgen/book_dictionary_large.pkl',
            embeddings=None,
            saveFreq=1000,
            reload_=False):

    # Model options
    model_options = {}
    model_options['dim_word'] = dim_word
    model_options['dim'] = dim
    model_options['encoder'] = encoder
    model_options['decoder'] = decoder 
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['n_words'] = n_words
    model_options['maxlen_w'] = maxlen_w
    model_options['optimizer'] = optimizer
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['dictionary'] = dictionary
    model_options['embeddings'] = embeddings
    model_options['saveFreq'] = saveFreq
    model_options['reload_'] = reload_

    print model_options

    # reload options
    if reload_ and os.path.exists(saveto):
        print 'reloading...' + saveto
        with open('%s.pkl'%saveto, 'rb') as f:
            models_options = pkl.load(f)

    # load dictionary
    print 'Loading dictionary...'
    worddict = load_dictionary(dictionary)

    # Load pre-trained embeddings, if applicable
    if embeddings:
        print 'Loading embeddings...'
        from gensim.models import Word2Vec as word2vec
        embed_map = word2vec.load_word2vec_format(embeddings, binary=True)
        model_options['dim_word'] = dim_word = embed_map.vector_size
        preemb = norm_weight(n_words, dim_word)
        preemb_mask = numpy.ones((n_words, 1), dtype='float32')
        for w,i in worddict.items()[:n_words-2]:
            if w in embed_map:
                preemb[i] = embed_map[w]
                preemb_mask[i] = 0 # don't propagate gradients into pretrained embs
    else:
        preemb = None

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    print 'Building model'
    params = init_params(model_options, preemb=preemb)
    # reload parameters
    if reload_ and os.path.exists(saveto):
        params = load_params(saveto, params)

    tparams = init_tparams(params)

    trng, x, x_mask, y, y_mask, z, z_mask, \
          opt_ret, \
          cost = \
          build_model(tparams, model_options)
    inps = [x, x_mask, y, y_mask, z, z_mask]

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, cost, profile=False)
    print 'Done'

    # weight decay, if applicable
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv ** 2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print 'Building f_cost...',
    f_cost = theano.function(inps, cost, profile=False)
    print 'Done'

    print 'Done'
    print 'Building f_grad...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads], profile=False)
    f_weight_norm = theano.function([], [(t**2).sum() for k,t in tparams.iteritems()], profile=False)

    if grad_clip > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(tensor.switch(g2 > (grad_clip**2),
                                           g / tensor.sqrt(g2) * grad_clip,
                                           g))
        grads = new_grads

    if embeddings:
        param_preemb_mask = theano.shared(preemb_mask, name='preemb_mask', broadcastable=(False, True))
        grads[0] *= param_preemb_mask

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print 'Optimization'

    # Each sentence in the minibatch have same length (for encoder)
    if type(Xs[0]) is not list: Xs = [Xs]
    if type(Xs_val[0]) is not list: Xs_val = [Xs_val]
    trainXs = map(hd.grouper, Xs)
    valXs = map(hd.grouper, Xs_val)
    train_iters = [hd.HomogeneousData(trainX, batch_size=batch_size, maxlen=maxlen_w) for trainX in trainXs]
    val_iters = [hd.HomogeneousData(valX, batch_size=batch_size, maxlen=maxlen_w) for valX in valXs]

    f_progress = open('%s_progress.txt' % saveto, 'w', 1)
    uidx = 0
    lrate = 0.01
    for eidx in xrange(max_epochs):
        n_samples = 0

        print 'Epoch ', eidx

        for train_iter in train_iters:
            for x, y, z in train_iter:
                n_samples += len(x)
                uidx += 1

                x, x_mask, y, y_mask, z, z_mask = hd.prepare_data(x, y, z, worddict, maxlen=maxlen_w, n_words=n_words)

                if x == None:
                    print 'Minibatch with zero sample under length ', maxlen_w
                    uidx -= 1
                    continue

                ud_start = time.time()
                cost = f_grad_shared(x, x_mask, y, y_mask, z, z_mask)
                f_update(lrate)
                ud = time.time() - ud_start

                if numpy.isnan(cost) or numpy.isinf(cost):
                    print 'NaN detected'
                    return 1., 1., 1.

                if numpy.mod(uidx, dispFreq) == 0:
                    print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

                if numpy.mod(uidx, saveFreq) == 0:
                    val_logprob = n_val_samples = 0
                    for val_iter in val_iters:
                        for x, y, z in val_iter:
                            n_val_samples += len(x)
                            x, x_mask, y, y_mask, z, z_mask = hd.prepare_data(x, y, z, worddict, maxlen=maxlen_w, n_words=n_words)
                            val_logprob += f_log_probs(x, x_mask, y, y_mask, z, z_mask)
                    val_logprob /= n_val_samples
                    print 'LOGPROB: %s' % val_logprob
                    f_progress.write('%s\n' % val_logprob)

                    print 'Saving...',
                    params = unzip(tparams)
                    numpy.savez('%s_%.3f' % (saveto, val_logprob), history_errs=[], **params)
                    pkl.dump(model_options, open('%s_%.3f.pkl'%(saveto, val_logprob), 'wb'))
                    print 'Done'

            print 'Seen %d samples'%n_samples
Пример #7
0
def main():
    model_config = configuration.ModelConfig()
    model_config.data = FLAGS.input_dataset_name

    #loading dataset
    print('Loading dataset ...')
    (train_caps,
     train_ims), (test_caps,
                  test_ims), _ = load_dataset(name=model_config.data,
                                              load_train=True)
    train_nic_ims = train_ims[:, 1536:]
    test_nic_ims = test_ims[:, 1536:]
    train_ims[:, 1536:] = preprocessing.scale(train_nic_ims)
    test_ims[:, 1536:] = preprocessing.scale(test_nic_ims)

    test_vgg_feature = test_ims[:, :1536]
    test_NIC_feature = test_ims[:, 1536:]

    #create and save dictionary
    print('creating dictionary')
    worddict = build_dictionary(train_caps + test_caps)[0]
    n_words = len(worddict)
    model_config.n_words = n_words
    model_config.worddict = worddict
    print('dictionary size: ' + str(n_words))
    with open('f8k.dictionary.pkl', 'wb') as f:
        pkl.dump(worddict, f)

    #Building the model
    print('Building the model ...')
    model = LTS(model_config)
    model.build()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    saver = tf.train.Saver(max_to_keep=model_config.max_checkpoints_to_keep)

    #sess = tf.Session(config=config)

    print('start embedding training')
    curr = 0.
    uidx = 0.
    train_iter = homogeneous_data.HomogeneousData(
        data=[train_caps, train_ims],
        batch_size=model_config.batch_size,
        maxlen=model_config.maxlen_w)
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(model_config.max_epochs):
            # Train G
            print('Epoch ', epoch)

            if epoch == 15:
                model_config.lrate = model_config.lrate / 10

            for x, im in train_iter:
                uidx += 1

                ls, mask, im = homogeneous_data.prepare_data(
                    caps=x,
                    features=im,
                    worddict=worddict,
                    maxlen=model_config.maxlen_w,
                    n_words=model_config.n_words)

                vgg_feature = im[:, :1536]
                NIC_feature = im[:, 1536:]

                #embedding training
                _, cost = sess.run(
                    [model.updates, model.embedding_loss],
                    feed_dict={
                        model.VGG_pred_data: vgg_feature,
                        model.NIC_pred_data: NIC_feature,
                        model.ls_pred_data: ls.T,
                        model.input_mask: mask.T,
                        model.keep_prob: 0.5,
                        model.phase: 1,
                        model.learning_rate: model_config.lrate
                    })

                if np.mod(uidx, 10) == 0:
                    print('Epoch ', epoch, 'Update ', uidx, 'Cost ', cost)

                if np.mod(uidx, 100) == 0:
                    print('test ...')

                    # encode images into the text embedding space
                    images = getTestImageFeature(sess, model, test_vgg_feature,
                                                 test_NIC_feature)
                    features = getTestTextFeature(sess, model, model_config,
                                                  test_caps)

                    (r1, r5, r10, medr) = recall.i2t(images, features)
                    print("Image to text: %.1f, %.1f, %.1f, %.1f" %
                          (r1, r5, r10, medr))
                    (r1i, r5i, r10i, medri) = recall.t2i(images, features)
                    print("Text to image: %.1f, %.1f, %.1f, %.1f" %
                          (r1i, r5i, r10i, medri))

                    currscore = r1 + r5 + r10 + r1i + r5i + r10i
                    if currscore > curr:
                        curr = currscore

                        # Save model
                        print('Saving...')
                        saver.save(sess,
                                   "checkpoint_files/model.ckpt",
                                   global_step=int(uidx + 1))
                        print('done.')

    sess = tf.Session()
    model_path = tf.train.latest_checkpoint("checkpoint_files/")
    if not model_path:
        print("Skipping testing. No checkpoint found in: %s",
              FLAGS.checkpoint_dir)
        return

    print("Loading model from checkpoint: %s", model_path)
    saver.restore(sess, model_path)
    print("Successfully loaded checkpoint: %s", model_path)

    images = getTestImageFeature(sess, model, test_vgg_feature,
                                 test_NIC_feature)

    # encode sentences into the text embedding space
    features = getTestTextFeature(sess, model, model_config, test_caps)

    (r1, r5, r10, medr) = recall.i2t(images, features)
    print("Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr))
    (r1i, r5i, r10i, medri) = recall.t2i(images, features)
    print("Text to image: %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri))
Пример #8
0
def trainer(X, 
            dim_word=620, # word vector dimensionality
            dim=2400, # the number of GRU units
            encoder='gru',
            decoder='gru',
            max_epochs=5,
            dispFreq=1,
            decay_c=0.,
            grad_clip=5.,
            n_words=20000,
            maxlen_w=30,
            optimizer='adam',
            batch_size = 64,
            saveto='/u/rkiros/research/semhash/models/toy.npz',
            dictionary='/ais/gobi3/u/rkiros/bookgen/book_dictionary_large.pkl',
            saveFreq=1000,
            reload_=False):

    # Model options
    model_options = {}
    model_options['dim_word'] = dim_word
    model_options['dim'] = dim
    model_options['encoder'] = encoder
    model_options['decoder'] = decoder 
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['n_words'] = n_words
    model_options['maxlen_w'] = maxlen_w
    model_options['optimizer'] = optimizer
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['dictionary'] = dictionary
    model_options['saveFreq'] = saveFreq
    model_options['reload_'] = reload_

    print model_options

    # reload options
    # TODO: if loading old parameters you need to make sure you are using them
    #  in the rest of the code
    # if reload_ and os.path.exists(saveto):
    #     print 'reloading...' + saveto
    #     with open('%s.pkl'%saveto, 'rb') as f:
    #         model_options = pkl.load(f)

    # load dictionary
    print 'Loading dictionary...'
    worddict = load_dictionary(dictionary)

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    print 'Building model'
    params = init_params(model_options)
    # reload parameters
    if reload_ and os.path.exists(saveto):
        params = load_params(saveto + '.npz', params)

    tparams = init_tparams(params)

    trng, x, x_mask, y, y_mask, z, z_mask, \
          opt_ret, \
          cost = \
          build_model(tparams, model_options)
    inps = [x, x_mask, y, y_mask, z, z_mask]

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, cost, profile=False)
    print 'Done'

    # weight decay, if applicable
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv ** 2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print 'Building f_cost...',
    f_cost = theano.function(inps, cost, profile=False)
    print 'Done'

    print 'Done'
    print 'Building f_grad...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads], profile=False)
    f_weight_norm = theano.function([], [(t**2).sum() for k,t in tparams.iteritems()], profile=False)

    if grad_clip > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(tensor.switch(g2 > (grad_clip**2),
                                           g / tensor.sqrt(g2) * grad_clip,
                                           g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print 'Optimization'

    # Each sentence in the minibatch have same length (for encoder)
    trainX = homogeneous_data.grouper(X)
    train_iter = homogeneous_data.HomogeneousData(trainX, batch_size=batch_size, maxlen=maxlen_w)

    uidx = 0
    lrate = 0.01
    for eidx in xrange(max_epochs):
        n_samples = 0

        print 'Epoch ', eidx

        for x, y, z in train_iter:
            n_samples += len(x)
            uidx += 1

            x, x_mask, y, y_mask, z, z_mask = homogeneous_data.prepare_data(x, y, z, worddict, maxlen=maxlen_w, n_words=n_words)

            if x == None:
                print 'Minibatch with zero sample under length ', maxlen_w
                uidx -= 1
                continue

            ud_start = time.time()
            cost = f_grad_shared(x, x_mask, y, y_mask, z, z_mask)
            f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

            if numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...',

                params = unzip(tparams)
                numpy.savez(saveto, history_errs=[], **params)
                pkl.dump(model_options, open('%s.pkl'%saveto, 'wb'))
                print 'Done'

        print 'Seen %d samples'%n_samples
Пример #9
0
def trainer(X, C, stmodel,
            dimctx=4800, #vector dimensionality
            dim_word=620, # word vector dimensionality
            dim=1600, # the number of GRU units
            encoder='gru',
            decoder='gru',
            doutput=False,
            max_epochs=5,
            dispFreq=1,
            decay_c=0.,
            grad_clip=5.,
            n_words=40000,
            maxlen_w=100,
            optimizer='adam',
            batch_size = 16,
            saveto='/u/rkiros/research/semhash/models/toy.npz',
            dictionary='/ais/gobi3/u/rkiros/bookgen/book_dictionary_large.pkl',
            embeddings=None,
            saveFreq=1000,
            sampleFreq=100,
            reload_=False):

    # Model options
    model_options = {}
    model_options['dimctx'] = dimctx
    model_options['dim_word'] = dim_word
    model_options['dim'] = dim
    model_options['encoder'] = encoder
    model_options['decoder'] = decoder
    model_options['doutput'] = doutput
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['n_words'] = n_words
    model_options['maxlen_w'] = maxlen_w
    model_options['optimizer'] = optimizer
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['dictionary'] = dictionary
    model_options['embeddings'] = embeddings
    model_options['saveFreq'] = saveFreq
    model_options['sampleFreq'] = sampleFreq
    model_options['reload_'] = reload_

    print model_options

    # reload options
    if reload_ and os.path.exists(saveto):
        print 'reloading...' + saveto
        with open('%s.pkl'%saveto, 'rb') as f:
            models_options = pkl.load(f)

    # load dictionary
    print 'Loading dictionary...'
    worddict = load_dictionary(dictionary)

    # Load pre-trained embeddings, if applicable
    if embeddings != None:
        print 'Loading embeddings...'
        with open(embeddings, 'rb') as f:
            embed_map = pkl.load(f)
        dim_word = len(embed_map.values()[0])
        model_options['dim_word'] = dim_word
        preemb = norm_weight(n_words, dim_word)
        pz = defaultdict(lambda : 0)
        for w in embed_map.keys():
            pz[w] = 1
        for w in worddict.keys()[:n_words-2]:
            if pz[w] > 0:
                preemb[worddict[w]] = embed_map[w]
    else:
        preemb = None

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    print 'Building model'
    params = init_params(model_options, preemb=preemb)
    # reload parameters
    if reload_ and os.path.exists(saveto):
        params = load_params(saveto, params)

    tparams = init_tparams(params)

    trng, inps, cost = build_model(tparams, model_options)

    print 'Building sampler'
    f_init, f_next = build_sampler(tparams, model_options, trng)

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, cost, profile=False)
    print 'Done'

    # weight decay, if applicable
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv ** 2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print 'Building f_cost...',
    f_cost = theano.function(inps, cost, profile=False)
    print 'Done'

    print 'Done'
    print 'Building f_grad...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads], profile=False)
    f_weight_norm = theano.function([], [(t**2).sum() for k,t in tparams.iteritems()], profile=False)

    if grad_clip > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(tensor.switch(g2 > (grad_clip**2),
                                           g / tensor.sqrt(g2) * grad_clip,
                                           g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print 'Optimization'

    # Each sentence in the minibatch have same length (for encoder)
    train_iter = homogeneous_data.HomogeneousData([X,C], batch_size=batch_size, maxlen=maxlen_w)

    uidx = 0
    lrate = 0.01
    for eidx in xrange(max_epochs):
        n_samples = 0

        print 'Epoch ', eidx

        for x, c in train_iter:
            n_samples += len(x)
            uidx += 1

            x, mask, ctx = homogeneous_data.prepare_data(x, c, worddict, stmodel, maxlen=maxlen_w, n_words=n_words)

            if x == None:
                print 'Minibatch with zero sample under length ', maxlen_w
                uidx -= 1
                continue

            ud_start = time.time()
            cost = f_grad_shared(x, mask, ctx)
            f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

            if numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...',

                params = unzip(tparams)
                numpy.savez(saveto, history_errs=[], **params)
                pkl.dump(model_options, open('%s.pkl'%saveto, 'wb'))
                print 'Done'

            if numpy.mod(uidx, sampleFreq) == 0:
                x_s = x
                mask_s = mask
                ctx_s = ctx
                for jj in xrange(numpy.minimum(10, len(ctx_s))):
                    sample, score = gen_sample(tparams, f_init, f_next, ctx_s[jj].reshape(1, model_options['dimctx']), model_options,
                                               trng=trng, k=1, maxlen=100, stochastic=False, use_unk=False)
                    print 'Truth ',jj,': ',
                    for vv in x_s[:,jj]:
                        if vv == 0:
                            break
                        if vv in word_idict:
                            print word_idict[vv],
                        else:
                            print 'UNK',
                    print
                    for kk, ss in enumerate([sample[0]]):
                        print 'Sample (', kk,') ', jj, ': ',
                        for vv in ss:
                            if vv == 0:
                                break
                            if vv in word_idict:
                                print word_idict[vv],
                            else:
                                print 'UNK',
                    print

        print 'Seen %d samples'%n_samples
Пример #10
0
def trainer(data='coco',
            margin=0.2,
            dim=1024,
            dim_image=4096,
            dim_word=300,
            encoder='gru',
            max_epochs=15,
            dispFreq=10,
            decay_c=0.0,
            grad_clip=2.0,
            maxlen_w=150,
            batch_size=128,
            saveto='vse/coco',
            validFreq=100,
            lrate=0.0002,
            concat=True,
            reload_=False):

    hyper_params = {
        'data': data,
        'encoder': encoder,
        'batch_size': batch_size,
        'time': cur_time,
        'lrate': lrate,
        'concat': concat,
    }

    i2t_r1 = dict([('i2t_recall', 'r1')] + hyper_params.items())
    i2t_r5 = dict([('i2t_recall', 'r5')] + hyper_params.items())
    i2t_r10 = dict([('i2t_recall', 'r10')] + hyper_params.items())
    t2i_r1 = dict([('t2i_recall', 'r1')] + hyper_params.items())
    t2i_r5 = dict([('t2i_recall', 'r5')] + hyper_params.items())
    t2i_r10 = dict([('t2i_recall', 'r10')] + hyper_params.items())

    i2t_med = dict([('i2t_med', 'i2t_med')] + hyper_params.items())
    t2i_med = dict([('t2i_med', 't2i_med')] + hyper_params.items())

    agent = Agent(port=5020)
    i2t_r1_agent = agent.register(i2t_r1, 'recall', overwrite=True)
    i2t_r5_agent = agent.register(i2t_r5, 'recall', overwrite=True)
    i2t_r10_agent = agent.register(i2t_r10, 'recall', overwrite=True)
    t2i_r1_agent = agent.register(t2i_r1, 'recall', overwrite=True)
    t2i_r5_agent = agent.register(t2i_r5, 'recall', overwrite=True)
    t2i_r10_agent = agent.register(t2i_r10, 'recall', overwrite=True)

    i2t_med_agent = agent.register(i2t_med, 'median', overwrite=True)
    t2i_med_agent = agent.register(t2i_med, 'median', overwrite=True)

    # Model options
    model_options = {}
    model_options['data'] = data
    model_options['margin'] = margin
    model_options['dim'] = dim
    model_options['dim_image'] = dim_image
    model_options['dim_word'] = dim_word
    model_options['encoder'] = encoder
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['maxlen_w'] = maxlen_w
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['validFreq'] = validFreq
    model_options['lrate'] = lrate
    model_options['reload_'] = reload_
    model_options['concat'] = concat

    print model_options

    # reload options
    if reload_ and os.path.exists(saveto):
        print 'reloading...' + saveto
        with open('%s.pkl' % saveto, 'rb') as f:
            model_options = pkl.load(f)

    # Load training and development sets
    print 'loading dataset'
    train, dev = load_dataset(data)[:2]

    # Create and save dictionary
    print 'Create dictionary'
    worddict = build_dictionary(train[0] + dev[0])[0]
    n_words = len(worddict)
    model_options['n_words'] = n_words
    print 'Dictionary size: ' + str(n_words)
    with open('%s.dictionary.pkl' % saveto, 'wb') as f:
        pkl.dump(worddict, f)

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    model_options['worddict'] = worddict
    model_options['word_idict'] = word_idict

    # Each sentence in the minibatch have same length (for encoder)
    train_iter = homogeneous_data.HomogeneousData([train[0], train[1]],
                                                  batch_size=batch_size,
                                                  maxlen=maxlen_w)

    img_sen_model = ImgSenRanking(model_options)
    img_sen_model = img_sen_model.cuda()

    loss_fn = PairwiseRankingLoss(margin=margin)
    loss_fn = loss_fn.cuda()

    params = filter(lambda p: p.requires_grad, img_sen_model.parameters())
    optimizer = torch.optim.Adam(params, lrate)

    uidx = 0
    curr = 0.0
    n_samples = 0

    for eidx in xrange(max_epochs):

        print 'Epoch ', eidx

        for x, im in train_iter:
            n_samples += len(x)
            uidx += 1

            x_id, im = homogeneous_data.prepare_data(x,
                                                     im,
                                                     worddict,
                                                     maxlen=maxlen_w,
                                                     n_words=n_words)

            if x_id is None:
                print 'Minibatch with zero sample under length ', maxlen_w
                uidx -= 1
                continue

            x_id = Variable(torch.from_numpy(x_id).cuda())
            im = Variable(torch.from_numpy(im).cuda())
            # Update
            ud_start = time.time()
            x, im = img_sen_model(x_id, im, x)
            cost = loss_fn(im, x)
            optimizer.zero_grad()
            cost.backward()
            torch.nn.utils.clip_grad_norm(params, grad_clip)
            optimizer.step()
            ud = time.time() - ud_start

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost.data.cpu(
                ).numpy()[0], 'UD ', ud

            if numpy.mod(uidx, validFreq) == 0:

                print 'Computing results...'
                curr_model = {}
                curr_model['options'] = model_options
                curr_model['worddict'] = worddict
                curr_model['word_idict'] = word_idict
                curr_model['img_sen_model'] = img_sen_model

                ls, lim = encode_sentences(curr_model, dev[0]), encode_images(
                    curr_model, dev[1])

                r1, r5, r10, medr = 0.0, 0.0, 0.0, 0
                r1i, r5i, r10i, medri = 0.0, 0.0, 0.0, 0
                r_time = time.time()
                if data == 'arch' or data == 'arch_small':
                    (r1, r5, r10, medr) = i2t_arch(lim, ls)
                    print "Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5,
                                                                     r10, medr)
                    (r1i, r5i, r10i, medri) = t2i_arch(lim, ls)
                    print "Text to image: %.1f, %.1f, %.1f, %.1f" % (
                        r1i, r5i, r10i, medri)
                else:
                    (r1, r5, r10, medr) = i2t(lim, ls)
                    print "Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5,
                                                                     r10, medr)
                    (r1i, r5i, r10i, medri) = t2i(lim, ls)
                    print "Text to image: %.1f, %.1f, %.1f, %.1f" % (
                        r1i, r5i, r10i, medri)

                print "Cal Recall@K using %ss" % (time.time() - r_time)

                record_num = uidx / validFreq
                agent.append(i2t_r1_agent, record_num, r1)
                agent.append(i2t_r5_agent, record_num, r5)
                agent.append(i2t_r10_agent, record_num, r10)
                agent.append(t2i_r1_agent, record_num, r1i)
                agent.append(t2i_r5_agent, record_num, r5i)
                agent.append(t2i_r10_agent, record_num, r10i)

                agent.append(i2t_med_agent, record_num, medr)
                agent.append(t2i_med_agent, record_num, medri)

                currscore = r1 + r5 + r10 + r1i + r5i + r10i
                if currscore > curr:
                    curr = currscore

                    # Save model
                    print 'Saving model...',
                    pkl.dump(
                        model_options,
                        open('%s_params_%s.pkl' % (saveto, encoder), 'wb'))
                    torch.save(img_sen_model.state_dict(),
                               '%s_model_%s.pkl' % (saveto, encoder))
                    print 'Done'

        print 'Seen %d samples' % n_samples
Пример #11
0
 # Inverse dictionary    word_idict = dict() for kk, vv in worddict.iteritems():        word_idict[vv] = kk    word_idict[0] = '<eos>'    word_idict[1] = 'UNK'
 print 'Building model'    params = init_params(model_options) # reload parameters if reload_ and os.path.exists(saveto):        params = load_params(saveto, params)
    tparams = init_tparams(params)
    trng, inps, cost = build_model(tparams, model_options)
 # before any regularizer print 'Building f_log_probs...',    f_log_probs = theano.function(inps, cost, profile=False) print 'Done'
 # weight decay, if applicable if decay_c > 0.:        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')        weight_decay = 0. for kk, vv in tparams.iteritems():            weight_decay += (vv ** 2).sum()        weight_decay *= decay_c        cost += weight_decay
 # after any regularizer print 'Building f_cost...',    f_cost = theano.function(inps, cost, profile=False) print 'Done'
 print 'Building sentence encoder'    trng, inps_se, sentences = build_sentence_encoder(tparams, model_options)    f_senc = theano.function(inps_se, sentences, profile=False)
 print 'Building image encoder'    trng, inps_ie, images = build_image_encoder(tparams, model_options)    f_ienc = theano.function(inps_ie, images, profile=False)
 print 'Building f_grad...',    grads = tensor.grad(cost, wrt=itemlist(tparams))    f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads], profile=False)    f_weight_norm = theano.function([], [(t**2).sum() for k,t in tparams.iteritems()], profile=False)
 if grad_clip > 0.:        g2 = 0. for g in grads:            g2 += (g**2).sum()        new_grads = [] for g in grads:            new_grads.append(tensor.switch(g2 > (grad_clip**2),                                           g / tensor.sqrt(g2) * grad_clip,                                           g))        grads = new_grads
    lr = tensor.scalar(name='lr') print 'Building optimizers...', # (compute gradients), (updates parameters)    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)
 print 'Optimization'
 # Each sentence in the minibatch have same length (for encoder)    train_iter = homogeneous_data.HomogeneousData([train[0], train[1]], batch_size=batch_size, maxlen=maxlen_w)
    uidx = 0    curr = 0.    n_samples = 0  for eidx in xrange(max_epochs):
 print 'Epoch ', eidx
 for x, im in train_iter:            n_samples += len(x)            uidx += 1
            x, mask, im = homogeneous_data.prepare_data(x, im, worddict, maxlen=maxlen_w, n_words=n_words)
 if x == None: print 'Minibatch with zero sample under length ', maxlen_w                uidx -= 1 continue
 # Update            ud_start = time.time()            cost = f_grad_shared(x, mask, im)            f_update(lrate)            ud = time.time() - ud_start
 if numpy.isnan(cost) or numpy.isinf(cost): print 'NaN detected' return 1., 1., 1.
 if numpy.mod(uidx, dispFreq) == 0: print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud
 if numpy.mod(uidx, validFreq) == 0:
 print 'Computing results...'                curr_model = {}                curr_model['options'] = model_options                curr_model['worddict'] = worddict                curr_model['word_idict'] = word_idict                curr_model['f_senc'] = f_senc                curr_model['f_ienc'] = f_ienc
                ls = encode_sentences(curr_model, dev[0])                lim = encode_images(curr_model, dev[1])
                (r1, r5, r10, medr) = i2t(lim, ls) print "Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr)                (r1i, r5i, r10i, medri) = t2i(lim, ls) print "Text to image: %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri)
                currscore = r1 + r5 + r10 + r1i + r5i + r10i if currscore > curr:                    curr = currscore
 # Save model print 'Saving...',                    params = unzip(tparams)                    numpy.savez(saveto, **params)                    pkl.dump(model_options, open('%s.pkl'%saveto, 'wb')) print 'Done'
 print 'Seen %d samples'%n_samples
if __name__ == '__main__': pass
Пример #12
0
def trainer(X, 
            dim_word=620, # word vector dimensionality
            dim=2400, # the number of GRU units
            encoder='gru',
            num_neg=4,
            gamma=1.0,
            max_epochs=5,
            dispFreq=1,
            decay_c=0.,
            grad_clip=5.,
            n_words=20000,
            maxlen_w=30,
            optimizer='adam',
            batch_size = 64,
            saveto='/u/rkiros/research/semhash/models/toy.npz',
            dictionary='/ais/gobi3/u/rkiros/bookgen/book_dictionary_large.pkl',
            saveFreq=1000,
            reload_=False):

    # Model options
    model_options = {}
    model_options['dim_word'] = dim_word
    model_options['dim'] = dim
    model_options['encoder'] = encoder
    model_options['num_neg'] = num_neg
    model_options['gamma'] = gamma
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['n_words'] = n_words
    model_options['maxlen_w'] = maxlen_w
    model_options['optimizer'] = optimizer
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['dictionary'] = dictionary
    model_options['saveFreq'] = saveFreq
    model_options['reload_'] = reload_

    print model_options

    # reload options
    if reload_ and os.path.exists(saveto):
        print 'reloading...' + saveto
        with open('%s.pkl'%saveto, 'rb') as f:
            models_options = pkl.load(f)

    # load dictionary
    print 'Loading dictionary...'
    worddict = load_dictionary(dictionary)

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    print 'Building model'
    params = init_params(model_options)
    # reload parameters
    if reload_ and os.path.exists(saveto):
        params = load_params(saveto, params)

    tparams = init_tparams(params)

    trng, x, x_mask, p_f, p_f_mask, p_b, p_b_mask, \
          ns_list, ns_masks, \
          opt_ret, \
          cost = \
          build_model(tparams, model_options)
    inps = [x, x_mask, p_f, p_f_mask, p_b, p_b_mask] + ns_list + ns_masks

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, cost, profile=False)
    print 'Done'

    # weight decay, if applicable
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv ** 2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print 'Building f_cost...',
    f_cost = theano.function(inps, cost, profile=False)
    print 'Done'

    print 'Done'
    print 'Building f_grad...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads], profile=False)
    f_weight_norm = theano.function([], [(t**2).sum() for k,t in tparams.iteritems()], profile=False)

    if grad_clip > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(tensor.switch(g2 > (grad_clip**2),
                                           g / tensor.sqrt(g2) * grad_clip,
                                           g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print 'Optimization'

    # Each sentence in the minibatch have same length (for encoder)
    trainX = homogeneous_data.grouper(X)
    train_iter = homogeneous_data.HomogeneousData(trainX, batch_size=batch_size, num_neg=num_neg, maxlen=maxlen_w)

    uidx = 0
    lrate = 0.01
    for eidx in xrange(max_epochs):
        n_samples = 0

        print 'Epoch ', eidx

        for x, p_f, p_b, ns in train_iter:
            n_samples += len(x)
            uidx += 1

            # ns input is list of num_neg negative sentences, 
            # output ns is list of num_neg (batchsize, neg_len) negative sentences 
            x, x_mask, p_f, p_f_mask, p_b, p_b_mask, ns_list, ns_masks = homogeneous_data.prepare_data(x, p_f, p_b, ns, 
                                                                                worddict, maxlen=maxlen_w, n_words=n_words)

            if x == None:
                print 'Minibatch with zero sample under length ', maxlen_w
                uidx -= 1
                continue

            ud_start = time.time()
            args = [x, x_mask, p_f, p_f_mask, p_b, p_b_mask] + ns_list + ns_masks
            cost = f_grad_shared(*args)
            f_update(lrate)

            ud = time.time() - ud_start

            if numpy.isnan(cost_p1) or numpy.isinf(cost_p1) or numpy.isnan(cost_p2) or numpy.isinf(cost_p2):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost_p1 + cost_p2, 'UD ', ud

            if numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...',

                saveto_idx = saveto.format(uidx)

                params = unzip(tparams)
                numpy.savez(saveto_idx, history_errs=[], **params)
                pkl.dump(model_options, open('%s.pkl'%saveto_idx, 'wb'))
                print 'Done'

        print 'Seen %d samples'%n_samples
Пример #13
0
def trainer(data='f30k',
            margin=0.2,
            dim=1024,
            dim_image=4096,
            dim_word=300,
            max_epochs=15,
            encoder='lstm',
            dispFreq=10,
            grad_clip=2.0,
            maxlen_w=150,
            batch_size=128,
            saveto='vse/f30K',
            validFreq=100,
            early_stop=20,
            lrate=1e-3,
            reload_=False):
    # Model options
    model_options = {}
    model_options['data'] = data
    model_options['margin'] = margin
    model_options['dim'] = dim
    model_options['dim_image'] = dim_image
    model_options['dim_word'] = dim_word
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['grad_clip'] = grad_clip
    model_options['maxlen_w'] = maxlen_w
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['validFreq'] = validFreq
    model_options['lrate'] = lrate
    model_options['reload_'] = reload_

    logging.info(model_options)

    # reload options
    if reload_ and os.path.exists(saveto):
        logging.info('reloading...' + saveto)
        with open('%s.pkl' % saveto, 'rb') as f:
            model_options = pkl.load(f)

    # Load training and development sets
    logging.info('loading dataset')
    titles, album_ims, artist, genre = load_dataset(data)
    artist_string = artist
    genre_string = genre

    # Create and save dictionary
    if os.path.exists('%s.dictionary.pkl' % saveto):
        logging.info('loading dict from...' + saveto)
        with open('%s.dictionary.pkl' % saveto, 'rb') as wdict:
            worddict = pkl.load(wdict)
        n_words = len(worddict)
        model_options['n_words'] = n_words
        logging.info('Dictionary size: ' + str(n_words))
    else:

        logging.info('Create dictionary')
        worddict = build_dictionary(titles + artist + genre)[0]
        n_words = len(worddict)
        model_options['n_words'] = n_words
        logging.info('Dictionary words: ' + str(n_words))
        with open('%s.dictionary.pkl' % saveto, 'wb') as f:
            pkl.dump(worddict, f)

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.items():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    model_options['worddict'] = worddict
    model_options['word_idict'] = word_idict

    # Each sentence in the minibatch have same length (for encoder)
    train_iter = homogeneous_data.HomogeneousData(
        [titles, album_ims, artist, genre],
        batch_size=batch_size,
        maxlen=maxlen_w)

    img_sen_model = Img_Sen_Artist_Ranking(model_options)
    # todo code to load saved model dict
    if os.path.exists('%s_model_%s.pkl' % (saveto, encoder)):
        logging.info('Loading model...')
        # pkl.dump(model_options, open('%s_params_%s.pkl' % (saveto, encoder), 'wb'))
        img_sen_model.load_state_dict(
            torch.load('%s_model_%s.pkl' % (saveto, encoder)))
        logging.info('Done')
    img_sen_model = img_sen_model.cuda()

    loss_fn = PairwiseRankingLoss(margin=margin).cuda()

    params = filter(lambda p: p.requires_grad, img_sen_model.parameters())
    optimizer = torch.optim.Adam(params, lr=lrate)
    scheduler = ReduceLROnPlateau(optimizer,
                                  factor=0.1,
                                  patience=40,
                                  mode='min',
                                  verbose=True,
                                  threshold=1e-8)

    uidx = 0
    curr = 0.0
    n_samples = 0

    # For Early-stopping
    best_r1, best_r5, best_r10, best_medr = 0.0, 0.0, 0.0, 0
    best_step = 0

    writer = SummaryWriter()
    for eidx in range(max_epochs):

        for x, im, artist, genre in train_iter:
            n_samples += len(x)
            uidx += 1

            x, im, artist, genre = homogeneous_data.prepare_data(
                x,
                im,
                artist,
                genre,
                worddict,
                maxlen=maxlen_w,
                n_words=n_words)

            if x is None:
                logging.info('Minibatch with zero sample under length ',
                             maxlen_w)
                uidx -= 1
                continue

            x = Variable(torch.from_numpy(x).cuda())
            im = Variable(torch.from_numpy(im).cuda())
            artist = Variable(torch.from_numpy(artist).cuda())
            genre = Variable(torch.from_numpy(genre).cuda())
            # Update
            x1, im1, artist, genre = img_sen_model(x, im, artist, genre)

            #make validation on inout before trainer see it
            if numpy.mod(uidx, validFreq) == 0:
                img_sen_model.eval()
                with torch.no_grad():
                    print('Epoch ', eidx, '\tUpdate@ ', uidx, '\tCost ',
                          cost.data.item())
                    writer.add_scalar('Evaluation/Validation_Loss',
                                      cost.data.item(), uidx)
                    (r1, r5, r10, medr) = i2t(im1, x)  #distances with l2norm
                    logging.info("Image to text: %.1f, %.1f, %.1f, %.1f" %
                                 (r1, r5, r10, medr))

                    (r1g, r5g, r10g, medrg) = i2t(im1, genre)
                    logging.info("Image to genre: %.1f, %.1f, %.1f, %.1f" %
                                 (r1g, r5g, r10g, medrg))

                    (r1a, r5a, r10a, medra) = i2t(im1, artist)
                    logging.info("Image to Artist: %.1f, %.1f, %.1f, %.1f" %
                                 (r1a, r5a, r10a, medra))

                    logging.info("Cal Recall@K ")
                    writer.add_scalars('Validation Recal/Image2Album', {
                        'r@1': r1,
                        'r@5': r5,
                        'r@10': r10
                    }, uidx)

                    writer.add_scalars('Validation Recal/Image2Genres', {
                        'r@1': r1g,
                        'r@5': r5g,
                        'r@10': r10g
                    }, uidx)

                    writer.add_scalars('Validation Recal/Image2Artist', {
                        'r@1': r1a,
                        'r@5': r5a,
                        'r@10': r5a
                    }, uidx)

                    curr_step = uidx / validFreq

                    currscore = r1 + r5 + r10 + r1a + r5a + r10a + r1g + r5g + r10g - medr - medrg - medra
                    if currscore > curr:
                        curr = currscore
                        best_r1, best_r5, best_r10, best_medr = r1, r5, r10, medr
                        best_r1g, best_r5g, best_r10g, best_medrg = r1, r5, r10, medrg
                        best_step = curr_step

                        # Save model
                        logging.info('Saving model...')
                        pkl.dump(
                            model_options,
                            open('%s_params_%s.pkl' % (saveto, encoder), 'wb'))
                        torch.save(img_sen_model.state_dict(),
                                   '%s_model_%s.pkl' % (saveto, encoder))
                        logging.info('Done')

                    if curr_step - best_step > early_stop:
                        logging.info('early stopping, jumping now...')
                        logging.info("Image to text: %.1f, %.1f, %.1f, %.1f" %
                                     (best_r1, best_r5, best_r10, best_medr))
                        logging.info(
                            "Image to genre: %.1f, %.1f, %.1f, %.1f" %
                            (best_r1g, best_r5g, best_r10g, best_medrg))

                        #return 0
                        lrate = 1e-4
                        for param_group in optimizer.param_groups:
                            param_group['lr'] = lrate

            img_sen_model.train()
            cost = loss_fn(im1, x1, artist, genre)
            writer.add_scalar('Evaluation/training_Loss', cost, uidx)

            optimizer.zero_grad()
            cost.backward()
            torch.nn.utils.clip_grad_norm_(params, grad_clip)

            scheduler.step(cost.data.item())
            optimizer.step()

        #scheduler.step(cost.data.item())
        logging.info('Seen %d samples' % n_samples)
Пример #14
0
def trainer(
        X,
        dim_word=620,  # word vector dimensionality
        dim=2400,  # the number of GRU units
        encoder='gru',
        decoder='gru',
        max_epochs=5,
        dispFreq=1,
        decay_c=0.,
        grad_clip=5.,
        n_words=20000,
        maxlen_w=30,
        optimizer='adam',
        batch_size=512,
        saveto='/u/rkiros/research/semhash/models/toy.npz',
        dictionary='/ais/gobi3/u/rkiros/bookgen/book_dictionary_large.pkl',
        saveFreq=5000,
        reload_=False,
        reload_path='output_books_full/model_ae_full_bsz_64_iter_313000.npz',
        SICK_eval=False):

    # Model options
    model_options = {}
    model_options['dim_word'] = dim_word
    model_options['dim'] = dim
    model_options['encoder'] = encoder
    model_options['decoder'] = decoder
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['n_words'] = n_words
    model_options['maxlen_w'] = maxlen_w
    model_options['optimizer'] = optimizer
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['dictionary'] = dictionary
    model_options['saveFreq'] = saveFreq
    model_options['reload_'] = reload_
    model_options['reload_path'] = reload_path

    print model_options

    # reload options
    if reload_ and os.path.exists(reload_path):
        print 'reloading...' + reload_path
        with open('%s.pkl' % reload_path, 'rb') as f:
            models_options = pkl.load(f)

        reload_idx = int(reload_path.split('_')[-1].split('.')[0])

    # load dictionary
    print 'Loading dictionary...'
    worddict = load_dictionary(dictionary)

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    print 'Building model'
    params = init_params(model_options)
    # reload parameters
    if reload_ and os.path.exists(reload_path):
        params = load_params(reload_path, params)

    tparams = init_tparams(params)

    trng, x, x_mask, y, y_mask, z, z_mask, \
          opt_ret, \
          cost = \
          build_model(tparams, model_options)
    inps = [x, x_mask, y, y_mask, z, z_mask]

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, cost, profile=False)
    print 'Done'

    # weight decay, if applicable
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv**2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print 'Building f_cost...',
    f_cost = theano.function(inps, cost, profile=False)
    print 'Done'

    print 'Done'
    print 'Building f_grad...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads],
                                  profile=False)
    f_weight_norm = theano.function([], [(t**2).sum()
                                         for k, t in tparams.iteritems()],
                                    profile=False)

    if grad_clip > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(
                tensor.switch(g2 > (grad_clip**2),
                              g / tensor.sqrt(g2) * grad_clip, g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print 'Optimization'

    # Each sentence in the minibatch have same length (for encoder)
    trainX = homogeneous_data.grouper(X)
    train_iter = homogeneous_data.HomogeneousData(trainX,
                                                  batch_size=batch_size,
                                                  maxlen=maxlen_w)

    if not reload_:
        uidx = 0
    else:
        uidx = reload_idx
    lrate = 0.01
    for eidx in xrange(max_epochs):
        n_samples = 0

        print 'Epoch ', eidx

        for x, y, z in train_iter:
            n_samples += len(x)
            uidx += 1

            x, x_mask, y, y_mask, z, z_mask = homogeneous_data.prepare_data(
                x, y, z, worddict, maxlen=maxlen_w, n_words=n_words)

            if x == None:
                print 'Minibatch with zero sample under length ', maxlen_w
                uidx -= 1
                continue

            ud_start = time.time()
            cost = f_grad_shared(x, x_mask, y, y_mask, z, z_mask)
            f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

            if numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...',

                saveto_iternum = saveto.format(uidx)

                params = unzip(tparams)
                numpy.savez(saveto_iternum, history_errs=[], **params)
                pkl.dump(model_options, open('%s.pkl' % saveto_iternum, 'wb'))
                print 'Done'

                if SICK_eval:
                    print "Evaluating SICK Test performance"
                    embed_map = tools.load_googlenews_vectors()
                    model = tools.load_model(path_to_model=saveto_iternum,
                                             embed_map=embed_map)
                    yhat, pr, sr, mse = eval_sick.evaluate(model,
                                                           evaltest=True)

                    del (model)
                    del (embed_map)
                    print pr, sr, mse

                    res_save_file = saveto.format('ALL').split(
                        '.')[0] + '_SICK_EVAL.txt'
                    with open(res_save_file, 'a') as rsf:
                        cur_time = strftime("%a, %d %b %Y %H:%M:%S +0000",
                                            gmtime())
                        rsf.write('\n \n {}'.format(cur_time))
                        rsf.write('\n{}, {}, {}, {}'.format(uidx, pr, se, mse))
                    print "Done"

        print 'Seen %d samples' % n_samples