示例#1
0
def train_separate(task, dataset='1k', iterations=1, batch_size=32):
    """Train a MemoryNet on a single bAbI task.

    Runs up to `iterations` random re-initializations, stopping early once
    test accuracy reaches 0.95, and returns the model together with the
    parameters of the best-performing run.

    Args:
        task: bAbI task id (1-20); 0/falsy selects the joint "all tasks" setting.
        dataset: dataset size tag passed to `gather` (e.g. '1k').
        iterations: maximum number of random restarts.
        batch_size: training batch size (previously ignored — it was
            unconditionally overwritten; now respected).

    Returns:
        (model, best_params): the MemoryNet instance and the list of trained
        variable values from the restart with the highest test accuracy
        (None if no restart was run).
    """
    # get data for task
    data, metadata = gather(dataset, task)

    # build data format
    dformat = ['contexts', 'questions', 'answers']

    # create feeds
    trainfeed = DataFeed(dformat, data=data['train'])
    testfeed = DataFeed(dformat, data=data['test'])

    # task-dependent hyperparameters (falsy task => joint/all-tasks setting)
    hdim = 20 if task else 50
    eval_interval = 100 if task else 10
    # NOTE: the unconditional `batch_size = 32 if task else 128` override was
    # removed so the caller-supplied batch_size is actually used.

    # instantiate model
    # BUGFIX: hdim was hard-coded to 20, ignoring the value computed above
    model = MemoryNet(hdim=hdim,
                      num_hops=3,
                      memsize=metadata['clen'],
                      sentence_size=metadata['slen'],
                      qlen=metadata['qlen'],
                      vocab_size=metadata['vocab_size'],
                      num_candidates=metadata['candidates']['vocab_size'])

    # info
    # BUGFIX: .format(task) was missing, printing a literal '{}'
    print(':: <task {}> [0/2] Info'.format(task))
    print(':: \t memory size : {}, #candidates : {}'.format(
        metadata['clen'], metadata['candidates']['vocab_size']))

    with tf.Session() as sess:
        # run for multiple initializations
        i, accuracy, model_params = 0, [0.], [None]
        while accuracy[-1] < 0.95 and i < iterations:
            # init session (fresh random weights each restart)
            sess.run(tf.global_variables_initializer())

            # create trainer
            trainer = Trainer(sess,
                              model,
                              trainfeed,
                              testfeed,
                              batch_size=batch_size)

            print('\n:: <task {}> ({}) [1/2] Pretraining'.format(task, i))
            # pretrain
            acc = trainer.fit(epochs=100000,
                              eval_interval=1,
                              mode=Trainer.PRETRAIN,
                              verbose=False,
                              lr=0.0005)

            print(':: \tAccuracy after pretraining: ', acc)

            print('\n:: <task {}> ({}) [2/2] Training'.format(task, i))
            # train
            acc = trainer.fit(epochs=1000000,
                              eval_interval=eval_interval,
                              mode=Trainer.TRAIN,
                              verbose=False,
                              lr=0.0005)

            print(':: \tAccuracy after training: ', acc)

            # next iteration
            i += 1
            # record accuracy and a snapshot of trained variables for this run
            accuracy.append(acc)
            model_params.append(sess.run(tf.trainable_variables()))
            print(acc)

        print(':: [x/x] End of training')
        print(':: Max accuracy :', max(accuracy))

        # return model and the params from the best restart
        return model, model_params[accuracy.index(max(accuracy))]
示例#2
0
def train_separate_all(dataset='1k'):
    """Train a separate MemoryNet on each of the 20 bAbI tasks.

    For every task, runs up to 5 random re-initializations (stopping early
    once test accuracy reaches 0.95) and finally prints the best accuracy
    achieved per task.

    Args:
        dataset: dataset size tag passed to `gather` (e.g. '1k').
            BUGFIX: this was previously ignored — '1k' was hard-coded.
    """
    batch_size = 64

    task_max_acc = []
    for task in range(1, 21):
        # get data for this task
        # BUGFIX: use the dataset argument instead of hard-coded '1k'
        data, metadata = gather(dataset, task)

        # gather info from metadata
        num_candidates = metadata['candidates']['vocab_size']
        vocab_size = metadata['vocab_size']
        memsize = metadata['clen']
        sentence_size = metadata['slen']
        qlen = metadata['qlen']

        print(':: <task {}> memory size : {}'.format(task, memsize))

        # build data format
        dformat = ['contexts', 'questions', 'answers']

        # create feeds
        trainfeed = DataFeed(dformat, data=data['train'])
        testfeed = DataFeed(dformat, data=data['test'])

        # instantiate model
        model = MemoryNet(hdim=20,
                          num_hops=3,
                          memsize=memsize,
                          sentence_size=sentence_size,
                          qlen=qlen,
                          vocab_size=vocab_size,
                          num_candidates=num_candidates)

        with tf.Session() as sess:
            # run for multiple initializations
            i, accuracy = 0, [0.]
            while accuracy[-1] < 0.95 and i < 5:
                # init session (fresh random weights each restart)
                sess.run(tf.global_variables_initializer())

                # create trainer
                trainer = Trainer(sess,
                                  model,
                                  trainfeed,
                                  testfeed,
                                  batch_size=batch_size)

                print('\n:: <task {}> ({}) [1/2] Pretraining'.format(task, i))
                # pretrain
                acc = trainer.fit(epochs=100000,
                                  eval_interval=1,
                                  mode=Trainer.PRETRAIN,
                                  verbose=False,
                                  batch_size=batch_size,
                                  lr=0.0005)
                print(':: \tAccuracy after pretraining: ', acc)

                print('\n:: <task {}> ({}) [2/2] Training'.format(task, i))
                # train
                acc = trainer.fit(epochs=1000000,
                                  eval_interval=10,
                                  mode=Trainer.TRAIN,
                                  verbose=False,
                                  batch_size=batch_size,
                                  lr=0.0005)
                print(':: \tAccuracy after training: ', acc)

                # next iteration
                i += 1
                # add accuracy to list
                accuracy.append(acc)
                print(acc)

            print('Experiment Results : ')
            # skip the 0. seed value; use a distinct name to avoid
            # shadowing the restart counter `i`
            for run_idx, a in enumerate(accuracy[1:]):
                print(run_idx, a)

        task_max_acc.append(max(accuracy))

    print('____________________________________________')
    for task_idx, acc in enumerate(task_max_acc):
        print('Task ({}) : {}'.format(task_idx + 1, acc))
    print('____________________________________________')
示例#3
0
def train_separate(task, dataset='1k', iterations=1, batch_size=128):
    """Train a RelationNet on a single bAbI task.

    Runs up to `iterations` random re-initializations, stopping early once
    test accuracy reaches 0.95, and returns the model together with the
    parameters of the best-performing run.

    Args:
        task: bAbI task id (1-20).
        dataset: dataset size tag passed to `gather` (e.g. '1k').
        iterations: maximum number of random restarts.
        batch_size: training batch size.

    Returns:
        (model, best_params): the RelationNet instance and the list of
        trained variable values from the restart with the highest test
        accuracy (None if no restart was run).
    """
    # get data for task
    data, metadata = gather(dataset, task)

    # build data format
    dformat = ['contexts', 'questions', 'answers']

    # create feeds
    trainfeed = DataFeed(dformat, data=data['train'])
    testfeed = DataFeed(dformat, data=data['test'])

    # instantiate model
    model = RelationNet(clen=metadata['clen'],
                        qlen=metadata['qlen'],
                        slen=metadata['slen'],
                        vocab_size=metadata['vocab_size'],
                        num_candidates=metadata['candidates']['vocab_size'])

    # info
    # BUGFIX: .format(task) was missing, printing a literal '{}'
    print(':: <task {}> [0/2] Info'.format(task))
    print(':: \t memory size : {}, #candidates : {}'.format(
        metadata['clen'], metadata['candidates']['vocab_size']))

    # create visualizer
    vis = Visualizer()
    vis.attach_scalars(model)

    with tf.Session() as sess:
        # run for multiple initializations
        i, accuracy, model_params = 0, [0.], [None]
        while accuracy[-1] < 0.95 and i < iterations:
            # init session (fresh random weights each restart)
            sess.run(tf.global_variables_initializer())

            # add graph to visualizer
            vis.attach_graph(sess.graph)

            # create trainer
            trainer = Trainer(sess,
                              model,
                              trainfeed,
                              testfeed,
                              batch_size=batch_size)

            print('\n:: <task {}> ({}) [1/1] Training'.format(task, i))
            # train
            acc = trainer.fit(epochs=1000000,
                              eval_interval=1,
                              mode=Trainer.TRAIN,
                              verbose=True,
                              lr=0.0002)

            print(':: \tAccuracy after training: ', acc)

            # next iteration
            i += 1
            # record accuracy and a snapshot of trained variables for this run
            accuracy.append(acc)
            model_params.append(sess.run(tf.trainable_variables()))
            print(acc)

        print(':: [x/x] End of training')
        print(':: Max accuracy :', max(accuracy))

        # return model and best model params
        # BUGFIX: previously returned the *last* run's variables, ignoring
        # the tracked model_params; return the best run instead, consistent
        # with the MemoryNet variant of this function.
        return model, model_params[accuracy.index(max(accuracy))]
示例#4
0
    #config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        # init session
        sess.run(tf.global_variables_initializer())

        # add graph to visualizer
        vis.attach_graph(sess.graph)

        # init trainer
        trainer = Trainer(sess, model, datasrc, batch_size)

        # fit model
        trainer.fit(epochs=600,
                    mode=Trainer.TRAIN,
                    verbose=True,
                    visualizer=vis,
                    eval_interval=1,
                    early_stop=False)
        '''
        print('****************************************************************** PRETRAINING OVER ')
        for task_id in reversed(range(21)):
            datasrc.task_id = task_id
            loss, acc = trainer.evaluate()
            print('evaluation loss for task_id = {}\t\tloss = {}\t\t accuracy = {}'.format(task_id, loss, acc))
        
        trainer.fit(epochs=600, mode=Trainer.TRAIN, verbose=False, visualizer=vis)
        print('****************************************************************** TRAINING OVER ')
        for task_id in reversed(range(21)):
            datasrc.task_id = task_id
            loss, acc = trainer.evaluate()
            print('evaluation loss for task_id = {}\t\tloss = {}\t\t accuracy = {}'.format(task_id, loss, acc))
示例#5
0
                  n=1,
                  optimizer=tf.train.AdamOptimizer,
                  lr=0.001,
                  vocab_size=metadata['vocab_size'],
                  max_candidates=metadata['max_candidates'],
                  demb=384,
                  dhdim=384,
                  num_layers=1)

    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config) as sess:

        # init session
        sess.run(tf.global_variables_initializer())

        # create trainer
        trainer = Trainer(sess,
                          model,
                          trainfeed,
                          testfeed,
                          batch_size=batch_size)

        # train
        acc = trainer.fit(epochs=1000000,
                          eval_interval=1,
                          mode=Trainer.TRAIN,
                          verbose=True,
                          lr=0.001)

        print(':: \tAccuracy after training: ', acc)
示例#6
0
    # make 'n' copies of model for data parallelism
    make_parallel(model, num_copies=4, num_gpus=4)

    # setup visualizer
    #  by default, writes to ./log/
    vis = Visualizer(interval=50)
    vis.attach_scalars(model)
    vis.attach_params() # histograms of trainable variables


    # create data source (SQuAD)
    datasrc = DataSource(batch_size, 
            glove_file='../../../datasets/glove/glove.6B.300d.txt', 
            random_x=0.2)

    # gpu config
    config = tf.ConfigProto()
    #config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        # init session
        sess.run(tf.global_variables_initializer())

        vis.attach_graph(sess.graph)

        # init trainer
        trainer = Trainer(sess, model, datasrc, batch_size, rand=True)

        # fit model
        trainer.fit(epochs=1000, visualizer=vis)