def train_separate(task, dataset='1k', iterations=1, batch_size=32):
    # get data for task
    data, metadata = gather(dataset, task)

    # build data format
    dformat = ['contexts', 'questions', 'answers']

    # create feeds
    trainfeed = DataFeed(dformat, data=data['train'])
    testfeed = DataFeed(dformat, data=data['test'])

    # task-dependent hyperparameters (task=0 means joint training over all tasks)
    hdim = 20 if task else 50
    eval_interval = 100 if task else 10
    batch_size = 32 if task else 128

    # instantiate model
    model = MemoryNet(hdim=hdim, num_hops=3,
                      memsize=metadata['clen'],
                      sentence_size=metadata['slen'],
                      qlen=metadata['qlen'],
                      vocab_size=metadata['vocab_size'],
                      num_candidates=metadata['candidates']['vocab_size'])

    # info
    print(':: <task {}> [0/2] Info'.format(task))
    print(':: \t memory size : {}, #candidates : {}'.format(
        metadata['clen'], metadata['candidates']['vocab_size']))

    with tf.Session() as sess:
        # run for multiple initializations
        i, accuracy, model_params = 0, [0.], [None]
        while accuracy[-1] < 0.95 and i < iterations:
            # init session
            sess.run(tf.global_variables_initializer())
            # create trainer
            trainer = Trainer(sess, model, trainfeed, testfeed,
                              batch_size=batch_size)

            print('\n:: <task {}> ({}) [1/2] Pretraining'.format(task, i))
            # pretrain
            acc = trainer.fit(epochs=100000, eval_interval=1,
                              mode=Trainer.PRETRAIN, verbose=False,
                              lr=0.0005)
            print(':: \tAccuracy after pretraining: ', acc)

            print('\n:: <task {}> ({}) [2/2] Training'.format(task, i))
            # train
            acc = trainer.fit(epochs=1000000, eval_interval=eval_interval,
                              mode=Trainer.TRAIN, verbose=False,
                              lr=0.0005)
            print(':: \tAccuracy after training: ', acc)

            # next iteration
            i += 1
            # add accuracy to list
            accuracy.append(acc)
            model_params.append(sess.run(tf.trainable_variables()))
            print(acc)

        print(':: [x/x] End of training')
        print(':: Max accuracy :', max(accuracy))

        # return model and best model params
        return model, model_params[accuracy.index(max(accuracy))]
def train_separate_all(dataset='1k'):
    batch_size = 64
    task_max_acc = []
    for task in range(1, 21):
        # get data for task
        data, metadata = gather(dataset, task)

        # gather info from metadata
        num_candidates = metadata['candidates']['vocab_size']
        vocab_size = metadata['vocab_size']
        memsize = metadata['clen']
        sentence_size = metadata['slen']
        qlen = metadata['qlen']
        print(':: <task {}> memory size : {}'.format(task, memsize))

        # build data format
        dformat = ['contexts', 'questions', 'answers']

        # create feeds
        trainfeed = DataFeed(dformat, data=data['train'])
        testfeed = DataFeed(dformat, data=data['test'])

        # instantiate model
        model = MemoryNet(hdim=20, num_hops=3, memsize=memsize,
                          sentence_size=sentence_size, qlen=qlen,
                          vocab_size=vocab_size,
                          num_candidates=num_candidates)

        with tf.Session() as sess:
            # run for multiple initializations
            i, accuracy = 0, [0.]
            while accuracy[-1] < 0.95 and i < 5:
                # init session
                sess.run(tf.global_variables_initializer())
                # create trainer
                trainer = Trainer(sess, model, trainfeed, testfeed,
                                  batch_size=batch_size)

                print('\n:: <task {}> ({}) [1/2] Pretraining'.format(task, i))
                # pretrain
                acc = trainer.fit(epochs=100000, eval_interval=1,
                                  mode=Trainer.PRETRAIN, verbose=False,
                                  batch_size=batch_size, lr=0.0005)
                print(':: \tAccuracy after pretraining: ', acc)

                print('\n:: <task {}> ({}) [2/2] Training'.format(task, i))
                # train
                acc = trainer.fit(epochs=1000000, eval_interval=10,
                                  mode=Trainer.TRAIN, verbose=False,
                                  batch_size=batch_size, lr=0.0005)
                print(':: \tAccuracy after training: ', acc)

                # next iteration
                i += 1
                # add accuracy to list
                accuracy.append(acc)
                print(acc)

            print('Experiment Results : ')
            for i, a in enumerate(accuracy[1:]):
                print(i, a)

        task_max_acc.append(max(accuracy))

    print('____________________________________________')
    for i, acc in enumerate(task_max_acc):
        print('Task ({}) : {}'.format(i + 1, acc))
    print('____________________________________________')
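# Usage sketch (assumption, not part of the original script): a minimal entry
# point that trains the MemoryNet on one task or sweeps all 20 bAbI tasks;
# `train_separate` returns the model and the parameters of the best restart.
if __name__ == '__main__':
    model, best_params = train_separate(task=1, dataset='1k', iterations=3)
    train_separate_all(dataset='1k')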
def train_separate(task, dataset='1k', iterations=1, batch_size=128):
    # get data for task
    data, metadata = gather(dataset, task)

    # build data format
    dformat = ['contexts', 'questions', 'answers']

    # create feeds
    trainfeed = DataFeed(dformat, data=data['train'])
    testfeed = DataFeed(dformat, data=data['test'])

    # instantiate model
    model = RelationNet(clen=metadata['clen'],
                        qlen=metadata['qlen'],
                        slen=metadata['slen'],
                        vocab_size=metadata['vocab_size'],
                        num_candidates=metadata['candidates']['vocab_size'])

    # info
    print(':: <task {}> [0/2] Info'.format(task))
    print(':: \t memory size : {}, #candidates : {}'.format(
        metadata['clen'], metadata['candidates']['vocab_size']))

    # create visualizer
    vis = Visualizer()
    vis.attach_scalars(model)

    with tf.Session() as sess:
        # run for multiple initializations
        i, accuracy, model_params = 0, [0.], [None]
        while accuracy[-1] < 0.95 and i < iterations:
            # init session
            sess.run(tf.global_variables_initializer())
            # add graph to visualizer
            vis.attach_graph(sess.graph)
            # create trainer
            trainer = Trainer(sess, model, trainfeed, testfeed,
                              batch_size=batch_size)

            print('\n:: <task {}> ({}) [1/1] Training'.format(task, i))
            # train
            acc = trainer.fit(epochs=1000000, eval_interval=1,
                              mode=Trainer.TRAIN, verbose=True,
                              lr=0.0002)
            print(':: \tAccuracy after training: ', acc)

            # next iteration
            i += 1
            # add accuracy to list
            accuracy.append(acc)
            model_params.append(sess.run(tf.trainable_variables()))
            print(acc)

        print(':: [x/x] End of training')
        print(':: Max accuracy :', max(accuracy))

        # return model and best model params
        return model, model_params[accuracy.index(max(accuracy))]
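# Usage sketch (assumption, not part of the original script): train the
# RelationNet variant on a single bAbI task with a couple of random restarts
# and keep the parameters of the best-scoring run.
if __name__ == '__main__':
    model, best_params = train_separate(task=1, dataset='1k', iterations=2)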
# gpu config
config = tf.ConfigProto()
#config.gpu_options.allow_growth = True

with tf.Session(config=config) as sess:
    # init session
    sess.run(tf.global_variables_initializer())
    # add graph to visualizer
    vis.attach_graph(sess.graph)
    # init trainer
    trainer = Trainer(sess, model, datasrc, batch_size)
    # fit model
    trainer.fit(epochs=600, mode=Trainer.TRAIN, verbose=True,
                visualizer=vis, eval_interval=1, early_stop=False)

    # per-task evaluation and a second training pass (currently disabled)
    '''
    print('****************************************************************** PRETRAINING OVER ')
    for task_id in reversed(range(21)):
        datasrc.task_id = task_id
        loss, acc = trainer.evaluate()
        print('evaluation loss for task_id = {}\t\tloss = {}\t\t accuracy = {}'.format(
            task_id, loss, acc))

    trainer.fit(epochs=600, mode=Trainer.TRAIN, verbose=False, visualizer=vis)

    print('****************************************************************** TRAINING OVER ')
    for task_id in reversed(range(21)):
        datasrc.task_id = task_id
        loss, acc = trainer.evaluate()
        print('evaluation loss for task_id = {}\t\tloss = {}\t\t accuracy = {}'.format(
            task_id, loss, acc))
    '''
              n=1,
              optimizer=tf.train.AdamOptimizer,
              lr=0.001,
              vocab_size=metadata['vocab_size'],
              max_candidates=metadata['max_candidates'],
              demb=384, dhdim=384,
              num_layers=1)

config = tf.ConfigProto(allow_soft_placement=True)

with tf.Session(config=config) as sess:
    # init session
    sess.run(tf.global_variables_initializer())
    # create trainer
    trainer = Trainer(sess, model, trainfeed, testfeed,
                      batch_size=batch_size)
    # train
    acc = trainer.fit(epochs=1000000, eval_interval=1,
                      mode=Trainer.TRAIN, verbose=True,
                      lr=0.001)
    print(':: \tAccuracy after training: ', acc)
# make 'n' copies of model for data parallelism
make_parallel(model, num_copies=4, num_gpus=4)

# setup visualizer
# by default, writes to ./log/
vis = Visualizer(interval=50)
vis.attach_scalars(model)
vis.attach_params()  # histograms of trainable variables

# create data source (SQuAD)
datasrc = DataSource(batch_size,
                     glove_file='../../../datasets/glove/glove.6B.300d.txt',
                     random_x=0.2)

# gpu config
config = tf.ConfigProto()
#config.gpu_options.allow_growth = True

with tf.Session(config=config) as sess:
    # init session
    sess.run(tf.global_variables_initializer())
    vis.attach_graph(sess.graph)
    # init trainer
    trainer = Trainer(sess, model, datasrc, batch_size, rand=True)
    # fit model
    trainer.fit(epochs=1000, visualizer=vis)