def init_model(word2id):
    # Initialize the model: an embedding layer followed by the RNN.
    model = nn.Sequential(
        nn.Embedding(len(word2id), opt.rnn_input),
        rnn.RNNModel(opt.rnn_input, opt.rnn_output, opt.hidden_dim,
                     opt.num_layers, opt.dropout, device)).to(device)
    optimizer = optim.Adam(model.parameters(), lr=opt.learning_rate)
    return model, optimizer
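# Hypothetical usage sketch (not part of the original snippet): build the model
# and take one Adam step on a placeholder objective. A real training loop would
# feed token-id batches through the model and use e.g. nn.CrossEntropyLoss; the
# placeholder loss below only exists so the example runs without knowing
# RNNModel's forward signature.
model, optimizer = init_model(word2id)

optimizer.zero_grad()
loss = sum(p.pow(2).sum() for p in model.parameters())  # stand-in for a real loss
loss.backward()
optimizer.step()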
def _run_trainer():
    r"""
    The trainer creates a distributed RNNModel and a DistributedOptimizer.
    Then, it performs training using random input data.
    """
    batch = 5
    ntoken = 7
    ninp = 2

    nhid = 3
    nindices = 6
    nlayers = 4
    hidden = (
        torch.randn(nlayers, nindices, nhid),
        torch.randn(nlayers, nindices, nhid),
    )

    model = rnn.RNNModel("ps", ntoken, ninp, nhid, nlayers)

    # setup distributed optimizer
    opt = DistributedOptimizer(
        optim.SGD,
        model.parameter_rrefs(),
        lr=0.05,
    )

    criterion = torch.nn.CrossEntropyLoss()

    def get_next_batch():
        for _ in range(5):
            data = torch.LongTensor(batch, nindices) % ntoken
            target = torch.LongTensor(batch, ntoken) % nindices
            yield data, target

    # train for 10 iterations
    for epoch in range(10):
        # create distributed autograd context
        for data, target in get_next_batch():
            with dist_autograd.context() as context_id:
                hidden[0].detach_()
                hidden[1].detach_()
                output, hidden = model(data, hidden)
                loss = criterion(output, target)
                # run distributed backward pass
                dist_autograd.backward(context_id, [loss])
                # run distributed optimizer
                opt.step(context_id)
                # not necessary to zero grads as each iteration creates a different
                # distributed autograd context which hosts different grads
        print("Training epoch {}".format(epoch))
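# Illustrative launcher (assumption: a two-process setup named "trainer" and
# "ps", following the standard torch.distributed.rpc pattern; this launcher is
# not part of the original snippet).
import os
import torch.multiprocessing as mp
import torch.distributed.rpc as rpc

def run_worker(rank, world_size):
    os.environ["MASTER_ADDR"] = "localhost"
    os.environ["MASTER_PORT"] = "29500"
    if rank == 0:
        rpc.init_rpc("trainer", rank=rank, world_size=world_size)
        _run_trainer()
    else:
        # The parameter server holds the remote parameters; it only waits
        # for RPCs from the trainer.
        rpc.init_rpc("ps", rank=rank, world_size=world_size)
    rpc.shutdown()

if __name__ == "__main__":
    mp.spawn(run_worker, args=(2,), nprocs=2, join=True)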
corpus = data.Corpus(args.data)

eval_batch_size = 10
test_batch_size = 1
train_data = batchify(corpus.train, args.batch_size, args)
val_data = batchify(corpus.valid, eval_batch_size, args)
test_data = batchify(corpus.test, test_batch_size, args)

###############################################################################
# Build the model
###############################################################################

ntokens = len(corpus.dictionary)
model = rnn.RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers,
                     args.dropout, args.dropouth, args.dropouti, args.dropoute,
                     args.wdrop, args.tied)
if args.cuda:
    model.cuda()
total_params = sum(x.size()[0] * x.size()[1] if len(x.size()) > 1 else x.size()[0]
                   for x in model.parameters())
print('Args:', args)
print('Model total parameters:', total_params)

criterion = nn.CrossEntropyLoss()

###############################################################################
# Training code
###############################################################################
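# Sketch of the batchify helper assumed above (assumption: the standard
# word-language-model recipe; the helper itself is not shown in the original
# snippet). It trims the corpus to a whole number of batches and reshapes it so
# each column is an independent stream of tokens.
def batchify(data, bsz, args):
    nbatch = data.size(0) // bsz
    data = data.narrow(0, 0, nbatch * bsz)
    data = data.view(bsz, -1).t().contiguous()
    if args.cuda:
        data = data.cuda()
    return data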
])

# Load word vocab (generated in run.py)
word2id = None
with open("dict/word2id.json") as f:
    for line in f:
        word2id = json.loads(line)
id2word = None
with open("dict/id2word.json") as f:
    for line in f:
        id2word = json.loads(line)

# Load trained model
model = nn.Sequential(
    nn.Embedding(len(word2id), opt.rnn_input),
    rnn.RNNModel(opt.rnn_input, opt.rnn_output, opt.hidden_dim,
                 opt.num_layers, opt.dropout, device)).to(device)
model.load_state_dict(torch.load(opt.model))


# Generate natural language command from templates, given verb-object pair
def gen_from_template(verb, obj):
    pre_obj = [
        'Give me the ', 'Hand me the ', 'Pass me the ', 'Fetch the ',
        'Get the ', 'Bring the ', 'Bring me the ', 'I need the ',
        'I want the ', 'I need a ', 'I want a '
    ]
    pre_verb = [
        'An item that can ', 'An object that can ',
        'Give me something that can ', 'Give me an item that can ',
        'Hand me something with which I can ',
        'Give me something with which I can ',
        'Hand me something to ',
out_path = "../data/rnn_hyperparameters/" + sys.argv[1] + ".pickle"
resolution = 100
num_states = 2

# Generate an X matrix for RNN training
random_loops = [data[i] for i in random.sample(range(len(data)), 100)]
del data
X, ranges = generate_X(random_loops, spacing=resolution)
Y_numerical = np.random.randint(0, num_states, size=X.shape[0])
Y = np.zeros((X.shape[0], num_states))
for i in range(Y_numerical.shape[0]):
    Y[i, Y_numerical[i]] = 1
print(X.shape, Y.shape)

consistencies = np.zeros((6, 6))
for i, rec_nodes in enumerate([[5], [10], [25], [50], [25, 25], [50, 50]]):
    for j, dense_nodes in enumerate([[5], [10], [25], [50], [25, 25], [50, 50]]):
        print(rec_nodes, dense_nodes)
        model = rnn.RNNModel(recurrent_nodes=rec_nodes, dense_nodes=dense_nodes,
                             n_labels=num_states, n_features=X.shape[1],
                             sequence_length=X.shape[2])
        model.create()
        model.train(X, Y, epochs=5)
        Y_pred = np.argmax(model.model.predict(X), axis=1)
        my_consistencies = [evaluate_consistency(random_loops[k][1], Y_pred[start:end],
                                                 resolution, num_states)
                            for k, (start, end) in enumerate(ranges)]
        consistencies[i, j] = sum(my_consistencies) / len(my_consistencies)
        print(i, j, consistencies[i, j])

with open(out_path, 'wb') as file:
    pickle.dump(consistencies, file)
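# Hypothetical follow-up (not in the original script): load the saved 6x6 grid
# and report the architecture with the highest mean consistency. The node
# lists mirror the grid order used in the loops above.
import pickle
import numpy as np

node_options = [[5], [10], [25], [50], [25, 25], [50, 50]]
with open(out_path, 'rb') as f:
    grid = pickle.load(f)
best_i, best_j = np.unravel_index(np.argmax(grid), grid.shape)
print("best recurrent_nodes:", node_options[best_i],
      "best dense_nodes:", node_options[best_j],
      "consistency:", grid[best_i, best_j])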
    return data


eval_batch_size = 10
train_data = batchify(corpus.train, args.batch_size)
val_data = batchify(corpus.valid, eval_batch_size)
test_data = batchify(corpus.test, eval_batch_size)

###############################################################################
# Build the model
###############################################################################

ntokens = len(corpus.dictionary)
lr = theano.shared(getattr(numpy, theano.config.floatX)(args.lr))
optimizer = t721.optimizer.SGD(lr)
model = rnn.RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers,
                     args.dropout, args.tied, optimizer)
with open(args.save, 'wb') as f:
    pickle.dump(model, f)

###############################################################################
# Training code
###############################################################################

def get_batch(source, i):
    seq_len = min(args.bptt, len(source) - 1 - i)
    data = source[i:i + seq_len].numpy()
    target = source[i + 1:i + 1 + seq_len].view(-1).numpy()
    return data, target
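# Illustrative use of get_batch (assumption: the usual stride-by-bptt loop from
# the word-language-model recipe; this loop is not part of the original snippet).
for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
    data, targets = get_batch(train_data, i)
    # ... forward pass, loss computation, and parameter update would go here ...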
def train(params):
    # fix random seed
    np.random.seed(params.random_seed)

    print('%s starting......' % params.cell)

    if params.dataset.startswith('mnist'):
        train_X, test_X, train_y, test_y = load.load_mnist(params)
    elif params.dataset.startswith('sine_synthetic') and not params.dataset.startswith('sine_synthetic_out'):
        train_X, test_X, train_y, test_y = load.load_sine_synthetic(params)
    elif params.dataset.startswith('poly_synthetic'):
        train_X, test_X, train_y, test_y = load.load_poly_synthetic(params)
    else:
        assert 0, "unknown dataset %s" % (params.dataset)

    # params.freqs = np.logspace(np.log2(0.25), np.log2(params.time_steps/3), 120-1, base=2).tolist()
    # params.freqs.append(0.0)
    # params.freqs.sort()
    # params.freqs = np.linspace(0, params.time_steps/3, 10).tolist()

    print("parameters = ", params)

    model = rnn.RNNModel(params)

    # load model
    if params.load_model:
        model.load("%s.%s" % (params.model_dir, params.cell))

    # train model
    train_error, test_error = model.train(params, train_X, train_y, test_X, test_y)

    # save model
    if params.model_dir:
        if not os.path.isdir(os.path.dirname(params.model_dir)):
            os.makedirs(params.model_dir)
        model.save("%s.%s" % (params.model_dir, params.cell))

    # predict
    train_pred = model.predict(train_X, params.batch_size)
    test_pred = model.predict(test_X, params.batch_size)

    # must close model when finished
    model.close()

    # write predictions to file
    if params.pred_dir:
        if not os.path.isdir(os.path.dirname(params.pred_dir)):
            os.makedirs(params.pred_dir)
        with open("%s.%s.%s.y" % (params.pred_dir, params.dataset, params.cell), "w") as f:
            content = ""
            for pred in [train_pred, test_pred]:
                for entry in pred:
                    for index, value in enumerate(entry):
                        if index:
                            content += ","
                        content += "%f" % (value)
                    content += "\n"
            f.write(content)
        with open("%s.%s.%s.X" % (params.pred_dir, params.dataset, params.cell), "w") as f:
            content = ""
            for X in [train_X, test_X]:
                for entry in X:
                    for index, value in enumerate(entry.ravel()):
                        if index:
                            content += ","
                        content += "%f" % (value)
                    content += "\n"
            f.write(content)

    return train_error, test_error
    for w in compound_word:
        try:
            vector += word2vector[w]
        except KeyError:
            vector += mean_vector
    vector /= len(compound_word)
    embedding_matrix[i - 1] = vector
f.close()

###############################################################################
# Build the model
###############################################################################

ntokens = len(corpus.dictionary)
model = rnn.RNNModel(args.model, ntokens, args.embdims, args.nunits, args.nlayers,
                     embedding_matrix, args.bidir, args.dropout, args.tied)
if args.cuda:
    model.cuda()

criterion = nn.CrossEntropyLoss()

###############################################################################
# Training code
###############################################################################

def repackage_hidden(h):
    """Wraps hidden states in new Variables, to detach them from their history."""
    if type(h) == Variable:
        return Variable(h.data)
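# For reference, a sketch of the complete helper this snippet follows (the tuple
# branch is cut off above); it is the standard word-language-model pattern,
# recursing into tuples of hidden states (e.g. for LSTMs) using the same legacy
# Variable API. The name repackage_hidden_full is illustrative.
def repackage_hidden_full(h):
    """Detach hidden states from their history, recursing into tuples."""
    if type(h) == Variable:
        return Variable(h.data)
    else:
        return tuple(repackage_hidden_full(v) for v in h)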
###############################################################################
# Build the model
###############################################################################
from splitcross import SplitCrossEntropyLoss

# criterion = None
weights = torch.ones([ntokens]).cuda()
weights[-1:] = 0
# print(weights)
criterion = nn.CrossEntropyLoss(weight=weights).cuda()

model = model_lm.RNNModel(args.model, ntokens, ntypes, nvalues, args.emsize,
                          args.nhid, args.emsize_type, args.emsize_value,
                          args.nhid_ast, args.nlayers, args.dropout,
                          args.dropouth, args.dropouti, args.dropoute,
                          args.wdrop, args.tied)

model_mlp = nn.Sequential(
    # nn.Dropout(0.5),
    nn.Linear(args.nhid + args.nhid_ast, args.nhid),
    # nn.LayerNorm(args.nhid),
    # nn.Tanh(),
    nn.Dropout(0.5),
    # nn.Linear(args.nhid, args.nhid),
    # nn.ReLU()
)
###
if args.resume:
def train(params):
    print('%s starting......' % params.cell)
    sys.stdout.flush()

    if params.dataset.startswith('mnist'):
        train_X, test_X, train_y, test_y = load.load_mnist(params)
    elif params.dataset.startswith('add'):
        train_X, test_X, train_y, test_y = load.adding_task(params)
    else:
        assert 0, "unknown dataset %s" % (params.dataset)

    print("parameters = ", params)

    class List:
        def __init__(self):
            self.list = list()

        def append(self, item):
            self.list.append(item)

    model = rnn.RNNModel(params)

    # load model
    if params.load_model:
        model.load("%s" % (params.load_model_dir))

    # train model
    train_error, test_error, epochs = model.train(params, train_X, train_y, test_X, test_y)

    # save per-epoch errors to file (Egor)
    with open('data_' + params.cell + '_dataset_' + params.dataset +
              '_L_' + str(params.num_layers) + '_rsize_' + str(params.r_size) +
              '_lr_decay_' + str(params.lr_decay) +
              '_batch_size_' + str(params.batch_size), 'w') as file:
        for i in range(len(train_error)):
            file.write(str(epochs[i]) + ' ' + str(train_error[i]) + ' ' + str(test_error[i]) + '\n')

    # save model
    if params.model_dir:
        if not os.path.isdir(os.path.dirname(params.model_dir)):
            os.makedirs(params.model_dir)
        model.save("%s.%s" % (params.model_dir, params.cell))

    # predict
    train_pred = model.predict(train_X, params.batch_size)
    test_pred = model.predict(test_X, params.batch_size)

    # must close model when finished
    model.close()

    # write predictions to file
    if params.pred_dir:
        if not os.path.isdir(os.path.dirname(params.pred_dir)):
            os.makedirs(params.pred_dir)
        with open("%s.%s.%s.y" % (params.pred_dir, params.dataset, params.cell), "w") as f:
            content = ""
            for pred in [train_pred, test_pred]:
                for entry in pred:
                    for index, value in enumerate(entry):
                        if index:
                            content += ","
                        content += "%f" % (value)
                    content += "\n"
            f.write(content)
        with open("%s.%s.%s.X" % (params.pred_dir, params.dataset, params.cell), "w") as f:
            content = ""
            for X in [train_X, test_X]:
                for entry in X:
                    for index, value in enumerate(entry.ravel()):
                        if index:
                            content += ","
                        content += "%f" % (value)
                    content += "\n"
            f.write(content)

    return train_error, test_error