def get_trainable_model():
    """Build and return a ``model.GRU`` from the externally defined settings.

    The positional arguments are NUM_EMB, EMB_DIM, HIDDEN_DIM, SEQ_LENGTH and
    START_TOKEN (in that order); LEARNING_RATE is passed as ``learning_rate``.
    """
    positional = (NUM_EMB, EMB_DIM, HIDDEN_DIM, SEQ_LENGTH, START_TOKEN)
    return model.GRU(*positional, learning_rate=LEARNING_RATE)
def get_trainable_model():
    """Return a freshly constructed GRU model.

    Per the original note, ``GRU`` extends the Recurrent Neural Network class
    in model.py. All constructor arguments come from names defined outside
    this function.
    """
    gru = model.GRU(
        NUM_EMB,
        EMB_DIM,
        HIDDEN_DIM,
        SEQ_LENGTH,
        START_TOKEN,
        learning_rate=LEARNING_RATE,
    )
    return gru
def _pack_batch(word_batch, pos1_batch, pos2_batch, ent_batch):
    """Flatten a batch of per-example sequences into contiguous arrays.

    Returns a 5-tuple of numpy arrays ``(shape, word, pos1, pos2, ent_type)``.
    ``shape`` holds the start offset of each example inside the flattened
    word array, followed by the total number of flattened word entries.
    """
    offsets = [0]
    words, pos1, pos2, ent_types = [], [], [], []
    for idx, sentence in enumerate(word_batch):
        offsets.append(offsets[-1] + len(sentence))
        words.extend(sentence)
        pos1.extend(pos1_batch[idx])
        pos2.extend(pos2_batch[idx])
        ent_types.extend(ent_batch[idx])
    return (np.array(offsets), np.array(words), np.array(pos1),
            np.array(pos2), np.array(ent_types))


def _make_feed_dict(re_model, batch):
    """Build the placeholder feed for one batch returned by get_next_batch."""
    word_batch, pos1_batch, pos2_batch, ent_batch, y_batch = batch
    shape, word, pos1, pos2, ent_type = _pack_batch(
        word_batch, pos1_batch, pos2_batch, ent_batch)
    return {
        re_model.input_shape: shape,
        re_model.input_word: word,
        re_model.input_pos1: pos1,
        re_model.input_pos2: pos2,
        re_model.input_ent_type: ent_type,
        re_model.input_y: y_batch,
    }


def main():
    """Train the GRU model and checkpoint it when dev accuracy improves.

    Data comes from ``init_data()``. Every 50 global steps the training loss
    and accuracy are printed; every 100 global steps one randomly chosen dev
    batch is evaluated and, if its mean accuracy beats the best seen so far,
    the session is saved under ``./model/<epoch>_RE_model.ckpt``.
    """
    batch_size = 16
    num_epochs = 2  # epoch size must be <= 2
    save_path = './model/'
    word_emb, train, dev = init_data()

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    gpu_config = "/gpu:0"
    max_acc = 0.0

    with tf.Session(config=config) as sess:
        with tf.device(gpu_config):
            initializer = tf.contrib.layers.xavier_initializer()
            with tf.variable_scope("model", reuse=None,
                                   initializer=initializer):
                re_model = model.GRU(True, word_emb)

            global_step = tf.Variable(0, name="global_step", trainable=False)
            train_op = tf.train.AdamOptimizer(0.001).minimize(
                re_model.final_loss, global_step=global_step)
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()

            print("Training model...")
            num_iterations = int(math.ceil(1.0 * len(train[0]) / batch_size))
            # randint() is inclusive at both ends, so the upper bound must be
            # the last valid batch index; floor(N / batch_size) would allow a
            # start offset equal to N when N is a multiple of batch_size.
            num_dev_batches = int(math.ceil(1.0 * len(dev[0]) / batch_size))
            last_dev_batch = max(0, num_dev_batches - 1)

            for epoch in range(num_epochs):
                print("Epoch: ", epoch)

                # shuffle the examples, applying one permutation to every
                # parallel array in `train` so they stay aligned
                sh_index = np.arange(len(train[0]))
                np.random.shuffle(sh_index)
                for i, part in enumerate(train):
                    train[i] = part[sh_index]

                for iteration in range(num_iterations):
                    batch = get_next_batch(
                        train, iteration * batch_size, batch_size)
                    _, step, train_loss, train_acc, _, _ = sess.run(
                        [
                            train_op, global_step, re_model.total_loss,
                            re_model.accuracy, re_model.l2_loss,
                            re_model.final_loss
                        ],
                        feed_dict=_make_feed_dict(re_model, batch))

                    if step % 50 == 0:
                        # ravel() instead of reshape((batch_size)) so a batch
                        # with fewer than batch_size examples does not raise.
                        train_acc = np.mean(np.ravel(train_acc))
                        print("step {}, loss {:g}, train accuracy {:g}".format(
                            step, train_loss, train_acc))

                    if step % 100 == 0:
                        # perform validation on one random dev batch
                        random_idx = random.randint(0, last_dev_batch)
                        dev_batch = get_next_batch(
                            dev, random_idx * batch_size, batch_size)
                        _, dev_acc = sess.run(
                            [re_model.total_loss, re_model.accuracy],
                            feed_dict=_make_feed_dict(re_model, dev_batch))
                        dev_acc = np.mean(np.ravel(dev_acc))
                        print("dev performance: accuracy {:g}".format(dev_acc))

                        if max_acc < dev_acc:
                            max_acc = dev_acc
                            saver.save(
                                sess,
                                save_path + str(epoch) + '_RE_model.ckpt')
# Resolve the effective CUDA setting once and drop the raw flag, so that only
# ``args.cuda`` is consulted from here on.
args.cuda = (not args.no_cuda) and torch.cuda.is_available()
del args.no_cuda
# Each run gets its own timestamped sub-directory under the requested save_dir.
args.save_dir = os.path.join(
    args.save_dir, datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))

print("\nParameters:")
for attr, value in sorted(args.__dict__.items()):
    print("\t{}={}".format(attr.upper(), value))

# model
m_model = None
if args.snapshot is None:
    if args.which_model == 'lstm':
        m_model = model.LSTM(args, m_embedding)
    elif args.which_model == 'gru':
        m_model = model.GRU(args, m_embedding)
    elif args.which_model == 'rnn':
        m_model = model.RNN(args, m_embedding)
else:
    print('\nLoading model from [%s]...' % args.snapshot)
    try:
        # NOTE(review): torch.load unpickles the file, so only snapshots from
        # a trusted source should be loaded here.
        m_model = torch.load(args.snapshot)
    except Exception as err:
        # A bare ``except:`` would also swallow KeyboardInterrupt/SystemExit
        # and hide the real cause; report the error before exiting.
        print("Sorry, This snapshot doesn't exist.")
        print("\t{}: {}".format(type(err).__name__, err))
        # Same effect as exit(), without relying on the ``site`` helper.
        raise SystemExit

# Check before calling .cuda(): an unrecognised ``which_model`` leaves m_model
# as None, which would otherwise fail with an unrelated AttributeError.
assert m_model is not None

if args.cuda:
    m_model = m_model.cuda()

# train or predict
def _pack_test_batch(word_batch, pos1_batch, pos2_batch, ent_type_batch):
    """Flatten a batch of per-example sequences into contiguous arrays.

    Returns a 5-tuple of numpy arrays ``(shape, word, pos1, pos2, ent_type)``.
    ``shape`` holds the start offset of each example inside the flattened
    word array, followed by the total number of flattened word entries.
    """
    offsets = [0]
    words, pos1, pos2, ent_types = [], [], [], []
    for idx, sentence in enumerate(word_batch):
        offsets.append(offsets[-1] + len(sentence))
        words.extend(sentence)
        pos1.extend(pos1_batch[idx])
        pos2.extend(pos2_batch[idx])
        ent_types.extend(ent_type_batch[idx])
    return (np.array(offsets), np.array(words), np.array(pos1),
            np.array(pos2), np.array(ent_types))


def main():
    """Restore the saved GRU model and write predictions for each test file.

    File names are read from
    ``./vectorized_data/vectorized_test/file_list.txt``; blank entries are
    skipped. For every file the model's predictions are collected batch by
    batch and handed to ``build_output_file``. ``word_emb``, ``model_path``
    and ``batch_size`` are read from the enclosing module scope.
    """
    with tf.Session() as sess:
        initializer = tf.contrib.layers.xavier_initializer()
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            re_model = model.GRU(False, word_emb)

        print("loading model parameter...")
        saver = tf.train.Saver()
        saver.restore(sess, model_path)

        print("Testing...")
        # get file name/path lists from a txt file
        with open("./vectorized_data/vectorized_test/file_list.txt", "r") as f:
            files = f.readlines()

        for name in files:
            name = name.strip()
            # Skip blank lines before announcing them as a file to predict.
            if not name:
                continue
            print("Predicting file: " + name)

            test, test_ann, test_pairs = load_file(name)
            num_examples = len(test[0])
            num_iterations = int(math.ceil(1.0 * num_examples / batch_size))

            y_hat = []  # predictions for the whole file, in batch order
            for iteration in range(num_iterations):
                (word_batch, pos1_batch, pos2_batch,
                 ent_type_batch, y_batch) = get_next_batch(
                     test, iteration * batch_size, batch_size)
                (test_shape, test_word, test_pos1,
                 test_pos2, test_ent_type) = _pack_test_batch(
                     word_batch, pos1_batch, pos2_batch, ent_type_batch)

                pred = sess.run(
                    [re_model.predictions],
                    feed_dict={
                        re_model.input_shape: test_shape,
                        re_model.input_word: test_word,
                        re_model.input_pos1: test_pos1,
                        re_model.input_pos2: test_pos2,
                        re_model.input_ent_type: test_ent_type,
                        re_model.input_y: y_batch,
                    })
                y_hat.extend(pred[0])

            # output results to files
            build_output_file(num_examples, y_hat, test_pairs, test_ann, name)
def run(batch_size, permuted, modeltype='surprise_gru', n_hidden=64,
        zoneout=0.25, layer_norm=True, optimizer='adam', learnrate=1e-3,
        aux_weight=0.1, cuda=True, resume=False):
    """Set up one experiment, train the recurrent model and visualise it.

    The experiment directory name is derived from the model type, permutation
    flag, hidden size, zoneout, layer-norm flag and optimizer. A fresh run
    stores its parameters in ``params.json`` and builds a new network; a
    resumed run requires ``params.json``, ``stats.json`` and ``checkpoint``
    to exist and loads the network from ``checkpoint``.

    NOTE(review): ``learnrate`` is recorded in ``params.json`` but is not
    forwarded to ``train.fit_recurrent`` here -- confirm that is intended.

    Raises:
        AssertionError: if an argument fails its type/membership check.
        ValueError: if ``modeltype`` matches none of the known constructors.
        Exception: if ``resume`` is set and a required file is missing.
    """
    assert isinstance(batch_size, int)
    assert isinstance(permuted, bool)
    assert modeltype in MODELS_IMPLEMENTED
    assert isinstance(n_hidden, int)
    assert isinstance(zoneout, (int, float))
    assert isinstance(layer_norm, bool)
    assert isinstance(optimizer, str)
    assert isinstance(learnrate, (int, float))
    assert isinstance(cuda, bool)
    assert isinstance(resume, bool)

    # Name the experiment s.t. parameters are easily readable
    name_fields = (modeltype, permuted, n_hidden, zoneout, layer_norm,
                   optimizer)
    exp_name = '%s_perm%r_h%i_z%2f_norm%r_%s' % name_fields
    exp_path = os.path.join('/home/jason/experiments/recurrent_pytorch/',
                            exp_name)
    if not os.path.isdir(exp_path):
        os.makedirs(exp_path)

    if resume:
        # if resuming, need to have params, stats and checkpoint files
        required = ('params.json', 'stats.json', 'checkpoint')
        have_all = all(os.path.isfile(os.path.join(exp_path, fname))
                       for fname in required)
        if not have_all:
            raise Exception(
                'Missing params, stats or checkpoint file (resume)')
        net = torch.load(os.path.join(exp_path, 'checkpoint'))
    else:
        # Store experiment params in params.json
        settings = {
            'batch_size': batch_size,
            'permuted': permuted,
            'modeltype': modeltype,
            'n_hidden': n_hidden,
            'zoneout': zoneout,
            'layer_norm': layer_norm,
            'optimizer': optimizer,
            'learnrate': learnrate,
            'aux_weight': aux_weight,
            'cuda': cuda
        }
        with open(os.path.join(exp_path, 'params.json'), 'w') as f:
            json.dump(settings, f)

        # Model: map the lower-cased type to the class name in `model`; the
        # class is only looked up once a matching type has been found.
        class_names = {
            'rnn': 'RNN',
            'gru': 'GRU',
            'surprise_gru': 'SurpriseGRU',
        }
        kind = modeltype.lower()
        if kind not in class_names:
            raise ValueError
        net = getattr(model, class_names[kind])(1, n_hidden, 10, layer_norm)

    # Data loaders
    train_loader, val_loader = data.mnist(batch_size, sequential=True,
                                          permuted=permuted)

    # Train
    train.fit_recurrent(train_loader, val_loader, net, exp_path, zoneout,
                        optimizer, aux_weight=aux_weight, cuda=cuda,
                        resume=resume)

    # Post-training visualization
    post_training(exp_path, val_loader)
#num = re.split('_|\.', filename)[-2] #start_epoch = int(num)+1 print "Modello recuperato dal file "+filename else: print "Nessun file trovato per il modello "+args.model+". Ne verrà creato uno nuovo." args.restart = False # instanzia nuova rete neurale if not args.restart: if args.model == 'RNN': rnn = model.RNN(data.n_letters, args.n_hidden, data.n_categories, cuda=args.cuda) elif args.model == 'LSTM': rnn = model.LSTM(input_size=data.n_letters, hidden_size=args.n_hidden, output_size=data.n_categories, cuda=args.cuda) elif args.model == 'GRU': rnn = model.GRU(input_size=data.n_letters, hidden_size=args.n_hidden, output_size=data.n_categories, cuda=args.cuda) assert rnn #optimizer = torch.optim.SGD(rnn.parameters(), lr=args.lr) optimizer = torch.optim.Adam(rnn.parameters(), lr=args.lr) criterion = nn.NLLLoss() if args.cuda: rnn.cuda() criterion.cuda() start = time.time() num_batches = data.n_instances / args.batch_size print "num_batches: "+str(num_batches)