def sample(out_len, tweet):
    """Extend `tweet` with words generated by the trained RNN language model.

    Parameters
    ----------
    out_len : int
        Desired total word count for the finished tweet.
    tweet : list[str]
        Seed words; extended in place with generated words.

    Returns
    -------
    str
        Seed plus generated words joined by single spaces.

    Notes
    -----
    Words predicted as '<UNK>' are skipped without appending, so the result
    may contain fewer than `out_len` words.
    BUG FIX: the original assigned `h = h.to(device)` on every iteration even
    though the hidden state was never used again — dead code, now removed.
    """
    # Load the embedding matrix and the index<->word translators persisted by train().
    with open('models/word2vec.p', 'rb') as f:
        word2vec = pickle.load(f)
    word2vec = torch.tensor(word2vec)
    with open('models/translators.p', 'rb') as f:
        translators = pickle.load(f)
    inx2word = {int(k): v for k, v in translators['inx2word'].items()}
    word2inx = {k: int(v) for k, v in translators['word2inx'].items()}
    dict_size = len(inx2word)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Rebuild the model with the same hyperparameters used at training time.
    model = RNN(embedding_matrix=word2vec, dict_size=dict_size,
                hidden_dim=100, n_layers=1)
    model.load_state_dict(torch.load('models/rnn', map_location=device))
    model.eval()
    model = model.to(device)

    size = out_len - len(tweet)
    # Feed the running tweet back in one prediction step at a time; the
    # returned hidden state is discarded because predict() re-reads the
    # whole tweet on every call.
    for _ in range(size):
        word, _ = predict(model, tweet, device, inx2word, word2inx)
        if word != '<UNK>':
            tweet.append(word)
    return ' '.join(tweet)
def predict(cfg, model_path, loader, device, save_path): print(f'Saving predictions @ {save_path}') # define model model = RNN(cfg.model_type, cfg.input_dim, cfg.hidden_dim, cfg.n_layers, cfg.drop_p, cfg.output_dim, cfg.bi_dir) model = model.to(device) # load the model model.load_state_dict(torch.load(model_path)) # just to be sure model.eval() predictions = { 'Object': [], 'Sequence': [], } for c in range(cfg.output_dim): predictions[f'{cfg.task}_prob_{c}'] = [] for batch in loader: inputs = batch['inputs'].to(device) with torch.set_grad_enabled(False): outputs, hiddens = model(inputs) _, preds = torch.max(outputs, 1) softmaxed = torch.nn.functional.softmax(outputs, dim=-1) for i in range(len(batch['paths'])): sequence = pathlib.Path(batch['paths'][i]).stem.replace( '_audio', '') predictions['Object'].append(batch['containers'][i]) predictions['Sequence'].append(sequence.replace('_vggish', '')) for c in range(cfg.output_dim): predictions[f'{cfg.task}_prob_{c}'].append(softmaxed[i, c].item()) predictions_dataset = pd.DataFrame.from_dict(predictions).sort_values( ['Object', 'Sequence']) predictions_dataset.to_csv(save_path, index=False) # returning the dataset because it will be useful for test-time prediction averaging return predictions_dataset
def main():
    """Build the RNN, train it on HoleDataset, and checkpoint results.

    Notes
    -----
    BUG FIX: ``torch.optim.Adam`` does not accept a ``momentum`` keyword
    (that is an SGD parameter), so the original call raised ``TypeError``.
    Adam's moment estimates are controlled via ``betas``, left at defaults.
    """
    model = RNN()
    model = model.to('cuda:0')
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
    # Decay LR by 10x at epochs 30, 50 and 60.
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=[30, 50, 60],
                                                     gamma=0.1)
    train_dataset = HoleDataset(batch_size=128, dataset_size=128, steps=5)
    val_dataset = HoleDataset(batch_size=512, dataset_size=5120, steps=20)
    # Create the checkpoint directory up-front so Trainer can write into it.
    ckpt_path = os.path.abspath('./checkpoints')
    os.makedirs(ckpt_path, exist_ok=True)
    trainer = Trainer(model,
                      optimizer,
                      scheduler,
                      train_dataset,
                      val_dataset,
                      checkpoint_path='./checkpoints')
    history = trainer.fit(num_epochs=70, num_train_batch=1000, num_val_batch=10)
    # Previously computed and discarded; returning it is backward-compatible.
    return history
def main(args):
    """Train a text classifier on the IMDB-style train/test split and save weights.

    Expects `args` to provide: data_dir, input_size, hidden_size, nb_class,
    learning_rate, batch_size, nb_epoch, weights_file.  Relies on module-level
    `device` and TensorBoard `writer`.
    """
    # prepare data
    train_texts, train_labels = read_data(os.path.join(args.data_dir, 'train'))
    test_texts, test_labels = read_data(os.path.join(args.data_dir, 'test'))
    training_set = list(zip(train_texts, train_labels))
    test_set = list(zip(test_texts, test_labels))
    random.shuffle(training_set)
    random.shuffle(test_set)
    # Vocabulary is built from the training split only: at most 20k words
    # seen >= 3 times, initialised with 100-d GloVe vectors.
    vocab_counter = Counter(flatten([get_words(text) for text in train_texts]))
    word2vec = vocab.Vocab(vocab_counter,
                           max_size=20000,
                           min_freq=3,
                           vectors='glove.6B.100d')
    model = RNN(args.input_size, args.hidden_size, args.nb_class)
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
    train(training_set, model, criterion, optimizer, args.batch_size,
          args.nb_epoch, word2vec)
    evaluate(test_set, model, args.batch_size, word2vec)
    torch.save(model.state_dict(), args.weights_file)
    writer.close()
# --- Script fragment: argparse option, experiment naming, model/optimizer
# setup, and the START of the epoch loop.  The body of the final
# `for i in range(...)` loop is not part of this fragment (truncated here).
# NOTE(review): `getattr(args, "device")` is just `args.device` (same for the
# other getattr calls) — presumably kept for uniformity; verify intent.
parser.add_argument('--device', default='cuda', type=str, help="device to train on") args = parser.parse_args() device = getattr(args, "device") exp_name = f"Exp_optim_{args.optimizer}_lr_{args.learning_rate}_hu_{args.hidden_units}" writer = SummaryWriter(f'runs/{exp_name}') print(device) ### Create Model here Model = RNN(args) net = Model.to(torch.device(device)) ### create or instantiate data loader _data = TorchvisionDataLoader(args) _data.prepare_data() # optimizer stuff here Optimizer = getattr(optim, getattr(args, "optimizer")) optimizer = Optimizer(net.parameters(), lr=getattr(args, "learning_rate")) # loss function loss_fn = nn.CrossEntropyLoss() _data.setup(stage='fit') for i in range(getattr(args, "epochs")):
# --- Script fragment: CPU-default variant of the training setup above; differs
# only in the default device, the experiment name (no hidden-units suffix) and
# spacing.  The epoch-loop body is truncated at the end of this fragment.
parser.add_argument('--device', default='cpu', type=str, help="device to train on") args = parser.parse_args() device = getattr(args, "device") exp_name = f"Exp_optim_{args.optimizer}_lr_{args.learning_rate}" writer = SummaryWriter(f'runs/{exp_name}') ### Create Model here Model = RNN(args) net = Model.to(device) ### create or instantiate data loader _data = TorchvisionDataLoader(args) _data.prepare_data() # optimizer stuff here Optimizer = getattr(optim, getattr(args, "optimizer")) optimizer = Optimizer(net.parameters(), lr = getattr(args, "learning_rate")) # loss function loss_fn = nn.CrossEntropyLoss() _data.setup(stage = 'fit') for i in range(getattr(args, "epochs")):
def train(cfg, datasets, dataloaders, device, save_model_path):
    """Train the RNN classifier with a train/valid loop, keeping the best-F1 weights.

    Parameters
    ----------
    cfg : config with model hyperparameters, `task` key and `num_epochs`.
    datasets : dict phase -> dataset (used only for len() when averaging loss).
    dataloaders : dict phase -> DataLoader yielding {'inputs', 'targets'} batches.
    device : torch device.
    save_model_path : where the best-epoch state_dict is written.

    Returns
    -------
    float: best weighted validation F1 observed across epochs.
    """
    model = RNN(cfg.model_type, cfg.input_dim, cfg.hidden_dim, cfg.n_layers,
                cfg.drop_p, cfg.output_dim, cfg.bi_dir)
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
    criterion = torch.nn.CrossEntropyLoss()
    # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
    best_metric = 0.0
    best_epoch = 0
    best_model_wts = copy.deepcopy(model.state_dict())
    for epoch in range(cfg.num_epochs):
        for phase in ['train', 'valid']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode
            running_loss = 0.0
            # running_corrects = 0
            y_pred = []
            y_true = []
            # Iterate over data.
            for batch in dataloaders[phase]:
                inputs = batch['inputs'].to(device)
                targets = batch['targets'][cfg.task].to(device)
                # zero the parameter gradients
                optimizer.zero_grad()
                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs, hiddens = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, targets)
                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                # statistics (loss is a mean over the batch, so re-weight by batch size)
                running_loss += loss.item() * inputs.size(0)
                # running_corrects += torch.sum(preds == targets.data)
                y_pred.extend(preds.tolist())
                y_true.extend(targets.tolist())
            # if phase == 'train':
            #     scheduler.step()
            # epoch_acc = running_corrects.double() / len(datasets[phase])
            epoch_loss = running_loss / len(datasets[phase])
            # Weighted metrics: classes contribute proportionally to support.
            f1_ep = f1_score(y_true, y_pred, average='weighted')
            precision_ep = precision_score(y_true, y_pred, average='weighted')
            recall_ep = recall_score(y_true, y_pred, average='weighted')
            accuracy_ep = accuracy_score(y_true, y_pred)
            # print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
            print(
                f'({phase} @ {epoch+1}): L: {epoch_loss:3f}; A: {accuracy_ep:3f}; R: {recall_ep:3f}; '
                + f'P: {precision_ep:3f}; F1: {f1_ep:3f}')
            # deep copy the model whenever validation F1 improves
            if phase == 'valid' and f1_ep > best_metric:
                best_metric = f1_ep
                best_epoch = epoch
                best_model_wts = copy.deepcopy(model.state_dict())
    print(f'Best val Metric {best_metric:3f} @ {best_epoch+1}\n')
    # load best model weights and saves it
    model.load_state_dict(best_model_wts)
    torch.save(model.state_dict(), save_model_path)
    print(f'model is saved @ {save_model_path}')
    return best_metric
# main(): train/test driver for the gait-sequence RNN.
# Relies on module-level configuration: data_dir, batch_size, num_features,
# hidden_dimension, num_classes, learning_rate, load_pretrained,
# checkpoint_path, mode, num_epochs, clip_gradient, loss_display_interval,
# device (assumption from usage — TODO confirm all are defined at module scope).
# NOTE(review): when resuming, the optimizer OBJECT is read straight from the
# checkpoint dict (checkpoint['optimizer']) rather than loading a state_dict —
# confirm save_checkpoint() stores the whole optimizer.
# NOTE(review): training stops early after 20 epochs without improvement, and
# the LR is scaled by 0.8 every 4 stagnant epochs.  The 'else' (test) branch
# reuses validation_dataloader built above and loads weights fresh from
# checkpoint_path, visualising one batch then breaking.
def main(): start_epoch = 0 max_loss = math.inf epochs_since_improvement = 0 dataset = GaitSequenceDataset(root_dir=data_dir, longest_sequence=85, shortest_sequence=55) train_sampler, validation_sampler = generate_train_validation_samplers( dataset, validation_split=0.2) print('Building dataloaders..') train_dataloader = data.DataLoader(dataset, batch_size=batch_size, sampler=train_sampler) validation_dataloader = data.DataLoader(dataset, batch_size=1, sampler=validation_sampler, drop_last=True) model = RNN(num_features, hidden_dimension, num_classes, num_layers=2).to(device) if load_pretrained is True: print('Loading pretrained model..') checkpoint = torch.load(checkpoint_path) start_epoch = checkpoint['epoch'] + 1 epochs_since_improvement = checkpoint['epochs_since_improvement'] model.load_state_dict(checkpoint['model_state_dict']) optimizer = checkpoint['optimizer'] else: print('Creating model..') optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) criterion = nn.CrossEntropyLoss().to(device) if mode == 'train': summary = SummaryWriter() #summary = None model.to(device) print('########### ', model) for epoch in range(start_epoch, start_epoch + num_epochs): if epochs_since_improvement == 20: break if epochs_since_improvement > 0 and epochs_since_improvement % 4 == 0: adjust_learning_rate(optimizer, 0.8) train(model, train_dataloader, optimizer, criterion, clip_gradient, device, epoch, num_epochs, summary, loss_display_interval) current_loss = validate(model, validation_dataloader, criterion, device, epoch, num_epochs, summary, loss_display_interval) is_best = max_loss > current_loss max_loss = min(max_loss, current_loss) if not is_best: epochs_since_improvement += 1 print("\nEpochs since last improvement: %d\n" % (epochs_since_improvement, )) else: epochs_since_improvement = 0 save_checkpoint(epoch, epochs_since_improvement, model, optimizer, is_best) print('Current loss : ', current_loss, ' Max loss : ', max_loss) else: print('testing...') model = 
RNN(num_features, hidden_dimension, num_classes, num_layers=2) checkpoint = torch.load(checkpoint_path) model.load_state_dict(checkpoint['model_state_dict']) model.to(device) print(model) for batch_idx, val_data in enumerate(validation_dataloader): sequence = val_data['sequence'].permute(1, 0, 2).to(device) piano_roll = val_data['piano_roll'].permute(1, 0, 2).squeeze(1).to('cpu') sequence_length = val_data['sequence_length'] file_name = val_data['file_name'] frame = val_data['frame'] leg = val_data['leg'] sonify_sequence(model, sequence, sequence_length) plt.imshow(piano_roll) plt.show() print(file_name, frame, leg) break
# test(): load the sentiment RNN from a checkpoint and score the test split.
# Returns {"predictions": [0/1, ...], "labels": [...]} with a 0.5 threshold
# applied to the sigmoid of each logit.
# NOTE(review): the tqdm bar's total is the SAMPLE count (len(tabular_dataset))
# but pbar.update() advances by 1 per BATCH, so the bar under-reports progress
# unless batch_size == 1 — consider pbar.update(len(labels)).
# NOTE(review): LABEL returned by load_data() is unused here.
def test(args, ckpt_file): print("========== In the test step ==========") iterator, TEXT, LABEL, tabular_dataset = load_data(stage="test", args=args, indices=None) INPUT_DIM = len(TEXT.vocab) OUTPUT_DIM = 1 BIDIRECTIONAL = True PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token] model = RNN( INPUT_DIM, args["EMBEDDING_DIM"], args["HIDDEN_DIM"], OUTPUT_DIM, args["N_LAYERS"], BIDIRECTIONAL, args["DROPOUT"], PAD_IDX, ) model.load_state_dict( torch.load(os.path.join(args["EXPT_DIR"], ckpt_file + ".pth"))["model"]) model = model.to(device=device) model.eval() predix = 0 predictions = {} truelabels = {} n_val = len(tabular_dataset) with tqdm(total=n_val, desc="Testing round", unit="batch", leave=False) as pbar: for batch in iterator: text, text_length = batch.review labels = batch.sentiment with torch.no_grad(): text = text.to(device) text_length = text_length.to(device) labels = labels.to(device) prediction = model(text, text_length) for logit, label in zip(prediction, labels): # print("logit",logit) # print("label",label) # print("logit.cpu()",logit.cpu()) predictions[predix] = torch.sigmoid(logit.cpu()) truelabels[predix] = label.cpu().numpy().tolist() predix += 1 pbar.update() truelabels_ = [] predictions_ = [] for key in predictions: if predictions[key][0] > 0.5: predictions_.append(1) else: predictions_.append(0) for key in truelabels: truelabels_.append(truelabels[key]) truelabels = truelabels_ predictions = predictions_ return {"predictions": predictions, "labels": truelabels}
class re3Tracker():
    """Re3-style single-object tracker: a CNN feature extractor feeding an LSTM
    that regresses the next bounding box from a (previous crop, current crop) pair.

    Loads CNN/RNN weights from `checkpoint_name` and aborts if it is missing.
    Relies on module-level `device`, `CNN_OUTPUT_SIZE`, `CROP_PAD`, `CROP_SIZE`,
    `MAX_TRACK_LENGTH`, `im_util` and `data_preparation`.
    """

    def __init__(
            self,
            loss_flag=False,
            checkpoint_name='./final_checkpoint/re3_final_checkpoint.pth'):
        self.device = device
        self.CNN = CNN(1, 1).to(self.device)
        self.RNN = RNN(CNN_OUTPUT_SIZE, 1, 1, True).to(self.device)
        if os.path.isfile(checkpoint_name):
            # Load on CPU first; modules are moved to `device` below.
            checkpoint = torch.load(checkpoint_name, map_location='cpu')
            self.CNN.load_state_dict(checkpoint['cnn_model_state_dict'])
            self.RNN.load_state_dict(checkpoint['rnn_model_state_dict'])
        else:
            print("Invalid/No Checkpoint. Aborting...!!")
            sys.exit()
        self.CNN = self.CNN.to(device)
        self.RNN = self.RNN.to(device)
        self.forward_count = -1        # frames processed in the current track
        self.previous_frame = None     # (image, box) pair from the last call
        # Pre-allocated (prev crop, curr crop) input buffer.
        self.cropped_input = np.zeros((2, 3, CROP_SIZE, CROP_SIZE),
                                      dtype=np.float32)
        self.calculate_loss = loss_flag
        self.criterion = nn.MSELoss()
        self.MSE_loss = 0              # running-average MSE when loss_flag is set

    def track(self, image, starting_box=None, gt_labels=None):
        """Track one frame; pass `starting_box` on the first frame of a track.

        Returns the predicted bounding box in image coordinates (or
        `starting_box` itself on the first frame).
        """
        if starting_box is not None:
            # First frame of a new track: the given box seeds both crops.
            prev_image = image
            past_box = starting_box
            self.forward_count = 0
        else:
            prev_image, past_box = self.previous_frame
        image_0, output_box0 = im_util.get_crop_input(prev_image, past_box,
                                                      CROP_PAD, CROP_SIZE)
        self.cropped_input[0, ...] = data_preparation(image_0)
        image_1, _ = im_util.get_crop_input(image, past_box, CROP_PAD,
                                            CROP_SIZE)
        self.cropped_input[1, ...] = data_preparation(image_1)
        cropped_input_tensor = torch.from_numpy((self.cropped_input))
        cropped_input_tensor = cropped_input_tensor.view(
            -1, 3, CROP_SIZE, CROP_SIZE)
        with torch.no_grad():
            features = self.CNN(cropped_input_tensor.to(self.device))
            predicted_bbox = self.RNN(features)
        # Loss Calculation (only after the first frame, when ground truth is given)
        if starting_box is None and self.calculate_loss == True:
            gt_labels = torch.from_numpy(gt_labels).float()
            gt_labels = gt_labels.to(self.device)
            loss = self.criterion(predicted_bbox, gt_labels)
            # Running average loss over all frames tracked so far.
            self.MSE_loss = (self.MSE_loss * self.forward_count +
                             loss) / (self.forward_count + 1)
            print(self.MSE_loss)
        # BUG FIX: the original assigned predicted_bbox_array twice; the second
        # copy used .numpy() WITHOUT .cpu(), which raises on a CUDA tensor.
        # Keep the single correct .cpu().numpy() conversion.
        predicted_bbox_array = predicted_bbox.cpu().numpy()
        # Save initial LSTM states on the first frame of a track.
        if self.forward_count == 0:
            self.RNN.lstm_state_init()
        # Network predicts in crop coordinates scaled by 10; map back to image space.
        output_bbox = im_util.from_crop_coordinate_system(
            predicted_bbox_array.squeeze() / 10.0, output_box0, 1, 1)
        # Reset LSTM states to the initial state once MAX_TRACK_LENGTH frames are
        # processed, then re-prime with one forward pass on the current estimate.
        if self.forward_count > 0 and self.forward_count % MAX_TRACK_LENGTH == 0:
            cropped_input, _ = im_util.get_crop_input(image, output_bbox,
                                                      CROP_PAD, CROP_SIZE)
            cropped_input = data_preparation(cropped_input)
            input_image = np.tile(cropped_input[np.newaxis, ...], (2, 1, 1, 1))
            input_tensor = torch.from_numpy(np.float32(input_image)).to(
                self.device)
            self.RNN.reset()
            features = self.CNN(input_tensor)
            prediction = self.RNN(features)
        if starting_box is not None:
            output_bbox = starting_box
        self.forward_count += 1
        self.previous_frame = (image, output_bbox)
        return output_bbox
'''
batch data
X,Y = train_data.getBatch(i)
train-data.n_batches
'''
# Run n_exp independent training runs and collect accuracy/F1 per run.
n_exp = 1
accs = []
f1s = []
for n in range(n_exp):
    model = RNN(args)
    print("Setting Model Complete")
    model.to(args.device)
    # Resolve the optimizer class by name via getattr instead of eval() on a
    # built string — equivalent for any valid torch.optim class name (e.g.
    # "Adam", "RMSprop", "SGD") and avoids arbitrary code execution.
    optimizer = getattr(optim, args.optim)(model.parameters(), lr=args.lr)
    print("Setting optimizer Complete")
    train_main(model, args, train_data, valid_data, optimizer)
    acc, f1 = test(model, args, test_data)
    accs.append(acc)
    f1s.append(f1)
# --- Script fragment: build an LSTM text classifier from precomputed
# word_to_index / label_to_index maps, then the START of a train() function
# whose body is truncated at the end of this fragment.
# NOTE(review): the import statements sit mid-file here (after the model is
# built) — they should live at the top of the module.
# NOTE(review): `epoches` is a misspelling of `epochs` in the train() signature.
vocab_size = len(word_to_index) embedding_size = 128 num_output = len(label_to_index) model = RNN(vocab_size=vocab_size, embed_size=embedding_size, num_output=num_output, rnn_model="LSTM", use_last=True, hidden_size=128, embedding_tensor=None, num_layers=2, batch_first=True) model.to("cuda:0") import torch import torch.nn as nn import torch.optim as optim from utils import accuracy, AverageMeter optimizer = optim.Adam(model.parameters(), lr=0.005) criterion = nn.CrossEntropyLoss() clip = 0.25 def train(epoches): losses = AverageMeter() top1 = AverageMeter()
def train():
    """Train the word-level RNN language model and persist its artifacts.

    Writes 'models/translators.p' and 'models/word2vec.p' (consumed later by
    sample()) and the trained weights to 'models/rnn'.

    Notes
    -----
    BUG FIX: the original did `h = h.to(device)` with the model's returned
    hidden state even though it was never used — dead code, removed.
    Test-set evaluation now runs under torch.no_grad(): numerically identical,
    but avoids building an autograd graph during evaluation.
    """
    train_seq, test_seq, inx2word, word2inx, word2vec, batch_size = load_data()
    # Persist the translators and embeddings so sample() can rebuild the model.
    translators = {'inx2word': inx2word, 'word2inx': word2inx}
    with open('models/translators.p', 'wb') as f:
        pickle.dump(translators, f)
    with open('models/word2vec.p', 'wb') as f:
        pickle.dump(word2vec, f)
    dict_size = len(word2inx)
    word2vec = torch.tensor(word2vec)

    # check for GPU
    is_cuda = torch.cuda.is_available()
    if is_cuda:
        device = torch.device("cuda")
        print("GPU is available")
    else:
        device = torch.device("cpu")
        print("GPU not available, CPU used")

    # Instantiate the model with hyperparameters
    model = RNN(embedding_matrix=word2vec, dict_size=dict_size,
                hidden_dim=100, n_layers=1)
    model.to(device)

    # Define hyperparameters.
    # NOTE(review): load_data() also returns a batch_size, which is
    # overwritten here — confirm which value is intended.
    batch_size = 2000
    n_epochs = 100
    lr = 0.01

    # Define Loss, Optimizer
    lossfunction = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Training Run
    for epoch in range(1, n_epochs + 1):
        epoch_loss = 0
        for _, (input_seq, target_seq) in enumerate(train_seq):
            optimizer.zero_grad()  # Clears existing gradients from previous epoch
            input_seq = input_seq.to(device)
            target_seq = target_seq.to(device)
            output, h = model(input_seq)  # hidden state h is not reused
            loss = lossfunction(output, target_seq.view(-1).long())
            loss.backward()  # Does backpropagation and calculates gradients
            optimizer.step()  # Updates the weights accordingly
            epoch_loss += loss.item()
        if epoch % 10 == 0 or epoch == 1:
            loss_test_total = 0
            # Evaluation: identical numbers, no autograd bookkeeping.
            with torch.no_grad():
                for input_test, target_test in test_seq:
                    input_test = input_test.to(device)
                    target_test = target_test.to(device)
                    output_test, _ = model(input_test)
                    loss_test = lossfunction(output_test,
                                             target_test.view(-1).long())
                    loss_test_total += loss_test.item()
            norm_loss = epoch_loss / (len(train_seq) * batch_size)
            norm_loss_test = loss_test_total / (len(test_seq) * batch_size)
            print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
            print("Train loss: {:.4f}".format(norm_loss), end=' | ')
            print("Test loss: {:.4f}".format(norm_loss_test))
    torch.save(model.state_dict(), 'models/rnn')
    print('Training done')
# --- Script fragment: timing report for data loading, model/optimizer setup,
# and the START of the epoch loop (train/evaluate calls); the rest of the loop
# body is truncated at the end of this fragment.
# NOTE(review): the embedding row for the pad token is zeroed so padding
# carries no signal; assumes model.embedding already holds pretrained vectors.
# NOTE(review): `start_time` is reused inside the loop for per-epoch timing.
end_time = time.time() data_loading_time = round(end_time - start_time,3) data_prep_mins, data_prep_secs = epoch_time(start_time, end_time) print(f'Data loading Time: {data_prep_mins}m {data_prep_secs}s') pad_idx = TEXT.vocab.stoi[TEXT.pad_token] model = RNN(input_dim, args.embedding_dim, args.hidden_dim, 1, args.n_layers, args.bidirectional, args.dropout, pad_idx) model.embedding.weight.data[pad_idx] = torch.zeros(args.embedding_dim) optimizer = optim.Adam(model.parameters()) criterion = nn.BCEWithLogitsLoss() model = model.to(device) criterion = criterion.to(device) best_test_loss = float('inf') loss_result = [] acc_result = [] elapsed_time = [] print(f'Training with {tokenizer_name} tokenizer...') for epoch in range(args.n_epochs): start_time = time.time() train_loss, train_acc = train(model, train_iterator, optimizer, criterion, device) test_loss, test_acc = evaluate(model, test_iterator, criterion, device)
# main(): cnews text-classification driver — parses CLI args, caches the
# train/dev/test corpora as torch files, optionally trains the RNN, then
# evaluates CNN/AVG/RNN models from their checkpoints and appends results
# to args.output_file.
# NOTE(review): every boolean flag uses action="store_true" WITH default=True,
# so none of them can ever be switched off from the command line.
# NOTE(review): '--learnning_rate' (double n) and 'cirtion' (criterion) are
# misspellings; renaming would break the CLI/callers, so they are left as-is.
# NOTE(review): the do_test branch loads 'classifa-best-CNN.th' and
# 'classifa-best-AVG.th' even though the CNN/AVG training calls are commented
# out in do_train — those checkpoints must already exist on disk.
def main(): parse = argparse.ArgumentParser() parse.add_argument("--train_data_dir", default='./cnews/cnews.train.txt', type=str, required=False) parse.add_argument("--dev_data_dir", default='./cnews/cnews.val.txt', type=str, required=False) parse.add_argument("--test_data_dir", default='./cnews/cnews.test.txt', type=str, required=False) parse.add_argument("--output_file", default='deep_model.log', type=str, required=False) parse.add_argument("--batch_size", default=1, type=int) parse.add_argument("--do_train", default=True, action="store_true", help="Whether to run training.") parse.add_argument("--do_test", default=True, action="store_true", help="Whether to run training.") parse.add_argument("--learnning_rate", default=5e-4, type=float) parse.add_argument("--num_epoch", default=10, type=int) parse.add_argument("--max_vocab_size", default=150000, type=int) parse.add_argument("--min_freq", default=2, type=int) parse.add_argument("--embed_size", default=300, type=int) parse.add_argument("--hidden_size", default=256, type=int) parse.add_argument("--dropout_rate", default=0.2, type=float) parse.add_argument("--warmup_steps", default=0, type=int, help="Linear warmup over warmup_steps.") parse.add_argument("--GRAD_CLIP", default=1, type=float) parse.add_argument("--vocab_path", default='./vocab.json', type=str) parse.add_argument("--do_cnn", default=True, action="store_true", help="Whether to run training.") parse.add_argument("--do_rnn", default=True, action="store_true", help="Whether to run training.") parse.add_argument("--do_avg", default=True, action="store_true", help="Whether to run training.") parse.add_argument("--num_filter", default=100, type=int, help="CNN模型一个filter的输出channels") args = parse.parse_args() device = torch.device("cuda" if torch.cuda.is_available() else "cpu") args.device = device set_seed() if os.path.exists('./cnews/cache_train_data'): train_data = torch.load('./cnews/cache_train_data') else: train_data = read_corpus(args.train_data_dir) 
train_data = [(text, labs) for text, labs in zip(*train_data)] torch.save(train_data, './cnews/cache_train_data') if os.path.exists('./cnews/cache_dev_data'): dev_data = torch.load('./cnews/cache_dev_data') else: dev_data = read_corpus(args.dev_data_dir) dev_data = [(text, labs) for text, labs in zip(*dev_data)] torch.save(dev_data, './cnews/cache_dev_data') vocab = build_vocab(args) label_map = vocab.labels if args.do_train: # if args.do_cnn: # cnn_model = CNN(len(vocab.vocab),args.embed_size,args.num_filter,[2,3,4],len(label_map),dropout=args.dropout_rate) # cnn_model.to(device) # train(args,cnn_model,train_data,dev_data,vocab,dtype='CNN') # # if args.do_avg: # avg_model = WordAVGModel(len(vocab.vocab),args.embed_size,len(label_map),dropout=args.dropout_rate) # avg_model.to(device) # train(args, avg_model, train_data, dev_data, vocab, dtype='AVG') if args.do_rnn: rnn_model = RNN(len(vocab.vocab), args.embed_size, args.hidden_size, len(label_map), n_layers=1, bidirectional=True, dropout=args.dropout_rate) rnn_model.to(device) train(args, rnn_model, train_data, dev_data, vocab, dtype='RNN') if args.do_test: if os.path.exists('./cnews/cache_test_data'): test_data = torch.load('./cnews/cache_test_data') else: test_data = read_corpus(args.test_data_dir) test_data = [(text, labs) for text, labs in zip(*test_data)] torch.save(test_data, './cnews/cache_test_data') cirtion = nn.CrossEntropyLoss() cnn_model = CNN(len(vocab.vocab), args.embed_size, args.num_filter, [2, 3, 4], len(label_map), dropout=args.dropout_rate) cnn_model.load_state_dict(torch.load('classifa-best-CNN.th')) cnn_model.to(device) cnn_test_loss, cnn_result = evaluate(args, cirtion, cnn_model, test_data, vocab) avg_model = WordAVGModel(len(vocab.vocab), args.embed_size, len(label_map), dropout=args.dropout_rate) avg_model.load_state_dict(torch.load('classifa-best-AVG.th')) avg_model.to(device) avg_test_loss, avg_result = evaluate(args, cirtion, avg_model, test_data, vocab) rnn_model = RNN(len(vocab.vocab), 
args.embed_size, args.hidden_size, len(label_map), n_layers=1, bidirectional=True, dropout=args.dropout_rate) rnn_model.load_state_dict(torch.load('classifa-best-RNN.th')) rnn_model.to(device) rnn_test_loss, rnn_result = evaluate(args, cirtion, rnn_model, test_data, vocab) with open(args.output_file, "a") as fout: fout.write('\n') fout.write('=============== test result ============\n') fout.write("test model of {}, loss: {},result: {}\n".format( 'CNN', cnn_test_loss, cnn_result)) fout.write("test model of {}, loss: {},result: {}\n".format( 'AVG', avg_test_loss, avg_result)) fout.write("test model of {}, loss: {},result: {}\n".format( 'RNN', rnn_test_loss, rnn_result))
def train(args, labeled, resume_from, ckpt_file):
    """Train the sentiment RNN on the labeled subset and write a checkpoint.

    Parameters
    ----------
    args : dict with EMBEDDING_DIM, HIDDEN_DIM, N_LAYERS, DROPOUT, EXPT_DIR,
        train_epochs keys.
    labeled : indices of the labeled examples to train on.
    resume_from : checkpoint stem to resume from, or None to start fresh.
    ckpt_file : stem of the checkpoint file written at the end.

    Notes
    -----
    BUG FIX: the original logged with `if i % 10:`, which fires on every batch
    EXCEPT multiples of 10; it now reports (and resets the running loss) every
    10th batch.  Also, `labels` was never moved to the GPU even though the
    model and criterion were, which raises a device-mismatch error — fixed.
    """
    print("========== In the train step ==========")
    iterator, TEXT, LABEL, tabular_dataset = load_data(stage="train",
                                                       args=args,
                                                       indices=labeled)
    print("Created the iterators")
    INPUT_DIM = len(TEXT.vocab)
    OUTPUT_DIM = 1
    BIDIRECTIONAL = True
    PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

    model = RNN(
        INPUT_DIM,
        args["EMBEDDING_DIM"],
        args["HIDDEN_DIM"],
        OUTPUT_DIM,
        args["N_LAYERS"],
        BIDIRECTIONAL,
        args["DROPOUT"],
        PAD_IDX,
    )
    model = model.to(device=device)

    # Initialise the embedding layer with the pretrained vectors; zero the
    # <unk> and <pad> rows so they carry no signal.
    pretrained_embeddings = TEXT.vocab.vectors
    model.embedding.weight.data.copy_(pretrained_embeddings)
    unk_idx = TEXT.vocab.stoi["<unk>"]
    pad_idx = TEXT.vocab.stoi["<pad>"]
    model.embedding.weight.data[unk_idx] = torch.zeros(args["EMBEDDING_DIM"])
    model.embedding.weight.data[pad_idx] = torch.zeros(args["EMBEDDING_DIM"])

    optimizer = optim.Adam(model.parameters())
    criterion = nn.BCEWithLogitsLoss()

    model = model.to("cuda")
    criterion = criterion.to("cuda")

    if resume_from is not None:
        ckpt = torch.load(os.path.join(args["EXPT_DIR"], resume_from + ".pth"))
        model.load_state_dict(ckpt["model"])
        optimizer.load_state_dict(ckpt["optimizer"])
    else:
        getdatasetstate(args)

    model.train()  # turn on dropout, etc

    for epoch in tqdm(range(args["train_epochs"]), desc="Training"):
        running_loss = 0
        i = 0
        for batch in iterator:
            text, text_length = batch.review
            labels = batch.sentiment
            text = text.cuda()
            text_length = text_length.cuda()
            # Move targets to the same device as the model output.
            labels = labels.cuda()
            optimizer.zero_grad()
            output = model(text, text_length)
            loss = criterion(torch.squeeze(output).float(), labels.float())
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            # Report every 10th batch, then reset the running loss.
            # NOTE(review): the /1000 scaling is inherited from the original —
            # confirm the intended averaging window.
            if i % 10 == 0:
                print(
                    "epoch: {} batch: {} running-loss: {}".format(
                        epoch + 1, i + 1, running_loss / 1000),
                    end="\r",
                )
                running_loss = 0
            i += 1

    print("Finished Training. Saving the model as {}".format(ckpt_file))

    ckpt = {"model": model.state_dict(), "optimizer": optimizer.state_dict()}
    torch.save(ckpt, os.path.join(args["EXPT_DIR"], ckpt_file + ".pth"))
    return
# --- Script fragment: setup for character-level name generation (18 language
# categories over ASCII letters + punctuation; the extra n_letters slot is the
# EOS marker), loading pretrained weights, then the START of sample(); the
# generation loop body is truncated at the end of this fragment.
# NOTE(review): `rnn=rnn` binds the module-level model at def time — the
# default captures whatever `rnn` is when this line runs.
# NOTE(review): weights are loaded with map_location CPU, then the model is
# moved to `device`.
max_length = 20 WEIGHT_PATH = "./weights/text_gen11.23178537686666.pth" device = "cuda" if torch.cuda.is_available() else "cpu" all_letters = string.ascii_letters + " .,;'-" n_letters = len(all_letters) + 1 categories = [ 'Arabic', 'Chinese', 'Korean', 'Japanese', 'French', 'English', 'Czech', 'Irish', 'Portuguese', 'German', 'Scottish', 'Polish', 'Italian', 'Vietnamese', 'Dutch', 'Spanish', 'Russian', 'Greek' ] n_categories = len(categories) cate2index = {v: i for i, v in enumerate(categories)} rnn = RNN(categories, n_letters, 128, n_letters) checkpoint = torch.load(WEIGHT_PATH, map_location=torch.device('cpu')) rnn.load_state_dict(checkpoint) rnn.to(device) # Sample from a category and starting letter def sample(category, start_letter='A', rnn=rnn): with torch.no_grad(): # no need to track history in sampling category_tensor = categoryTensor(cate2index[category]) input = inputTensor(start_letter) hidden = rnn.initHidden() output_name = start_letter for i in range(max_length): output, hidden = rnn(category_tensor, input[0], hidden) topv, topi = output.topk(1) topi = topi[0][0] if topi == n_letters - 1:
# infer(): run the checkpointed sentiment RNN over the unlabeled pool and
# return, for each unlabeled index, a 0/1 prediction (sigmoid > 0.5) plus a
# two-way "pre_softmax" pair built from logit_fn of the sigmoid and its
# complement (used downstream for active-learning scoring).
# NOTE(review): `labels = batch.sentiment` is read but never used during
# inference.  `prediction` is reused inside the loop first as the model output
# tensor, then as the per-example integer label — confirm this shadowing is
# intended.  The tqdm total counts samples while update() advances per batch.
def infer(args, unlabeled, ckpt_file): print("========== In the inference step ==========") iterator, TEXT, LABEL, tabular_dataset = load_data(stage="infer", args=args, indices=unlabeled) INPUT_DIM = len(TEXT.vocab) OUTPUT_DIM = 1 BIDIRECTIONAL = True PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token] model = RNN( INPUT_DIM, args["EMBEDDING_DIM"], args["HIDDEN_DIM"], OUTPUT_DIM, args["N_LAYERS"], BIDIRECTIONAL, args["DROPOUT"], PAD_IDX, ) model.load_state_dict( torch.load(os.path.join(args["EXPT_DIR"], ckpt_file + ".pth"))["model"]) model = model.to(device=device) model.eval() predix = 0 predictions = {} truelabels = {} n_val = len(tabular_dataset) with tqdm(total=n_val, desc="Inference round", unit="batch", leave=False) as pbar: for batch in iterator: text, text_length = batch.review labels = batch.sentiment with torch.no_grad(): text = text.to(device) text_length = text_length.to(device) prediction = model(text, text_length) for logit in prediction: predictions[unlabeled[predix]] = {} sig_prediction = torch.sigmoid(logit) prediction = 0 if sig_prediction > 0.5: prediction = 1 predictions[unlabeled[predix]]["prediction"] = prediction predictions[unlabeled[predix]]["pre_softmax"] = [[ logit_fn(sig_prediction.cpu()), logit_fn(1 - sig_prediction.cpu()) ]] # print(predictions[unlabeled[predix]]["pre_softmax"]) predix += 1 pbar.update() print("The predictions are", predictions) return {"outputs": predictions}
# --- Script fragment: wipe and recreate the checkpoint directory, build the
# word/label vocabularies from the training file, construct and graph the RNN
# classifier, then the START of a padding() helper whose body is truncated at
# the end of this fragment.
# NOTE(review): shutil.rmtree on checkpoint_path deletes any previous
# checkpoints unconditionally and raises if the directory does not exist.
# NOTE(review): writer.add_graph / summary are fed a random integer tensor of
# shape (batch_size, sequence_len) as a dummy input — values 0..999 are
# assumed to be valid token ids.
shutil.rmtree(checkpoint_path) os.mkdir(checkpoint_path) model_name = "rnn.pt" # do text parsing, get vocab size and class count build_vocab(args.train, args.output_vocab_label, args.output_vocab_word) label2id, id2label = load_vocab(args.output_vocab_label) word2id, id2word = load_vocab(args.output_vocab_word) vocab_size = len(word2id) num_class = len(label2id) # set model model = RNN(vocab_size=vocab_size, num_class=num_class, emb_dim=args.embedding_dim, emb_droprate=args.embedding_droprate, sequence_len=args.sequence_len, rnn_droprate=args.rnn_droprate, rnn_cell_hidden=args.rnn_cell_hidden, rnn_cell_type=args.rnn_cell_type, birnn=args.birnn, num_layers=args.num_layers) model.build() model.to(device) criterion = nn.CrossEntropyLoss().to(device) optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-6) writer.add_graph(model, torch.randint(low=0, high=1000, size=(args.batch_size, args.sequence_len), dtype=torch.long).to(device)) print(summary(model, torch.randint(low=0, high=1000, size=(args.batch_size, args.sequence_len), dtype=torch.long).to(device))) # padding sequence with <PAD> def padding(data, fix_length, pad, add_first="", add_last=""): if add_first: data.insert(0, add_first) if add_last: data.append(add_last) pad_data = [] data_len = len(data) for idx in range(fix_length): if idx < data_len:
def main():
    """CLI driver: train an RNN/LSTM on district data, checkpoint the best
    validation model, and report the final test loss.

    Notes
    -----
    BUG FIX: the original accumulated ``running_loss += loss`` — summing live
    tensors retains autograd references (and keeps the values on the GPU);
    it now accumulates ``loss.item()``.
    The DataLoader is created once before the epoch loop instead of being
    rebuilt (and its workers respawned) every epoch; shuffling still
    reshuffles each epoch.
    NOTE(review): the final test evaluation uses the LAST-epoch model, not the
    best checkpoint, and ``best_model`` stays unbound if no epoch after
    EPOCHS/3 improves on the hard-coded 10.0 threshold — confirm intent.
    """
    parser = argparse.ArgumentParser(description="==========[RNN]==========")
    parser.add_argument("--mode", default="train", help="available modes: train, test, eval")
    parser.add_argument("--model", default="rnn", help="available models: rnn, lstm")
    parser.add_argument("--dataset", default="all", help="available datasets: all, MA, MI, TN")
    parser.add_argument("--rnn_layers", default=3, type=int, help="number of stacked rnn layers")
    parser.add_argument("--hidden_dim", default=16, type=int, help="number of hidden dimensions")
    parser.add_argument("--lin_layers", default=1, type=int, help="number of linear layers before output")
    parser.add_argument("--epochs", default=100, type=int, help="number of max training epochs")
    parser.add_argument("--dropout", default=0.0, type=float, help="dropout probability")
    parser.add_argument("--learning_rate", default=0.01, type=float, help="learning rate")
    parser.add_argument("--verbose", default=2, type=int, help="how much training output?")
    options = parser.parse_args()
    verbose = options.verbose

    if torch.cuda.is_available():
        device = torch.device("cuda")
        if verbose > 0:
            print("GPU available, using cuda...")
            print()
    else:
        device = torch.device("cpu")
        if verbose > 0:
            print("No available GPU, using CPU...")
            print()

    params = {
        "MODE": options.mode,
        "MODEL": options.model,
        "DATASET": options.dataset,
        "RNN_LAYERS": options.rnn_layers,
        "HIDDEN_DIM": options.hidden_dim,
        "LIN_LAYERS": options.lin_layers,
        "EPOCHS": options.epochs,
        "DROPOUT_PROB": options.dropout,
        "LEARNING_RATE": options.learning_rate,
        "DEVICE": device,
        "OUTPUT_SIZE": 1
    }
    # Checkpoint path encodes the full hyperparameter configuration.
    params["PATH"] = "models/" + params["MODEL"] + "_" + params[
        "DATASET"] + "_" + str(params["RNN_LAYERS"]) + "_" + str(
            params["HIDDEN_DIM"]) + "_" + str(
                params["LIN_LAYERS"]) + "_" + str(
                    params["LEARNING_RATE"]) + "_" + str(
                        params["DROPOUT_PROB"]) + "_" + str(
                            params["EPOCHS"]) + "_model.pt"

    train_data = utils.DistrictData(params["DATASET"], "train")
    val_data = utils.DistrictData(params["DATASET"], "val")
    params["INPUT_SIZE"] = train_data[0]['sequence'].size()[1]

    if params["MODEL"] == "rnn":
        model = RNN(params)
    elif params["MODEL"] == "lstm":
        model = LSTM(params)
    model.to(params["DEVICE"])
    criterion = nn.MSELoss(reduction='sum')
    optimizer = torch.optim.Adam(model.parameters(), lr=params["LEARNING_RATE"])

    if verbose == 0:
        print(params["PATH"])
    else:
        utils.print_params(params)
        print("Beginning training...")
        print()

    since = time.time()
    best_val_loss = 10.0
    # Build the loader once; shuffle=True still reshuffles every epoch.
    train_loader = DataLoader(train_data, batch_size=32, shuffle=True, num_workers=4)
    for e in range(params["EPOCHS"]):
        running_loss = 0.0
        model.train()
        for batch in train_loader:
            x = batch['sequence'].to(device)
            y = batch['target'].to(device)
            seq_len = batch['size'].to(device)
            optimizer.zero_grad()
            y_hat, hidden = model(x, seq_len)
            loss = criterion(y_hat, y)
            # Accumulate a plain float, not the loss tensor (avoids retaining
            # the autograd graph / GPU memory across iterations).
            running_loss += loss.item()
            loss.backward()
            optimizer.step()
        mean_loss = running_loss / len(train_data)
        val_loss = evaluate(val_data, model, params, criterion, validation=True)
        if verbose == 2 or (verbose == 1 and (e + 1) % 100 == 0):
            print('=' * 25 + ' EPOCH {}/{} '.format(e + 1, params["EPOCHS"]) + '=' * 25)
            print('Training Loss: {}'.format(mean_loss))
            print('Validation Loss: {}'.format(val_loss))
            print()
        # Only start checkpointing after the first third of training.
        if e > params["EPOCHS"] / 3:
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                best_model = model.state_dict()
                torch.save(best_model, params["PATH"])

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Final Training Loss: {:4f}'.format(mean_loss))
    print('Best Validation Loss: {:4f}'.format(best_val_loss))
    test_data = utils.DistrictData(params["DATASET"], "test")
    test_loss = evaluate(test_data, model, params, criterion)
    print('Test Loss: {}'.format(test_loss))
    print()
# predict(): 4-model ensemble inference — one RNN per input channel
# (inputs[:, k, :, :] for k in 0..3), logits SUMMED across the four models
# before softmax, written to CSV and returned as a DataFrame sorted by
# Object/Sequence (for test-time prediction averaging downstream).
# NOTE(review): `hiddens` is overwritten by each of the four forward passes
# and never used.  Assumes batch['inputs'] is (B, 4, T, D) — TODO confirm.
def predict(cfg, model_path_c1, model_path_c2, model_path_c3, model_path_c4, loader, device, save_path): print(f'Saving predictions @ {save_path}') model_c1 = RNN(cfg.model_type, cfg.input_dim, cfg.hidden_dim, cfg.n_layers, cfg.drop_p, cfg.output_dim, cfg.bi_dir) model_c2 = RNN(cfg.model_type, cfg.input_dim, cfg.hidden_dim, cfg.n_layers, cfg.drop_p, cfg.output_dim, cfg.bi_dir) model_c3 = RNN(cfg.model_type, cfg.input_dim, cfg.hidden_dim, cfg.n_layers, cfg.drop_p, cfg.output_dim, cfg.bi_dir) model_c4 = RNN(cfg.model_type, cfg.input_dim, cfg.hidden_dim, cfg.n_layers, cfg.drop_p, cfg.output_dim, cfg.bi_dir) model_c1 = model_c1.to(device) model_c2 = model_c2.to(device) model_c3 = model_c3.to(device) model_c4 = model_c4.to(device) # load the model model_c1.load_state_dict(torch.load(model_path_c1)) model_c2.load_state_dict(torch.load(model_path_c2)) model_c3.load_state_dict(torch.load(model_path_c3)) model_c4.load_state_dict(torch.load(model_path_c4)) # just to be sure model_c1.eval() model_c2.eval() model_c3.eval() model_c4.eval() predictions = { 'Object': [], 'Sequence': [], } for c in range(cfg.output_dim): predictions[f'{cfg.task}_prob_{c}'] = [] for batch in loader: inputs = batch['inputs'].to(device) with torch.set_grad_enabled(False): # (B, T, D) outputs_c1, hiddens = model_c1(inputs[:, 0, :, :]) outputs_c2, hiddens = model_c2(inputs[:, 1, :, :]) outputs_c3, hiddens = model_c3(inputs[:, 2, :, :]) outputs_c4, hiddens = model_c4(inputs[:, 3, :, :]) outputs = outputs_c1 + outputs_c2 + outputs_c3 + outputs_c4 _, preds = torch.max(outputs, 1) softmaxed = torch.nn.functional.softmax(outputs, dim=-1) for i in range(len(batch['paths'])): sequence = pathlib.Path(batch['paths'][i]).stem predictions['Object'].append(batch['containers'][i]) predictions['Sequence'].append(sequence) for c in range(cfg.output_dim): predictions[f'{cfg.task}_prob_{c}'].append(softmaxed[i, c].item()) predictions_dataset = pd.DataFrame.from_dict(predictions).sort_values( ['Object', 'Sequence']) 
predictions_dataset.to_csv(save_path, index=False) # returning the dataset because it will be useful for test-time prediction averaging return predictions_dataset