def infer(minmax, data_train, data_test):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = LSTM().to(device)
    # Raw string so the backslashes in the Windows path are not read as escapes.
    model.load_state_dict(
        torch.load(r"D:\stock\weights\checkpont_67.27376310428824.pth"))
    model.eval()
    test_size = len(data_test)
    future_day = test_size
    timestamp = 5
    output_predict = np.zeros(
        (data_train.shape[0] + future_day, data_train.shape[1]))
    output_predict[0] = data_train.iloc[0]
    for k in range(0, (data_train.shape[0] // timestamp) * timestamp, timestamp):
        index = min(k + timestamp, output_predict.shape[0] - 1)
        # Slice the window from data_train (the original referenced an undefined `df`).
        batch_x = np.expand_dims(data_train.iloc[k:index, :].values, axis=0)
        batch_y = data_train.iloc[k + 1:index + 1, :].values  # unused at inference
        batch_x = torch.Tensor(batch_x).to(device)
        batch_y = torch.Tensor(batch_y).to(device)
        out_logits = model(batch_x)
        output_predict[k + 1:k + timestamp + 1] = \
            out_logits.cpu().detach().numpy()[0]
    output_predict = minmax.inverse_transform(output_predict)
    return output_predict
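# For orientation, a minimal sketch of the kind of LSTM module infer() above
# assumes (the real class is defined elsewhere in this project; the layer
# sizes below are hypothetical, chosen only to match the no-argument
# constructor and the (batch, time, features) -> (batch, time, features)
# call pattern):
import torch
import torch.nn as nn

class LSTMSketch(nn.Module):
    def __init__(self, n_features=4, hidden_size=128, num_layers=1):
        super().__init__()
        self.lstm = nn.LSTM(n_features, hidden_size,
                            num_layers=num_layers, batch_first=True)
        self.head = nn.Linear(hidden_size, n_features)

    def forward(self, x):
        out, _ = self.lstm(x)   # (batch, time, hidden)
        return self.head(out)   # (batch, time, n_features)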
def show_result(self):
    files = os.listdir(self.output)
    for file in files:
        if ".pth" in file:
            path = os.path.join(self.output, file)
            lstm_model = LSTM(self.input_size, self.output_size, self.nb_neurons)
            lstm_model.load_state_dict(torch.load(path))
            lstm_model.eval()
            print("model : %s loaded" % path)
            predictions = []
            for (x, _) in self.testing_dataloader:
                if x.shape[0] == self.batch_size:
                    with torch.no_grad():
                        # Reset the hidden state before each batch.
                        lstm_model.hidden_cell = (
                            torch.zeros(1, self.batch_size, lstm_model.nb_neurons),
                            torch.zeros(1, self.batch_size, lstm_model.nb_neurons))
                        output = lstm_model(x.float())
                        output = self.data.unnormalizeData(output).squeeze()
                        predictions += output.tolist()
            plt.plot(predictions, label="prediction")
            plt.plot(self.real_data_test, label="target")
            plt.title(file)
            plt.legend()
            plt.show()
def test(test, feature, model, hidden, layer, output, index2char, index2phone,
         phone_map, phone2index):
    ans = open(output, 'w')
    ans.write('id,phone_sequence\n')
    test_set = Feature_Dataset(feature, 'test')
    if feature == 'mfcc':
        feature_dim = 39
    elif feature == 'fbank':
        feature_dim = 69
    elif feature == 'all':
        feature_dim = 108
    if model == 'LSTM':
        test_model = LSTM(feature_dim, hidden, layer)
    elif model == 'BiLSTM':
        test_model = LSTM(feature_dim, hidden, layer, bi=True)
    elif model == 'C_RNN':
        group_size = 5
        test_model = C_RNN(group_size, feature_dim, hidden, layer)
    checkpoint = torch.load(test)
    test_model.load_state_dict(checkpoint['model'])
    test_model.eval()
    if USE_CUDA:
        test_model = test_model.cuda()
    for i in tqdm(range(1, len(test_set) + 1)):
        data = test_set[i - 1]
        speaker = data[0]
        test_feature = Variable(data[1].float())  # Variable is a no-op since PyTorch 0.4
        test_hidden = test_model.init_hidden()
        # Named `pred` rather than `output`, so the output-path argument is not shadowed.
        pred = torch.max(test_model(test_feature, test_hidden), 1)[1]
        result = test_trim(index2char, index2phone, phone_map, phone2index,
                           pred.data.cpu().numpy())
        ans.write('{},{}\n'.format(speaker, result))
    ans.close()
def main(args):
    if args.model == 'base':
        postprocessing = None
    elif args.model == 'jump':
        postprocessing = pick_fix_length(400, PAD_TOKEN)
    TEXT = data.Field(lower=True, postprocessing=postprocessing,
                      pad_token=PAD_TOKEN, include_lengths=True)
    LABEL = data.Field(sequential=False, pad_token=None, unk_token=None)
    train, test = datasets.IMDB.splits(TEXT, LABEL)
    TEXT.build_vocab(train)
    LABEL.build_vocab(train)
    train_iter, test_iter = data.BucketIterator.splits(
        (train, test), batch_sizes=(args.batch, args.batch * 4),
        device=args.gpu, repeat=False, sort_within_batch=True)
    if args.model == 'base':
        model = LSTM(len(TEXT.vocab), 300, 128, len(LABEL.vocab))
    elif args.model == 'jump':
        model = LSTMJump(len(TEXT.vocab), 300, 128, len(LABEL.vocab),
                         args.R, args.K, args.N, 80, 8)
    model.load_pretrained_embedding(
        get_word2vec(TEXT.vocab.itos,
                     '.vector_cache/GoogleNews-vectors-negative300.bin'))
    model.cuda(args.gpu)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    max_accuracy = 0
    for i in range(args.epoch):
        print('Epoch: {}'.format(i + 1))
        sum_loss = 0
        model.train()
        for batch in train_iter:
            optimizer.zero_grad()
            xs, lengths = batch.text
            loss = model(xs, lengths, batch.label)
            loss.backward()
            # clip_grad_norm was renamed clip_grad_norm_ in PyTorch 0.4
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.)
            optimizer.step()
            sum_loss += loss.item()  # loss.data[0] is deprecated
        print(f'Loss: {sum_loss / len(train_iter)}')
        sum_correct = 0
        total = 0
        model.eval()
        for batch in test_iter:
            y = model.inference(*batch.text)
            sum_correct += y.eq(batch.label).sum().float()
            total += batch.label.size(0)
        accuracy = (sum_correct / total).item()  # .data[0] is deprecated
        max_accuracy = max(accuracy, max_accuracy)
        print(f'Accuracy: {accuracy}')
    print(f'Max Accuracy: {max_accuracy}')
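# A hypothetical argument parser matching the attributes main() reads
# (flag names are inferred from the args.* accesses above; the defaults
# are guesses, not the project's values):
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--model', choices=['base', 'jump'], default='base')
parser.add_argument('--batch', type=int, default=32)
parser.add_argument('--gpu', type=int, default=0)
parser.add_argument('--lr', type=float, default=1e-3)
parser.add_argument('--epoch', type=int, default=10)
parser.add_argument('--R', type=int, default=20)
parser.add_argument('--K', type=int, default=40)
parser.add_argument('--N', type=int, default=5)

if __name__ == '__main__':
    main(parser.parse_args())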
def validate():
    stock = "MC.PA"
    directory = "/Users/baptiste/Desktop/training"
    input_size = 4
    output_size = 4
    nb_neurons = 200
    test_split = 0.1
    time_window = 5
    dataloader = Data(stock)
    df = dataloader.getData()
    real_data = df.to_numpy()
    df_normalized = dataloader.normalizeData(df)
    df_normalized = torch.FloatTensor(df_normalized.to_numpy())
    test_split = int(test_split * df.shape[0])
    # The stray second colon in the original slice was a typo.
    real_test_split = real_data[-test_split:-time_window, 3]
    testing_split = df_normalized[-test_split:, :]
    files = os.listdir(directory)
    for file in files:
        if ".pth" in file:
            path = os.path.join(directory, file)
            lstm_model = LSTM(input_size, output_size, nb_neurons)
            lstm_model.load_state_dict(torch.load(path))
            print("model : %s loaded" % path)
            lstm_model.eval()
            predictions = []
            for i in range(testing_split.shape[0] - time_window):
                x_test = testing_split[i:i + time_window]
                with torch.no_grad():
                    lstm_model.hidden_cell = (
                        torch.zeros(1, 1, lstm_model.nb_neurons),
                        torch.zeros(1, 1, lstm_model.nb_neurons))
                    predictions.append(
                        dataloader.unnormalizeData(lstm_model(x_test).tolist()))
            predictions = np.array(predictions)[:, 3, 0]
            # plt.figure(figsize=(15, 10))
            plt.plot(real_test_split, label="target")
            plt.plot(predictions, label="prediction")
            plt.title(file)
            plt.legend()
            plt.show()
def get_bot_response2():
    try:
        device = torch.device("cpu")
        with open('data2.json', 'r') as instances:
            data = json.load(instances)
        FILE = "dataserialized2.pth"
        dataserialized = torch.load(FILE)
        seq_length = dataserialized["seq_length"]
        input_size = dataserialized["input_size"]
        hidden_size = dataserialized["hidden_size"]
        num_layers = dataserialized["num_layers"]
        num_classes = dataserialized["num_classes"]
        word_list = dataserialized["word_list"]
        tags = dataserialized["tags"]
        model_state = dataserialized["model_state"]
        model = LSTM(seq_length, input_size, hidden_size, num_layers,
                     num_classes).to(device)
        model.load_state_dict(model_state)
        model.eval()
    except Exception as e:
        print(e)
    if request.method == "POST":
        bot = "Convo"
        user_data = request.json
        sentence = user_data['message']
        # sentence = normalization(sentence)
        sentence = tokenization(sentence)
        x = bag_of_words(sentence, word_list)
        x = torch.from_numpy(x)
        x = x.reshape(-1, x.shape[0])
        x = x.to(device)
        # x = torch.tensor(x)
        # print(x.shape)
        output, hidden = model(x)
        _, predicted = torch.max(output, dim=1)
        tag = tags[predicted.item()]
        prob = torch.softmax(output, dim=1)
        probability = prob[0][predicted.item()]
        if probability.item() > 0.80:
            for i in data['data']:
                if tag == i['tag']:
                    return jsonify(random.choice(i['bot_responses']))
        else:
            return jsonify("I do not understand...")
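# get_bot_response2 reads request.json and returns jsonify(...), so it is
# evidently a Flask view. A minimal sketch of the wiring it assumes (the
# route path and app name here are hypothetical):
from flask import Flask

app = Flask(__name__)
app.add_url_rule('/get_bot_response2', view_func=get_bot_response2,
                 methods=['POST'])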
def train():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    seq, n_batch, n_vocab = get_data_from_file(flags.trainfile,
                                               flags.batch_size, flags.seq_size)
    model = LSTM(n_vocab, flags.seq_size, flags.embedding_size,
                 flags.lstm_size).to(device)
    # optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.7)
    optimizer = torch.optim.Adam(model.parameters(), lr=flags.lr)
    loss_function = nn.CrossEntropyLoss()
    total_acc = []
    total_loss = []
    for e in range(flags.num_epochs):
        (state_h_1, state_c_1), (state_h_2, state_c_2) = \
            model.zero_state(flags.batch_size)
        state_h_1 = state_h_1.to(device)
        state_c_1 = state_c_1.to(device)
        state_h_2 = state_h_2.to(device)
        state_c_2 = state_c_2.to(device)
        model.train()
        epoch_acc = []
        epoch_loss = []
        for i, (x, y) in enumerate(batch(seq, n_batch, flags.batch_size, device)):
            optimizer.zero_grad()
            logits, (state_h_1, state_c_1), (state_h_2, state_c_2) = model(
                x, (state_h_1, state_c_1), (state_h_2, state_c_2))
            # print("shape input {} , shape output {}".format(np.shape(x), np.shape(logits)))
            loss = loss_function(logits, y)
            resp = logits.detach().cpu()
            if e == 90:
                stream(x, y, resp)
            acc = accuracy(y, resp)
            epoch_acc.append(acc)
            # Detach the hidden states so backprop stays within this batch.
            state_h_1 = state_h_1.detach()
            state_c_1 = state_c_1.detach()
            state_h_2 = state_h_2.detach()
            state_c_2 = state_c_2.detach()
            loss_value = loss.item()
            epoch_loss.append(loss_value)
            loss.backward()
            _ = torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               flags.gradients_norm)
            optimizer.step()
        model.eval()
        epoch_acc_test = []
        print("epoch : {} loss {} acc train : {} ".format(
            e, np.mean(epoch_loss), np.mean(epoch_acc)))
        total_acc.append(np.mean(epoch_acc))
        total_loss.append(np.mean(epoch_loss))
        # for name, param in model.named_parameters():
        #     if param.requires_grad:
        #         print(name, param.data)
    return model, total_acc, total_loss, flags.lr
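# train() pulls its hyperparameters from a module-level `flags` object.
# A hypothetical stand-in with the attributes the function reads (the
# values are illustrative, not the project's defaults):
from types import SimpleNamespace

flags = SimpleNamespace(trainfile='data/train.txt', batch_size=16,
                        seq_size=32, embedding_size=64, lstm_size=64,
                        lr=1e-3, num_epochs=100, gradients_norm=5)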
print('model loaded')

# get positional code
if opt['test']['use_ztta']:
    ztta = gen_ztta().cuda()
    # print('ztta:', ztta.size())
    # assert 0
version = opt['test']['version']
# writer = SummaryWriter(log_dir)
loss_total_min = 10000000.0
for epoch in range(opt['test']['num_epoch']):
    state_encoder.eval()
    offset_encoder.eval()
    target_encoder.eval()
    lstm.eval()
    decoder.eval()
    loss_total_list = []
    for i_batch, sampled_batch in enumerate(lafan_loader_test):
        # if i_batch != 33:
        #     continue
        pred_img_list = []
        gt_img_list = []
        img_list = []
        # print(i_batch, sampled_batch['local_q'].size())
        loss_pos = 0
        loss_quat = 0
        loss_contact = 0
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(device)
    idx_to_word, word_to_idx, vocab_size, in_text, out_text = read_file(
        train_file, batch_size, seq_size)
    num_batches, _ = in_text.shape
    val_index = np.random.choice(np.arange(num_batches),
                                 int(num_batches * val_data_proportion),
                                 replace=False)
    train_index = np.delete(np.arange(num_batches), val_index)
    train_in_text = in_text[train_index, :]
    train_out_text = out_text[train_index, :]
    val_in_text = in_text[val_index, :]
    val_out_text = out_text[val_index, :]
    lstm_model = LSTM(vocab_size, seq_size, emb_size, hidden_size)
    lstm_model = lstm_model.to(device)
    lstm_optim = optim.Adam(lstm_model.parameters(), lr=l_rate)
    loss_function = torch.nn.CrossEntropyLoss()
    train_set_loss = []
    val_set_loss = []
    for i in range(epoch):
        train_batches = generate_batch(train_in_text, train_out_text,
                                       batch_size, seq_size)
        val_batches = generate_batch(val_in_text, val_out_text,
                                     batch_size, seq_size)
        h0, c0 = lstm_model.initial_state(batch_size)
        h0 = h0.to(device)
        c0 = c0.to(device)
        total_loss, iterations, val_loss, val_iterations = 0, 0, 0, 0
        # training batches
        for x, y in train_batches:
            iterations += 1
            lstm_model.train()
            # x has shape (batch_size, seq_size)
            x = torch.tensor(x).to(device)
            y = torch.tensor(y).to(device)
            lstm_optim.zero_grad()
            logits, (h0, c0) = lstm_model(x, (h0, c0))
            _, _, n_cat = logits.shape
            loss = loss_function(logits.view(-1, n_cat), y.view(-1))
            total_loss += loss.item()
            loss.backward()
            # At each batch, detach the hidden state from how it was produced.
            # Otherwise the model would try backpropagating all the way to the
            # start of the dataset.
            h0 = h0.detach()
            c0 = c0.detach()
            _ = torch.nn.utils.clip_grad_norm_(lstm_model.parameters(),
                                               gradients_norm)
            lstm_optim.step()
        for x_val, y_val in val_batches:
            val_iterations += 1
            lstm_model.eval()
            x_val = torch.tensor(x_val).to(device)
            y_val = torch.tensor(y_val).to(device)
            logits, (h0, c0) = lstm_model(x_val, (h0, c0))
            _, _, n_cat = logits.shape
            loss = loss_function(logits.view(-1, n_cat), y_val.view(-1))
            val_loss += loss.item()
        avg_loss = total_loss / iterations
        val_avg_loss = val_loss / val_iterations
        train_set_loss.append(avg_loss)
        val_set_loss.append(val_avg_loss)
        print('Epoch: {}'.format(i), 'Loss: {}'.format(avg_loss),
              'Validation Loss: {}'.format(val_avg_loss))
        # if i % 10 == 0:
        #     torch.save(lstm_model.state_dict(), 'checkpoint_pt/model-{}.pth'.format(i))
    _ = predict(device, lstm_model, vocab_size, word_to_idx, idx_to_word,
                top_k=predict_top_k)
    return train_set_loss, val_set_loss
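# A plausible generate_batch for the loop above (the project's own version
# may differ): yield successive (batch_size, seq_size) row blocks from the
# pre-shaped in/out arrays.
def generate_batch_sketch(in_text, out_text, batch_size, seq_size):
    num_rows = (in_text.shape[0] // batch_size) * batch_size
    for start in range(0, num_rows, batch_size):
        yield (in_text[start:start + batch_size],
               out_text[start:start + batch_size])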
def main():
    if dataset == 'train' or dataset == 'val':
        data_dir = './data/train.tsv'
        data_x, data_y = load_data(data_dir, _type='train', _sent_only=sent_only)
    else:
        data_dir = './data/test.tsv'
        data_x = load_data(data_dir, _type='test', _sent_only=sent_only)
    print('Load data with size', len(data_x))
    _dict = load_dict(in_dir)
    data_x = word2index(data_x, _dict)
    data_x, lengths = Padding(data_x)
    if dataset == 'train' or dataset == 'val':
        train_x, train_y, train_l, valid_x, valid_y, valid_l = \
            Split_data(data_x, data_y, lengths, 0.8)
        if dataset == 'train':
            data_x, data_y, lengths = train_x, train_y, train_l
        if dataset == 'val':
            data_x, data_y, lengths = valid_x, valid_y, valid_l
    data_set = Data.TensorDataset(data_x, lengths)
    test_loader = Data.DataLoader(dataset=data_set, batch_size=batch_size,
                                  shuffle=False, num_workers=0)
    print('Dataset load done')
    dim_in = len(_dict)
    dim_out = 5
    model = LSTM(dim_in, dim_out, input_size=embed_size, device=device).to(device)
    if _step == 0:
        model_name = 'best.pth'
    else:
        model_name = 'model%d.pth' % _step
    model_dir = os.path.join(os.path.join(in_dir, 'checkpoint'), model_name)
    if device == 'cuda':
        weight_dict = torch.load(model_dir)
    else:
        weight_dict = torch.load(model_dir, map_location='cpu')
    model.load_state_dict(weight_dict)
    print('Model load done')
    with torch.no_grad():
        predict_ans = torch.LongTensor(0).to(device)
        model.eval()
        for step, (inputs, lengths) in enumerate(test_loader):
            inputs = inputs.to(device)
            lengths = lengths.to(device)
            outputs = model(inputs, lengths)
            predict = outputs.argmax(dim=1)
            predict_ans = torch.cat((predict_ans, predict))
            if step % 10 == 0:
                print('eval step %d' % step)
    if dataset == 'test':
        write_csv(in_dir, predict_ans)
        print('Test done')
    else:
        diff_matrix = np.zeros([5, 5])
        data_y = np.array(data_y)
        # Move predictions to the CPU before converting; np.array() on a
        # CUDA tensor raises an error.
        predict_ans = predict_ans.cpu().numpy()
        Test_Acc = 0
        for x, y in zip(predict_ans, data_y):
            diff_matrix[x, y] += 1
            Test_Acc += (x == y)
        for i in range(5):
            data_size = len(np.where(data_y == i)[0])
            if data_size > 0:
                diff_matrix[:, i] /= data_size
        diff_matrix[np.where(diff_matrix < 1e-3)] = 0
        print("diff_matrix:")
        print(diff_matrix)
        print(len(data_y), Test_Acc)
        Test_Acc /= len(data_y)
        print("Test_Acc:", Test_Acc)
def train(config, start_epoch=1, best_validation_loss=np.inf):
    """Trains AWD-LSTM model using parameters from config."""
    print(f'Training for {config.epochs} epochs using the {config.dataset}',
          f'dataset with lambda value of {config.encoding_lmbd}')
    device = torch.device(config.device)
    dataLoader = DataLoader(config.dataset, config.batch_size, device, config.bptt)
    model = LSTM(embedding_size=config.embedding_size,
                 hidden_size=config.hidden_size,
                 lstm_num_layers=config.lstm_num_layers,
                 vocab_size=len(dataLoader.corpus.dictionary),
                 batch_size=config.batch_size,
                 dropoute=config.dropoute,
                 dropouti=config.dropouti,
                 dropouth=config.dropouth,
                 dropouto=config.dropouto,
                 weight_drop=config.weight_drop,
                 tie_weights=config.tie_weights,
                 device=device)
    # D is the set of gendered words, N the neutral words
    # (not entirely correct, but close enough)
    D, N = get_gendered_words(config.dataset, dataLoader.corpus)
    criterion = torch.nn.CrossEntropyLoss(reduction='mean')
    optimizer = torch.optim.SGD(model.parameters(), lr=config.learning_rate,
                                weight_decay=config.weight_decay)

    def using_asgd():
        """Checks if the optimizer is using ASGD"""
        return 't0' in optimizer.param_groups[0]

    if not config.overwrite and check_model_exists(config):
        print("Loading model from previous state")
        model, optimizer, start_epoch, best_validation_loss = load_current_state(
            model, optimizer, config)
        if using_asgd():
            temp = torch.optim.ASGD(model.parameters(), lr=config.learning_rate,
                                    t0=0, lambd=0.,
                                    weight_decay=config.weight_decay)
            temp.load_state_dict(optimizer.state_dict())
            optimizer = temp
        print("start epoch", start_epoch)
    params = list(model.parameters()) + list(criterion.parameters())
    val_losses = deque(maxlen=config.nonmono)
    for e in range(start_epoch, config.epochs + 1):
        epoch_done = False
        model.train()
        model.initialize_hidden()
        epoch_loss = 0  # loss over the epoch
        n_batch = 0     # number of batches done so far
        t_start = time.time()
        print(f"starting epoch {e}/{config.epochs}")
        while not epoch_done:
            lr = optimizer.param_groups[0]['lr']
            # tr_batch and tr_labels are matrices with horizontal sequences;
            # seq_len is the sequence length for this iteration of the epoch,
            # see the openreview paper mentioned in the dataloader file.
            tr_batch, tr_labels, seq_len, epoch_done = dataLoader.get_train_minibatch()
            # Rescale learning rate for sequence length
            optimizer.param_groups[0]['lr'] = lr * seq_len / config.bptt
            n_batch += 1
            model.detach_hidden()  # prevent improper backprop
            optimizer.zero_grad()
            out, _, lstm_raw_out, lstm_drop_out = model(tr_batch, return_out=True)
            loss = criterion(out.permute(0, 2, 1), tr_labels.t())
            # AR optimisation
            if config.alpha:
                loss += config.alpha * lstm_drop_out.pow(2).mean()
            # TAR optimisation
            if config.beta:
                loss += config.beta * (lstm_raw_out[1:] -
                                       lstm_raw_out[:-1]).pow(2).mean()
            # Encoding bias regularization
            if config.encoding_lmbd > 0:
                loss += bias_regularization_term(model.embed.weight, D, N,
                                                 config.bias_variation,
                                                 config.encoding_lmbd)
            # Decoding bias regularization
            if config.decoding_lmbd > 0:
                loss += bias_regularization_term(model.decoder.weight, D, N,
                                                 config.bias_variation,
                                                 config.decoding_lmbd)
            loss.backward()
            # Gradient clipping added to see effects; turned off by default.
            if config.clip:
                torch.nn.utils.clip_grad_norm_(params, config.clip)
            optimizer.step()
            # Add current loss to epoch loss
            epoch_loss += loss.item()
            # Return learning rate to default
            optimizer.param_groups[0]['lr'] = lr
            # Evaluate the training
            if n_batch % config.batch_interval == 0:
                cur_loss = epoch_loss / n_batch
                elapsed = float(time.time() - t_start)
                examples_per_second = n_batch / elapsed
                print('| epoch {:3d} | {:5d} batch | lr {:05.5f} | batch/s {:5.2f} | '
                      'train loss {:5.2f} | perplexity {:5.2f} |'.format(
                          e, n_batch, optimizer.param_groups[0]['lr'],
                          examples_per_second, cur_loss, np.exp(cur_loss)))
        print("Saving current model")
        save_current_state(model, optimizer, e, best_validation_loss, config)
        # Evaluate the model on the validation set for early stopping
        if e % config.eval_interval == 0:
            print('Evaluating on validation for early stopping criterion')
            test_done = False
            model.initialize_hidden()
            model.eval()
            epoch_loss = 0
            n_batch = 0
            tot_seq_len = 0
            while not test_done:
                n_batch += 1
                va_batch, va_labels, seq_len, test_done = \
                    dataLoader.get_validation_minibatch()
                tot_seq_len += seq_len
                out, _ = model(va_batch)
                model.detach_hidden()
                loss = criterion(out.permute(0, 2, 1), va_labels.t())
                epoch_loss += loss.item()
            cur_loss = epoch_loss / n_batch
            if best_validation_loss > cur_loss:
                print("best_validation_loss > cur_loss")
                best_validation_loss = cur_loss
                save_for_early_stopping(model, config, best_validation_loss)
            val_losses.append(cur_loss)
            print('| epoch {:3d} | lr {:05.5f} | validation loss {:5.2f} | '
                  'perplexity {:5.2f} |'.format(
                      e, optimizer.param_groups[0]['lr'], cur_loss,
                      np.exp(cur_loss)))
            if not config.no_asgd and not using_asgd() and (
                    len(val_losses) == val_losses.maxlen and
                    cur_loss > min(val_losses)):
                print('Switching to ASGD')
                optimizer = torch.optim.ASGD(model.parameters(),
                                             lr=config.learning_rate,
                                             t0=0, lambd=0.,
                                             weight_decay=config.weight_decay)
        # Evaluate the model on the test set
        if e % config.eval_interval == 0:
            print('Evaluating on test')
            test_done = False
            model.eval()
            model.initialize_hidden()
            epoch_loss = 0
            n_batch = 0
            while not test_done:
                n_batch += 1
                te_batch, te_labels, seq_len, test_done = \
                    dataLoader.get_test_minibatch()
                out, _ = model(te_batch)
                model.detach_hidden()
                loss = criterion(out.permute(0, 2, 1), te_labels.t())
                epoch_loss += loss.item()
            cur_loss = epoch_loss / n_batch
            print('| epoch {:3d} | lr {:05.5f} | test loss {:5.2f} | '
                  'perplexity {:5.2f} |'.format(
                      e, optimizer.param_groups[0]['lr'], cur_loss,
                      np.exp(cur_loss)))
    print(f'Training is done. Best validation loss: {best_validation_loss}, '
          f'validation perplexity: {np.exp(best_validation_loss)}')
def main():
    train_dir = './data/train.tsv'
    train_x, train_y = load_data(train_dir)
    print('Load train data with size', len(train_x))
    init_output_log(out_dir)
    _dict = build_dict(train_x, out_dir)
    dim_in = len(_dict)
    dim_out = 5
    train_x = word2index(train_x, _dict)
    train_x, lengths = Padding(train_x)
    train_y = torch.LongTensor(train_y)
    train_x, train_y, train_l, valid_x, valid_y, valid_l = \
        Split_data(train_x, train_y, lengths, split_rate)
    train_set = Data.TensorDataset(train_x, train_y, train_l)
    if weighted:
        samples_weight = get_samples_weight(train_y)
        sampler = WeightedRandomSampler(samples_weight, len(samples_weight))
    else:
        sampler = None
    train_loader = Data.DataLoader(dataset=train_set, batch_size=batch_size,
                                   shuffle=False, num_workers=0, sampler=sampler)
    valid_set = Data.TensorDataset(valid_x, valid_y, valid_l)
    valid_loader = Data.DataLoader(dataset=valid_set, batch_size=batch_size,
                                   shuffle=False, num_workers=0)
    print('Dataset load done')
    if embd_path is not None:
        pretrain_embd = load_embadding(embd_path, embed_size, _dict)
    else:
        pretrain_embd = None
    model = LSTM(dim_in, dim_out, input_size=embed_size, device=device,
                 drop_rate=drop_rate, pretrain_embd=pretrain_embd,
                 num_layers=num_layers, freeze=freeze).to(device)
    optimizer = Adam(model.parameters(), lr=init_LR, weight_decay=1e-4)
    loss_fn = nn.CrossEntropyLoss()
    print('Model load done')
    print('Training start')
    timer = Timer(epoch_size)
    min_loss = 100
    max_val_acc = 0
    Count = 0
    for epoch in range(epoch_size):
        # Decay the learning rate after 5 epochs without a new best training loss.
        if Count >= 5:
            Count = 0
            lr = optimizer.param_groups[0]['lr']
            Decay_LR(optimizer, 0.1)
            lr_new = optimizer.param_groups[0]['lr']
            assert lr != lr_new
        train_loss = 0
        train_acc = 0
        iter_size = 0
        model.train()
        for step, (inputs, labels, lengths) in enumerate(train_loader):
            inputs = inputs.to(device)
            labels = labels.to(device)
            lengths = lengths.to(device)
            outputs = model(inputs, lengths)
            loss = loss_fn(outputs, labels)
            optimizer.zero_grad()  # clear stale gradients; missing in the original
            loss.backward()
            optimizer.step()
            predict = outputs.argmax(dim=1)
            train_acc += (predict == labels).sum().item()
            train_loss += loss.item()
            iter_size += 1
            if (step + 1) % 250 == 0:
                print("[Iter] epoch %d, iter %d, loss %f" % (epoch, step, loss))
        train_loss /= iter_size
        train_acc /= len(train_x)
        print("[Epoch] epoch %d, train_loss %f, train_acc %f"
              % (epoch, train_loss, train_acc))
        save_data(out_dir, train_loss, 'train_loss.txt')
        save_data(out_dir, train_acc, 'train_acc.txt')
        if train_loss < min_loss:
            min_loss = train_loss
            Count = 0
        else:
            Count += 1
        if len(valid_x) > 0:
            valid_loss = 0
            valid_acc = 0
            iter_size = 0
            with torch.no_grad():
                model.eval()
                for step, (inputs, labels, lengths) in enumerate(valid_loader):
                    inputs = inputs.to(device)
                    labels = labels.to(device)
                    lengths = lengths.to(device)
                    outputs = model(inputs, lengths)
                    loss = loss_fn(outputs, labels)
                    predict = outputs.argmax(dim=1)
                    valid_acc += (predict == labels).sum().item()
                    valid_loss += loss.item()
                    iter_size += 1
            valid_loss /= iter_size
            valid_acc /= len(valid_x)
            print("[Epoch] epoch %d, valid_loss %f, valid_acc %f"
                  % (epoch, valid_loss, valid_acc))
            save_data(out_dir, valid_loss, 'valid_loss.txt')
            save_data(out_dir, valid_acc, 'valid_acc.txt')
            if valid_acc > max_val_acc:
                max_val_acc = valid_acc
                print('epoch %d, best val acc: %f' % (epoch, valid_acc))
                save_model(out_dir, model, 'best.pth')
        if (epoch + 1) % 20 == 0:
            save_model(out_dir, model, 'model%d.pth' % epoch)
        timer.update()
    print('Training end')
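# get_samples_weight is defined elsewhere; a plausible implementation for the
# WeightedRandomSampler above (inverse-frequency weight per sample; a sketch,
# not the project's code):
import numpy as np
import torch

def get_samples_weight_sketch(labels):
    labels = labels.numpy()
    # clip avoids division by zero for classes absent from the split
    class_counts = np.bincount(labels).clip(min=1)
    per_class = 1.0 / class_counts          # rarer classes weigh more
    return torch.DoubleTensor(per_class[labels])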
def main(trial_num):
    # Device configuration
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model_type = "lstm"

    # Hyper-parameters
    sequence_length = 28
    input_size = 28
    num_layers = 1
    hidden_size = 128
    num_classes = 10
    batch_size = 100
    num_epochs = 20
    learning_rate = 0.01
    num_trials = 100
    a_range = [1.0, 3.0]
    # a_s = [1.5, 2.0, 2.2, 2.5, 2.7, 3.0]
    # just for testing:
    # num_trials = 1
    # num_epochs = 20
    # a_s = [1.0]
    # for a in a_s:

    trials = {}
    for num_trial in range(num_trials):
        # Sample `a` uniformly from a_range for this trial.
        a = random.random() * (a_range[1] - a_range[0]) + a_range[0]
        print('trial Num: ', trial_num, "a: ", a, "num_trial: ", num_trial)
        trial = {}
        trial['a'] = a
        # define model
        if model_type == 'lstm':
            model = LSTM(input_size, hidden_size, num_layers, num_classes,
                         a, device).to(device)
        elif model_type == 'gru':
            model = GRU(input_size, hidden_size, num_layers, num_classes,
                        a, device).to(device)
        # Loss and optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
        train_dataloader = MNIST_dataloader(batch_size, train=True)
        test_dataloader = MNIST_dataloader(batch_size, train=False)
        # Train the model
        total_step = len(train_dataloader.dataloader)
        total = 0
        total_loss = 0
        for epoch in range(num_epochs):
            model.train()
            for i, (images, labels) in enumerate(train_dataloader.dataloader):
                images = images.reshape(-1, sequence_length, input_size).to(device)
                labels = labels.to(device)
                # Forward pass
                outputs, hts = model(images)
                loss = criterion(outputs, labels)
                # Accumulate with .item() so no autograd graph is kept alive.
                total_loss += loss.item() * labels.size(0)
                total += labels.size(0)
                # Backward and optimize
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                # if (i + 1) % 300 == 0:
                #     print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                #           .format(epoch + 1, num_epochs, i + 1, total_step,
                #                   total_loss / total))
            # Test the model
            model.eval()
            with torch.no_grad():
                correct = 0
                total = 0
                total_loss = 0
                for i, (images, labels) in enumerate(test_dataloader.dataloader):
                    images = images.reshape(-1, sequence_length,
                                            input_size).to(device)
                    labels = labels.to(device)
                    outputs, _ = model(images)
                    loss = criterion(outputs, labels)
                    _, predicted = torch.max(outputs.data, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
                    total_loss += loss.item() * labels.size(0)
                if epoch == (num_epochs - 1):
                    print('Epoch [{}/{}] Loss: {}, Test Accuracy: {} %'.format(
                        epoch + 1, num_epochs, total_loss / total,
                        100 * correct / total))
                saved_model = copy.deepcopy(model)
                trial[epoch] = {"model": saved_model,
                                "accuracy": 100 * correct / total,
                                "loss": total_loss / total}
                del saved_model
        trials[num_trial] = trial
    pickle.dump(trials, open('trials/{}/models/{}_{}_trials_{}.pickle'.format(
        model_type, model_type, hidden_size, trial_num), 'wb'))
os.mkdir("checkpoints") if not os.path.exists(os.path.join("checkpoints", "try")): os.mkdir(os.path.join("checkpoints", "try")) model_out_path = "checkpoints/try/model_epoch_{}.pth".format(epoch) torch.save(model, model_out_path) print("Checkpoint saved to {}".format("checkpoints" + "try")) nEpochs = 1 for epoch in range(1, nEpochs + 1): train(epoch) # if epoch % 5 == 0: # checkpoint(epoch) predDat = [] model = model.eval() for step, data in enumerate(test_data, 1): seq = ToVariable(data[0]) trueVal = ToVariable(data[1]) if use_gpu: seq = seq.cuda() trueVal = trueVal.cuda() predDat = model(seq) for i in range(len(predDat[0])): if predDat[0][i] < 0: predDat[0][i] = 0 if predDat[0][i] % 1 > 0.3: predDat[0][i] = math.ceil(predDat[0][i]) else: predDat[0][i] = math.floor(predDat[0][i]) loss_int = loss_function(predDat, trueVal)
# load pretrained LSTM model
conv = None
if opt.conv:
    conv = LSTM(n_mels)
    files = os.listdir(statepath)
    states = [f for f in files if "lstm_" in f]
    states.sort()
    if not len(states) > 0:
        raise Exception("no states for crnn provided!")
    state = os.path.join(statepath, states[-1])
    if os.path.isfile(state):
        state = torch.load(state)
        conv.load_state_dict(state['state_dict'])
        conv.to(device)
        conv.eval()
        del state

# print(netG)
# print(netD)

criterion = nn.BCELoss()

fixed_noise = None
if opt.ae:
    # sample vectors taken from the unsmoothed song "Ed Sheeran - Shape of You.mp3"
    fixed_noise = torch.tensor([
        vae.encode(Mset[i].to(device)).detach().cpu().numpy()
        for i in range(1337, 1337 + opt.batchSize)
    ], dtype=torch.float32)
def main():
    # Device configuration
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model_type = "lstm"

    # Hyper-parameters
    sequence_length = 28
    input_size = 28
    num_layers = 1
    num_classes = 10
    batch_size = 100
    num_epochs = 10
    learning_rate = 0.01
    num_trials = 100
    a_s = [2]
    trials = {}

    # just for testing
    num_trials = 1
    num_epochs = 5
    a_s = np.random.uniform(0.1, 2, [2])

    for a in a_s:
        for num_trial in range(num_trials):
            print("a: ", a, "num_trial: ", num_trial)
            hidden_size = 8
            trial = {}
            if model_type == 'lstm':
                model = LSTM(input_size, hidden_size, num_layers, num_classes,
                             a, device).to(device)
            elif model_type == 'gru':
                model = GRU(input_size, hidden_size, num_layers, num_classes,
                            a, device).to(device)
            # Loss and optimizer
            criterion = nn.CrossEntropyLoss()
            optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
            train_dataloader = MNIST_dataloader(batch_size, train=True)
            test_dataloader = MNIST_dataloader(batch_size, train=False)
            # Train the model
            total_step = len(train_dataloader.dataloader)
            for epoch in range(num_epochs):
                model.train()
                for i, (images, labels) in enumerate(train_dataloader.dataloader):
                    images = images.reshape(-1, sequence_length,
                                            input_size).to(device)
                    labels = labels.to(device)
                    # Forward pass
                    outputs, hts = model(images)
                    loss = criterion(outputs, labels)
                    # Backward and optimize
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    if (i + 1) % 300 == 0:
                        print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(
                            epoch + 1, num_epochs, i + 1, total_step, loss.item()))
                # Test the model
                model.eval()
                with torch.no_grad():
                    correct = 0
                    total = 0
                    for i, (images, labels) in enumerate(test_dataloader.dataloader):
                        images = images.reshape(-1, sequence_length,
                                                input_size).to(device)
                        labels = labels.to(device)
                        outputs, _ = model(images)
                        # calculate LEs
                        # h = torch.zeros(model.num_layers, images.size(0), model.hidden_size).to(model.device)
                        # c = torch.zeros(model.num_layers, images.size(0), model.hidden_size).to(model.device)
                        # params = (images, (h, c))
                        # if i == 0:
                        #     LEs, rvals = calc_LEs_an(*params, model=model)
                        loss = criterion(outputs, labels)
                        _, predicted = torch.max(outputs.data, 1)
                        total += labels.size(0)
                        correct += (predicted == labels).sum().item()
                    if epoch == (num_epochs - 1):
                        # `loss` here is the last test batch's loss, not an average.
                        print('Epoch [{}/{}] Loss: {}, Test Accuracy: {} %'.format(
                            epoch + 1, num_epochs, loss.item(),
                            100 * correct / total))
                    trial[epoch] = {'model': model,
                                    'accuracy': 100 * correct / total,
                                    'loss': loss.item()}
            trials[num_trial] = trial
    saved_path = f'../../../dataset/trials/{model_type}/models/'
    pickle.dump(trials, open(f'{saved_path}/lstm_{hidden_size}_trials_0.pickle', 'wb'))
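    # Pickling whole model objects (as in the trials dict above) ties the
    # file to the exact class definition. A sketch of the more portable
    # state_dict pattern, reusing the names already in scope in main():
    torch.save(model.state_dict(), f'{saved_path}/lstm_{hidden_size}_state.pth')

    reloaded = LSTM(input_size, hidden_size, num_layers, num_classes,
                    a, device).to(device)
    reloaded.load_state_dict(torch.load(f'{saved_path}/lstm_{hidden_size}_state.pth'))
    reloaded.eval()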
class dl_model():

    def __init__(self, mode):
        # read the config file which contains the parameters
        self.config_file = read_yaml()
        self.mode = mode
        arch_name = '_'.join([
            self.config_file['rnn'],
            str(self.config_file['num_layers']),
            str(self.config_file['hidden_dim'])
        ])
        self.config_file['dir']['models'] = self.config_file['dir'][
            'models'].split('/')[0] + '_' + arch_name + '/'
        self.config_file['dir']['plots'] = self.config_file['dir'][
            'plots'].split('/')[0] + '_' + arch_name + '/'
        # if not os.path.exists(self.config_file['dir']['models']):
        #     os.mkdir(self.config_file['dir']['models'])
        # if not os.path.exists(self.config_file['dir']['plots']):
        #     os.mkdir(self.config_file['dir']['plots'])
        if self.config_file['rnn'] == 'LSTM':
            from model import LSTM as Model
        elif self.config_file['rnn'] == 'GRU':
            from model import GRU as Model
        else:
            print("Model not implemented")
            exit(0)
        self.cuda = (self.config_file['cuda'] and torch.cuda.is_available())
        self.output_dim = self.config_file['num_phones']
        if mode == 'train' or mode == 'test':
            self.plots_dir = self.config_file['dir']['plots']
            # store hyperparameters
            self.total_epochs = self.config_file['train']['epochs']
            self.test_every = self.config_file['train']['test_every_epoch']
            self.test_per = self.config_file['train']['test_per_epoch']
            self.print_per = self.config_file['train']['print_per_epoch']
            self.save_every = self.config_file['train']['save_every']
            self.plot_every = self.config_file['train']['plot_every']
            # dataloaders which return batches of data
            self.train_loader = timit_loader('train', self.config_file)
            self.test_loader = timit_loader('test', self.config_file)
            self.start_epoch = 1
            self.test_acc = []
            self.train_losses, self.test_losses = [], []
            # declare model
            self.model = Model(self.config_file,
                               weights=self.train_loader.weights)
        else:
            self.model = Model(self.config_file, weights=None)
        if self.cuda:
            self.model.cuda()
        # resume training from a stored model
        if self.mode == 'train' and self.config_file['train']['resume']:
            self.start_epoch, self.train_losses, self.test_losses, self.test_acc = \
                self.model.load_model(mode, self.config_file['rnn'],
                                      self.model.num_layers,
                                      self.model.hidden_dim)
            self.start_epoch += 1
        # load best model for testing/feature extraction
        elif self.mode == 'test' or mode == 'test_one':
            self.model.load_model(mode, self.config_file['rnn'],
                                  self.model.num_layers, self.model.hidden_dim)
        # phone folding: map the variants on the right to the phone on the left
        self.replacement = {
            'aa': ['ao'],
            'ah': ['ax', 'ax-h'],
            'er': ['axr'],
            'hh': ['hv'],
            'ih': ['ix'],
            'l': ['el'],
            'm': ['em'],
            'n': ['en', 'nx'],
            'ng': ['eng'],
            'sh': ['zh'],
            'pau': ['pcl', 'tcl', 'kcl', 'bcl', 'dcl', 'gcl', 'h#', 'epi', 'q'],
            'uw': ['ux']
        }

    def train(self):
        print("Starting training at t =", datetime.datetime.now())
        print('Batches per epoch:', len(self.train_loader))
        self.model.train()
        # when to print losses during the epoch
        print_range = list(np.linspace(0, len(self.train_loader),
                                       self.print_per + 2,
                                       dtype=np.uint32)[1:-1])
        if self.test_per == 0:
            test_range = []
        else:
            test_range = list(np.linspace(0, len(self.train_loader),
                                          self.test_per + 2,
                                          dtype=np.uint32)[1:-1])
        for epoch in range(self.start_epoch, self.total_epochs + 1):
            print("Epoch:", str(epoch))
            epoch_loss = 0.0
            i = 0
            while True:
                i += 1
                inputs, labels, lens, status = self.train_loader.return_batch()
                inputs = torch.from_numpy(np.array(inputs)).float()
                labels = torch.from_numpy(np.array(labels)).long()
                lens = torch.from_numpy(np.array(lens)).long()
                if self.cuda:
                    inputs = inputs.cuda()
                    labels = labels.cuda()
                    lens = lens.cuda()
                # zero the parameter gradients
                self.model.optimizer.zero_grad()
                # forward + backward + optimize
                outputs = self.model(inputs, lens)
                loss = self.model.calculate_loss(outputs, labels, lens)
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                               self.config_file['grad_clip'])
                self.model.optimizer.step()
                # store loss
                epoch_loss += loss.item()
                if i in print_range:
                    try:
                        print('After %i batches, Current Loss = %.7f, Avg. Loss = %.7f'
                              % (i + 1, epoch_loss / (i + 1),
                                 np.mean([x[0] for x in self.train_losses])))
                    except:
                        pass
                if i in test_range:
                    self.test(epoch)
                    self.model.train()
                if status == 1:
                    break
            self.train_losses.append((epoch_loss / len(self.train_loader), epoch))
            # test every 5 epochs in the beginning, and then every fixed number
            # of epochs specified in the config file; useful to see how the
            # loss stabilises early on
            if epoch % 5 == 0 and epoch < self.test_every:
                self.test(epoch)
                self.model.train()
            elif epoch % self.test_every == 0:
                self.test(epoch)
                self.model.train()
            # plot loss and accuracy
            if epoch % self.plot_every == 0:
                self.plot_loss_acc(epoch)
            # save model
            if epoch % self.save_every == 0:
                self.model.save_model(False, epoch, self.train_losses,
                                      self.test_losses, self.test_acc,
                                      self.config_file['rnn'],
                                      self.model.num_layers,
                                      self.model.hidden_dim)

    def test(self, epoch=None):
        self.model.eval()
        correct = 0
        total = 0
        correct_nopause = 0
        total_nopause = 0
        pause_id = 27
        # confusion matrix data is stored in this matrix
        matrix = np.zeros((self.output_dim, self.output_dim))
        pad_id = self.output_dim
        print("Testing...")
        print('Total batches:', len(self.test_loader))
        test_loss = 0
        with torch.no_grad():
            while True:
                # draw batches from the test loader (the original mistakenly
                # read from self.train_loader here)
                inputs, labels, lens, status = self.test_loader.return_batch()
                inputs = torch.from_numpy(np.array(inputs)).float()
                labels = torch.from_numpy(np.array(labels)).long()
                lens = torch.from_numpy(np.array(lens)).long()
                # print(inputs.shape, labels.shape, lens)
                if self.cuda:
                    inputs = inputs.cuda()
                    labels = labels.cuda()
                    lens = lens.cuda()
                # forward pass only; no gradients are needed here
                outputs = self.model(inputs, lens)
                loss = self.model.calculate_loss(outputs, labels, lens)
                test_loss += loss.item()
                outputs = outputs.cpu().numpy()
                # remove extra padding from the current batch
                labels = labels.cpu().numpy()[:, :outputs.shape[1]]
                # ignore the blank token
                outputs = np.reshape(outputs[:, :, :-1], (-1, self.output_dim))
                labels = np.reshape(labels, (-1))
                total_pad_tokens = np.sum(labels == pad_id)
                argmaxed = np.argmax(outputs, 1)
                # total number of correct phone predictions
                for i in range(len(labels)):
                    if labels[i] != pause_id and labels[i] != pad_id:
                        # is not pause or pad
                        if argmaxed[i] == labels[i]:
                            correct_nopause += 1
                        total_nopause += 1
                correct += np.sum(argmaxed == labels)
                total += len(argmaxed) - total_pad_tokens
                # matrix[i][j] counts examples classified by the model as
                # class j whose ground-truth label is i
                for k in range(argmaxed.shape[0]):
                    if labels[k] == pad_id:
                        continue
                    matrix[labels[k]][argmaxed[k]] += 1
                if status == 1:
                    break
        for i in range(self.output_dim):
            matrix[i] /= sum(matrix[i])
        acc_all = correct / total
        acc_nopause = correct_nopause / total_nopause
        print(acc_all, acc_nopause)
        test_loss /= len(self.test_loader)
        # plot confusion matrix
        if epoch is not None:
            filename = self.plots_dir + 'confmat_epoch_acc_' + str(epoch) + \
                '_' + str(int(100 * acc_all)) + '.png'
            plt.clf()
            plt.imshow(matrix, cmap='hot', interpolation='none')
            plt.gca().invert_yaxis()
            plt.xlabel("Predicted Label ID")
            plt.ylabel("True Label ID")
            plt.colorbar()
            plt.savefig(filename)
print("Testing accuracy: All - %.4f, No Pause - %.4f , Loss: %.7f" % (acc_all, acc_nopause, test_loss)) self.test_acc.append((acc_all, epoch)) self.test_losses.append((test_loss, epoch)) # if testing loss is minimum, store it as the 'best.pth' model, which is used for feature extraction if test_loss == min([x[0] for x in self.test_losses]): print("Best new model found!") self.model.save_model(True, epoch, self.train_losses, self.test_losses, self.test_acc, self.config_file['rnn'], self.model.num_layers, self.model.hidden_dim) return acc_all # Called during feature extraction. Takes log mel filterbank energies as input and outputs the phone predictions def test_one(self, file_path): (rate, sig) = wav.read(file_path) assert rate == 16000 # sig ranges from -32768 to +32768 AND NOT -1 to +1 feat, energy = fbank(sig, samplerate=rate, nfilt=self.config_file['feat_dim'], winfunc=np.hamming) tsteps, hidden_dim = feat.shape # calculate log mel filterbank energies for complete file feat_log_full = np.reshape(np.log(feat), (1, tsteps, hidden_dim)) lens = np.array([tsteps]) inputs, lens = torch.from_numpy( np.array(feat_log_full)).float(), torch.from_numpy( np.array(lens)).long() id_to_phone = {v[0]: k for k, v in self.model.phone_to_id.items()} self.model.eval() with torch.no_grad(): if self.cuda: inputs = inputs.cuda() lens = lens.cuda() # Pass through model a = time.time() outputs = self.model(inputs, lens).cpu().numpy() print(time.time() - a) # Since only one example per batch and ignore blank token outputs = outputs[0, :, :-1] softmax = np.exp(outputs) / np.sum(np.exp(outputs), axis=1)[:, None] return softmax, id_to_phone # Test for each wav file in the folder and also compare with ground truth def test_folder(self, test_folder, top_n=1, show_graphs=False): accs = [] for wav_file in sorted(os.listdir(test_folder)): # Read input test file wav_path = os.path.join(test_folder, wav_file) dump_path = wav_path[:-4] + '_pred.txt' # Read only wav if wav_file == '.DS_Store' or wav_file.split( '.')[-1] != 'wav': # or os.path.exists(dump_path): continue (rate, sig) = wav.read(wav_path) assert rate == 16000 # sig ranges from -32768 to +32768 AND NOT -1 to +1 feat, energy = fbank(sig, samplerate=rate, nfilt=self.config_file['feat_dim'], winfunc=np.hamming) tsteps, hidden_dim = feat.shape # calculate log mel filterbank energies for complete file feat_log_full = np.reshape(np.log(feat), (1, tsteps, hidden_dim)) lens = np.array([tsteps]) inputs, lens = torch.from_numpy( np.array(feat_log_full)).float(), torch.from_numpy( np.array(lens)).long() id_to_phone = {v[0]: k for k, v in self.model.phone_to_id.items()} self.model.eval() with torch.no_grad(): if self.cuda: inputs = inputs.cuda() lens = lens.cuda() # Pass through model outputs = self.model(inputs, lens).cpu().numpy() # Since only one example per batch and ignore blank token outputs = outputs[0, :, :-1] softmax = np.exp(outputs) / np.sum(np.exp(outputs), axis=1)[:, None] softmax_probs = np.max(softmax, axis=1) # print(softmax) # Take argmax ot generate final string argmaxed = np.argmax(outputs, axis=1) final_str = [id_to_phone[a] for a in argmaxed] # Generate dumpable format of phone, start time and end time ans = compress_seq(final_str) print("Predicted:", ans) phone_path = wav_path[:-3] + 'PHN' # If .PHN file exists, report accuracy if os.path.exists(phone_path): grtuth = read_phones(phone_path, self.replacement) print("Ground truth:", grtuth) unrolled_truth = [] for elem in grtuth: unrolled_truth += [elem[0]] * (elem[2] - elem[1] + 1) truth_softmax = [] 
                top_n_softmax = [[] for x in range(top_n)]
                # Check for top-n accuracy
                correct, total = 0, 0
                for i in range(min(len(unrolled_truth), len(final_str))):
                    truth_softmax.append(
                        softmax[i][self.model.phone_to_id[unrolled_truth[i]][0]])
                    # Rank the classes for this frame by score (the original
                    # built `indices` from len(final_str), i.e. the frame
                    # count, rather than the class count).
                    indices = list(range(len(outputs[i])))
                    zipped = zip(indices, outputs[i])
                    desc = sorted(zipped, key=lambda x: x[1], reverse=True)
                    cur_frame_res = [id_to_phone[x[0]] for x in desc][:top_n]
                    for k in range(top_n):
                        top_n_softmax[k].append(
                            softmax[i][self.model.phone_to_id[cur_frame_res[k]][0]])
                    if unrolled_truth[i] in cur_frame_res:
                        # print truth softmax
                        # if unrolled_truth[i] != cur_frame_res[0]:
                        #     print(i, truth_softmax[-1])
                        correct += 1
                    total += 1
                accs.append(correct / total)
                if show_graphs:
                    # Plot actual softmax and predicted softmax
                    for i in range(top_n):
                        plt.plot(top_n_softmax[i], label=str(i + 1) + ' prob.')
                    print(top_n_softmax)
                    plt.plot(truth_softmax, label='Ground Truth prob', alpha=0.6)
                    plt.xlabel("Frame number")
                    plt.ylabel("Prob")
                    plt.legend()
                    plt.show()
                with open(dump_path, 'w') as f:
                    f.write('Predicted:\n')
                    for t in ans:
                        f.write(' '.join(str(s) for s in t) + '\n')
                    f.write('\nGround Truth:\n')
                    for t in grtuth:
                        f.write(' '.join(str(s) for s in t) + '\n')
                    f.write('\nTop-' + str(top_n) + ' accuracy is ' +
                            str(correct / total))
            else:
                with open(dump_path, 'w') as f:
                    f.write('Predicted:\n')
                    for t in ans:
                        f.write(' '.join(str(s) for s in t) + '\n')
        print(accs)

    # take the train/test loss and test accuracy and plot them over time
    def plot_loss_acc(self, epoch):
        plt.clf()
        plt.plot([x[1] for x in self.train_losses],
                 [x[0] for x in self.train_losses], c='r', label='Train')
        plt.plot([x[1] for x in self.test_losses],
                 [x[0] for x in self.test_losses], c='b', label='Test')
        plt.title("Train/Test loss")
        plt.xlabel("Epochs")
        plt.ylabel("Loss")
        plt.legend()
        plt.grid(True)
        filename = self.plots_dir + 'loss' + '_' + str(epoch) + '.png'
        plt.savefig(filename)
        plt.clf()
        plt.plot([x[1] for x in self.test_acc],
                 [100 * x[0] for x in self.test_acc], c='r')
        plt.title("Test accuracy")
        plt.xlabel("Epochs")
        plt.ylabel("Accuracy in %")  # '%%' is only needed in format strings
        plt.grid(True)
        filename = self.plots_dir + 'test_acc' + '_' + str(epoch) + '.png'
        plt.savefig(filename)
        print("Saved plots")
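# The manual softmax used in test_one/test_folder above (np.exp(x) / sum)
# can overflow for large logits; a numerically stable variant, shown as a
# sketch:
import numpy as np

def stable_softmax(x):
    z = x - np.max(x, axis=1, keepdims=True)  # shift rows so the max is 0
    e = np.exp(z)
    return e / np.sum(e, axis=1, keepdims=True)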
            # forward + backward + optimize
            predict_labels = model(
                input_features,
                torch.LongTensor(sorted(lengths)[::-1]))
            # size = batch_size x video_len(diff) x 11
            loss = loss_function(predict_labels, input_vals)  # size 64x11 vs 64
            loss.backward()
            optimizer.step()
            total_loss += loss.cpu().data.numpy()
            total_batchnum = batch_idx + 1
        print("avg training loss:", total_loss / total_batchnum)
        train_loss.append(total_loss / total_batchnum)

        # validation
        accuracy_val = 0
        with torch.no_grad():
            model.eval()
            for batch_idx, batch_val in enumerate(range(0, datalen_valid, BATCH_SIZE)):
                # get the batch items
                if batch_val + BATCH_SIZE > datalen_valid:
                    valid_features_batch = valid_features[batch_val:]
                    valid_vals_batch = valid_vals[batch_val:]
                else:
                    valid_features_batch = valid_features[batch_val:batch_val + BATCH_SIZE]
                    valid_vals_batch = valid_vals[batch_val:batch_val + BATCH_SIZE]
                # sort the batch items by video length (number of frames /
                # 2048-d features inside), in decreasing order
                lengths = np.array([len(x) for x in valid_features_batch])
                sorted_indexes = np.argsort(lengths)[::-1]
                valid_features_batch = [
                    valid_features_batch[i] for i in sorted_indexes]
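# The decreasing-length sort above is the ordering that
# torch.nn.utils.rnn.pack_padded_sequence historically required. A minimal
# sketch of how a sorted, padded batch is typically packed inside a model
# (illustrative; not this project's model code):
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

def run_lstm_packed(lstm, padded, lengths):
    # padded: (batch, max_len, feat); lengths: decreasing list of ints
    packed = pack_padded_sequence(padded, lengths, batch_first=True)
    out, _ = lstm(packed)
    out, _ = pad_packed_sequence(out, batch_first=True)
    return out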
def train(batch_size=64, window_size=3, epochs=100):
    train_windows_dataset = Andersson_windows_dataset(mode='train',
                                                      window_size=window_size)
    train_windows_loader = DataLoader(train_windows_dataset, batch_size=1,
                                      shuffle=True)
    val_windows_dataset = Andersson_windows_dataset(mode='val',
                                                    window_size=window_size)
    val_windows_loader = DataLoader(val_windows_dataset, batch_size=1,
                                    shuffle=True)
    base_lr_rate = 1e-2
    weight_decay = 0.000016
    model = LSTM(input_size=40, hidden_size=512, num_classes=170,
                 n_layers=16).to(device=torch.device('cuda:0'))
    # criterion = nn.BCEWithLogitsLoss()
    criterion = nn.CrossEntropyLoss()
    # criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=base_lr_rate,
                           weight_decay=weight_decay, amsgrad=True)
    for current_epoch in range(epochs):
        current_train_iter = 0
        current_val_iter = 0
        running_train_loss = 0.0
        current_average_train_loss = 0.0
        running_val_loss = 0.0
        current_average_val_loss = 0.0
        num_train_data = 0
        num_val_data = 0
        running_train_correct_preds = 0
        running_train_correct_classwise_preds = [0] * 170
        running_val_correct_preds = 0
        running_val_correct_classwise_preds = [0] * 170
        for phase in ['train', 'val']:
            # Train loop
            if phase == 'train':
                train_epoch_since = time.time()
                model.train()
                for train_windows, train_track_id in train_windows_loader:
                    train_iterating_dataset = Andersson_iterating_dataset(
                        windows=train_windows, track_id=train_track_id)
                    train_iterating_loader = DataLoader(train_iterating_dataset,
                                                        batch_size=batch_size,
                                                        shuffle=True)
                    for train_batch_window, train_batch_label in train_iterating_loader:
                        current_train_iter += 1
                        outs = model(train_batch_window)
                        # scheduler = poly_lr_scheduler(optimizer=optimizer, init_lr=base_lr_rate,
                        #                               iter=current_iter, lr_decay_iter=1,
                        #                               max_iter=max_iter, power=power)
                        optimizer.zero_grad()
                        # loss = criterion(outs, train_batch_label)
                        # CrossEntropyLoss wants class indices, so take the
                        # argmax of the one-hot label.
                        gt_confidence, gt_index = torch.max(train_batch_label, dim=1)
                        loss = criterion(outs, gt_index)
                        running_train_loss += loss.item()
                        current_average_train_loss = running_train_loss / current_train_iter
                        loss.backward(retain_graph=False)
                        optimizer.step()
                        pred_confidence, pred_index = torch.max(outs, dim=1)
                        batch_correct_preds = torch.eq(pred_index, gt_index).long().sum().item()
                        batch_accuracy = (batch_correct_preds / train_batch_window.shape[0]) * 100
                        num_train_data += train_batch_window.shape[0]
                        running_train_correct_preds += batch_correct_preds
                        if current_train_iter % 10 == 0:
                            print(f"\nITER#{current_train_iter} BATCH TRAIN ACCURACY: "
                                  f"{batch_accuracy}, RUNNING TRAIN LOSS: {loss.item()}")
                            print(f"Predicted / GT index:\n{pred_index}\n{gt_index}\n")
                last_epoch_average_train_loss = current_average_train_loss
                epoch_accuracy = (running_train_correct_preds / num_train_data) * 100
                print(f"EPOCH#{current_epoch+1} EPOCH TRAIN ACCURACY: {epoch_accuracy}, "
                      f"AVERAGE TRAIN LOSS: {last_epoch_average_train_loss}")
                train_time_elapsed = time.time() - train_epoch_since
            # Validation loop
            elif phase == 'val':
                val_epoch_since = time.time()
                model.eval()
                with torch.no_grad():
                    for val_windows, val_track_id in val_windows_loader:
                        val_iterating_dataset = Andersson_iterating_dataset(
                            windows=val_windows, track_id=val_track_id)
                        val_iterating_loader = DataLoader(val_iterating_dataset,
                                                          batch_size=batch_size,
                                                          shuffle=True)
                        for val_batch_window, val_batch_label in val_iterating_loader:
                            current_val_iter += 1
                            outs = model(val_batch_window)
                            gt_confidence, gt_index = torch.max(val_batch_label, dim=1)
                            # val_loss = criterion(outs, val_batch_label)
                            val_loss = criterion(outs, gt_index)
                            running_val_loss += val_loss.item()
                            current_average_val_loss = running_val_loss / current_val_iter
                            pred_confidence, pred_index = torch.max(outs, dim=1)
                            batch_correct_preds = torch.eq(pred_index, gt_index).long().sum().item()
                            batch_accuracy = (batch_correct_preds / val_batch_window.shape[0]) * 100
                            num_val_data += val_batch_window.shape[0]
                            running_val_correct_preds += batch_correct_preds
                            if current_val_iter % 10 == 0:
                                print(f"ITER#{current_val_iter} BATCH VALIDATION ACCURACY: "
                                      f"{batch_accuracy}, RUNNING VALIDATION LOSS: {val_loss.item()}")
                                print(f"Predicted / GT index: {pred_index} / {gt_index}\n")
                last_epoch_average_val_loss = current_average_val_loss
                epoch_accuracy = (running_val_correct_preds / num_val_data) * 100
                print(f"EPOCH#{current_epoch+1} EPOCH VALIDATION ACCURACY: {epoch_accuracy}, "
                      f"AVERAGE VALIDATION LOSS: {last_epoch_average_val_loss}")
                val_time_elapsed = time.time() - val_epoch_since
        loss = criterion(output, target)
        # backpropagation, compute gradients
        loss.backward()
        # apply gradients
        optimizer.step()
        train_loss += loss.data.item()
        y_pred = output.argmax(dim=1, keepdim=True)
        train_correct += y_pred.eq(target.view_as(y_pred)).sum().item()
    train_loss /= len(train_iter)
    train_accuracy = 100 * train_correct / len(train_iter.dataset)
    net.eval()
    val_correct = 0
    val_loss = 0  # reset before accumulating over the validation set
    for batch in val_iter:
        text, target = batch.text, batch.label
        output = net(text)
        loss = criterion(output, target)
        val_loss += loss.data.item()
        y_pred = output.argmax(dim=1, keepdim=True)
        val_correct += y_pred.eq(target.view_as(y_pred)).sum().item()
    val_loss /= len(val_iter)
    val_accuracy = 100 * val_correct / len(val_iter.dataset)
    # Both continuation strings need the f prefix; otherwise the braces
    # print literally.
    print(f"Epoch {epoch + 1} :: Train/Loss {round(train_loss, 3)} :: "
          f"Train/Accuracy {round(train_accuracy, 3)}")
    print(f"Epoch {epoch + 1} :: Val/Loss {round(val_loss, 3)} :: "
          f"Val/Accuracy {round(val_accuracy, 3)}")
def main(train_type=None):
    model_path = './model.pth'
    # dir_path = Path('/home/g19tka13/Downloads/data/3C')
    # data_path = dir_path / 'taskA/train.csv'
    train_data, weighted = strtolist()
    test_data = loadtestdata()
    preudo_list = []
    used_unlabeled_data = None
    unlabeled_data = None
    vocab = None
    if train_type == 'self_train':
        unlabeled_data = pd.read_csv('/home/g19tka13/taskA/aclgenerate.csv', sep=',')
        unlabeled_data = unlabeled_data.head(3000)
        vocab = load_word_vector(train_data, test_data, 'self_train', unlabeled_data)
        # prelabeled_data = None
        # vocab = load_word_vector(train_data, test_data, 'self_train', used_unlabeled_data)
        # if len(preudo_list) == 0:  # is this the first round of training?
        #     train_iter, val_iter, label_word_id = assemble(train_data, vocab, 1)
        # else:  # add the pseudo-labeled data
        #     train_iter, val_iter, label_word_id = assemble(train_data, vocab, 1, prelabeled_data)
    else:
        vocab = load_word_vector(train_data, test_data)
        # train_iter, val_iter, label_word_id = assemble(train_data, vocab, 1)
        # test_iter, unlabel_iter = assemble(test_data, vocab, 0)
        # return train_iter, val_iter, test_iter, vocab, weighted, label_word_id
    best_val_f1 = 0
    if train_type == 'self_train':
        prelabel_data = None
        vocab_size = vocab.vectors.size()
        print('Total num. of words: {}, word vector dimension: {}'.format(
            vocab_size[0], vocab_size[1]))
        model = LSTM(vocab_size[0], vocab_size[1], hidden_size=100,
                     num_layers=2, batch=10)
        model.embedding.weight.data = vocab.vectors
        model.embedding.weight.requires_grad = False
        print(model)
        while len(preudo_list) < 2700:
            class_id = []
            delete_id = []
            if len(preudo_list) == 0:  # first round: train on labeled data only
                train_iter, val_iter, label_word_id = assemble(train_data, vocab, 1)
            else:  # later rounds: also feed in the pseudo-labeled data
                train_iter, val_iter, label_word_id = assemble(
                    train_data, vocab, 1, prelabeled_data=prelabel_data)
            test_iter, unlabel_iter = assemble(test_data, vocab, 0,
                                               unlabeled_data=unlabeled_data)
            weight = torch.tensor(weighted)
            train_iter = Data.DataLoader(train_iter, batch_size=10, shuffle=True)
            val_iter = Data.DataLoader(val_iter, batch_size=10, shuffle=True)
            test_iter = Data.DataLoader(test_iter, batch_size=10, shuffle=False)
            unlabel_iter = Data.DataLoader(unlabel_iter, batch_size=10, shuffle=False)
            # vocab_size = vocab.vectors.size()
            # print('Total num. of words: {}, word vector dimension: {}'.format(
            #     vocab_size[0], vocab_size[1]))
            # model = LSTM(vocab_size[0], vocab_size[1], hidden_size=100, num_layers=2, batch=10)
            # model.embedding.weight.data = vocab.vectors
            # model.embedding.weight.requires_grad = False  # keep the pretrained word vectors frozen
            # print(model)
            optimizer = optim.Adam(model.parameters(), lr=0.0005)
            n_epoch = 10
            # nn.CrossEntropyLoss takes its class weights once, at construction
            # loss_cs = nn.CrossEntropyLoss(weight=weight)
            # loss_fnc = nn.CosineEmbeddingLoss()
            # loss_mes = nn.MSELoss()
            y = torch.ones(1).long()
            for epoch in range(n_epoch):
                # On where to place model.train(), see
                # https://blog.csdn.net/andyL_05/article/details/107004401
                model.train()
                for item_idx, item in enumerate(train_iter, 0):
                    label = item[2]
                    unique_num, count = torch.unique(label, return_counts=True)  # sorted by default
                    unique_num = unique_num.tolist()
                    # Per-batch class weights: rarer classes get larger weights.
                    real_weight = torch.ones(6, dtype=torch.float)
                    for i in range(6):
                        if i in unique_num:
                            idx = unique_num.index(i)
                            real_weight[i] = 1 / np.log(1.02 + count[idx] / 10)
                        else:
                            real_weight[i] = 1 / np.log(2.02)
                    optimizer.zero_grad()
                    out = model(item)
                    # label_pred = KMeans(n_clusters=6, init=label_out).fit_predict(out)
                    # fixed weight, result = 0.1716:
                    # loss = F.cross_entropy(out, label.long(), weight=weight)
                    # real-time weight calculation:
                    loss = F.cross_entropy(out, label.long(), weight=real_weight)
                    # nn.CosineEmbeddingLoss expects 2-D inputs, not 1-D.
                    # loss = loss_fnc(torch.unsqueeze(label_pred, dim=0),
                    #                 torch.unsqueeze(label.long(), dim=0), y)
                    # loss = Variable(loss, requires_grad=True)
                    # loss_MES = loss_mes(out, label_vector)
                    # loss = loss_fnc(out, torch.Tensor(one_hot), y)
                    loss.backward()
                    optimizer.step()
                    if (item_idx + 1) % 5 == 0:
                        # torch.max returns (values, indices); the indices are
                        # the predicted classes.
                        train_value, train_y_pre = torch.max(out, 1)
                        f1 = f1_score(label.long(), train_y_pre, average='macro')
                        print('epoch: %d \t item_idx: %d \t loss: %.4f \t f1: %.4f'
                              % (epoch, item_idx, loss, f1))
                model.eval()  # evaluate the model once per epoch
                val_pre_label = []
                val_y_label = []
                with torch.no_grad():
                    for item in val_iter:
                        label = item[2]
                        out = model(item)
                        _, val_y_pre = torch.max(out, 1)
                        val_pre_label.extend(val_y_pre)
                        val_y_label.extend(label)
                    f1 = f1_score(torch.Tensor(val_y_label).long(),
                                  torch.Tensor(val_pre_label), average='macro')
                    print(f1)
                    if f1 > best_val_f1:
                        print('val f1: %.4f > %.4f saving model %.4f'
                              % (f1, best_val_f1, len(preudo_list)))
                        torch.save(model.state_dict(), model_path)
                        best_val_f1 = f1
            model.eval()  # one training round done; now create pseudo-labels
            with torch.no_grad():
                for item in unlabel_iter:
                    index = item[2]
                    out = model(item)
                    out = F.softmax(out, dim=1)
                    predict_value, predict_class = torch.max(out, 1)
                    for i in range(len(predict_value)):
                        if predict_value[i] > 0.9:
                            # record the index so the item can be removed from
                            # the unlabeled pool
                            delete_id.append(index[i].item())
                            class_id.append(predict_class[i].item())
            preudo_list.extend(delete_id)
            if len(preudo_list) != 0:
                unlabeled_data, prelabel_data = split_unlabeled_data(
                    unlabeled_data, delete_id, class_id, prelabel_data)
    else:
        train_iter, val_iter, label_word_id, label_to_id = assemble(
            train_data, vocab, 1)
        test_iter, unlabel_iter = assemble(test_data, vocab, 0)
        # train_iter, val_iter, test_iter, vocab, weight, label_word_id = load_data()
        weight = torch.tensor(weighted)
        train_iter = Data.DataLoader(train_iter, batch_size=batch_size, shuffle=True)
        val_iter = Data.DataLoader(val_iter, batch_size=batch_size, shuffle=True)
        test_iter = Data.DataLoader(test_iter, batch_size=batch_size, shuffle=False)
        vocab_size = vocab.vectors.size()
        print('Total num. of words: {}, word vector dimension: {}'.format(
            vocab_size[0], vocab_size[1]))
        model = LSTM(vocab_size[0], vocab_size[1], hidden_size=100,
                     num_layers=2, batch=batch_size)
        model.embedding.weight.data = vocab.vectors
        # keep the pretrained word vectors frozen (no updates during training)
        model.embedding.weight.requires_grad = False
        print(model)
        optimizer = optim.Adam(model.parameters(), lr=0.001)
        n_epoch = 50
        best_val_f1 = 0
        # nn.CrossEntropyLoss takes its class weights once, at construction
        # loss_cs = nn.CrossEntropyLoss(weight=weight)
        # size_average/reduce are deprecated and redundant with reduction='mean'
        loss_fnc = nn.CosineEmbeddingLoss(reduction='mean')
        # loss_mes = nn.MSELoss()
        one_list = torch.ones((batch_size, 1), dtype=torch.float)
        zero_list = torch.zeros((batch_size, 1), dtype=torch.float)
        for epoch in range(n_epoch):
            # On where to place model.train(), see
            # https://blog.csdn.net/andyL_05/article/details/107004401
            model.train()
            batch_loss = 0
            for item_idx, item in enumerate(train_iter, 0):
                label = item[2]
                unique_num, count = torch.unique(label, return_counts=True)  # sorted by default
                unique_num = unique_num.tolist()
                # Per-batch class weights: rarer classes get larger weights.
                real_weight = torch.ones(6, dtype=torch.float)
                for i in range(6):
                    if i in unique_num:
                        idx = unique_num.index(i)
                        real_weight[i] = 1 / np.log(1.02 + count[idx] / batch_size)
                    else:
                        real_weight[i] = 1 / np.log(2.02)
                optimizer.zero_grad()
                # out, p_rep, n_rep = model(item, label_to_id)
                out, out_o, label_matrix, out_len, label_id = model(item, label_to_id)
                # label_pred = KMeans(n_clusters=6, init=label_out).fit_predict(out)
                # fixed weight, result = 0.1716:
                # loss = F.cross_entropy(out, label.long(), weight=weight)
                # real-time weight calculation:
                p_rep, n_rep = confusion(out_o, label_matrix, out_len, label_id)
                loss1 = F.cross_entropy(out, label.long(), weight=real_weight)
                loss2 = loss_fnc(out, p_rep, one_list)
                loss3 = loss_fnc(out, n_rep, zero_list)
                loss = loss1 + loss2 + loss3
                # batch_loss = batch_loss + loss2 + loss
                # nn.CosineEmbeddingLoss expects 2-D inputs, not 1-D.
                # loss = loss_fnc(torch.unsqueeze(label_pred, dim=0),
                #                 torch.unsqueeze(label.long(), dim=0), y)
                # loss = Variable(loss, requires_grad=True)
                # loss_MES = loss_mes(out, label_vector)
                # loss = loss_fnc(out, torch.Tensor(one_hot), y)
                loss.backward()
                optimizer.step()
                if (item_idx + 1) % 5 == 0:
                    # torch.max returns (values, indices); the indices are the
                    # predicted classes.
                    _, train_y_pre = torch.max(out, 1)
                    f1 = f1_score(label.long(), train_y_pre, average='macro')
                    print('epoch: %d \t item_idx: %d \t loss: %.4f \t f1: %.4f'
                          % (epoch, item_idx, loss, f1))
            # batch_loss = 0
            # validate once after each epoch
            val_pre_label = []
            val_y_label = []
            model.eval()
            with torch.no_grad():
                for item in val_iter:
                    label = item[2]
                    out = model(item)
                    _, val_y_pre = torch.max(out, 1)
                    val_pre_label.extend(val_y_pre)
                    val_y_label.extend(label)
                f1 = f1_score(torch.Tensor(val_y_label).long(),
                              torch.Tensor(val_pre_label), average='macro')
                print(f1)
                if f1 > best_val_f1:
                    print('val f1: %.4f > %.4f saving model' % (f1, best_val_f1))
                    torch.save(model.state_dict(), model_path)
                    best_val_f1 = f1
    test_pre_label = []
    test_y_label = []
    model_state = torch.load(model_path)
    model.load_state_dict(model_state)
    model.eval()
    with torch.no_grad():
        for item_idx, item in enumerate(test_iter, 0):
            label = item[2]
            out = model(item)
            _, test_pre = torch.max(out, 1)
            test_pre_label.extend(test_pre)
            test_y_label.extend(label)
    final_f1 = f1_score(torch.Tensor(test_y_label).long(),
                        torch.Tensor(test_pre_label), average='macro')
    print('test_pre_label', collections.Counter(torch.Tensor(test_pre_label).tolist()))
    print('test_y_label', collections.Counter(torch.Tensor(test_y_label).tolist()))
    print('test f1 : %.4f' % final_f1)
    generate_submission(torch.Tensor(test_pre_label).tolist())
    count = {}
    test_pre = torch.Tensor(test_pre_label).tolist()
    test_true = torch.Tensor(test_y_label).tolist()
    c_matrix = confusion_matrix(test_true, test_pre, labels=[0, 1, 2, 3, 4, 5])
    print(c_matrix)
    for i in range(len(test_true)):
        if test_true[i] == test_pre[i]:
            if test_true[i] not in count.keys():
                count[test_true[i]] = 1
            else:
                count[test_true[i]] += 1
    print(count)
    pre_true = pd.DataFrame(columns=['true_id', 'pre_id'])
    pre_true['true_id'] = pd.Series(test_true)
    pre_true['pre_id'] = pd.Series(test_pre)
    pre_true.to_csv('/home/g19tka13/taskA/true_predict.csv', sep=',', index=False)
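    # Per-class recall can be read straight off the confusion matrix (rows
    # are true labels), a compact alternative to the manual `count`
    # dictionary above; clip avoids division by zero for empty classes.
    recalls = c_matrix.diagonal() / c_matrix.sum(axis=1).clip(min=1)
    print({label: round(r, 4) for label, r in enumerate(recalls)})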