def main(epochs=5, learning_rate=1e-3):
    # use GPU
    device = torch.device('cuda')

    # get data loaders
    training = get_dataloader(train=True)
    testing = get_dataloader(train=False)

    # model
    model = CNN().to(device)
    info('Model')
    print(model)

    # cost function
    cost = torch.nn.BCELoss()

    # optimizer
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(1, epochs + 1):
        info('Epoch {}'.format(epoch))
        train(model, device, training, cost, optimizer, epoch)
        test(model, device, testing, cost)

    # save model
    info('Saving Model')
    save_model(model, device, 'model.onnx')
    print('Saving PyTorch Model as model.pth')
    torch.save(model.state_dict(), 'model.pth')
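# `save_model` is not defined in this excerpt. A minimal sketch of an ONNX
# export helper with the same call signature; the dummy input shape is an
# assumption, not taken from the original code:
def save_model(model, device, path):
    model.eval()
    dummy_input = torch.randn(1, 1, 28, 28, device=device)  # hypothetical input shape
    torch.onnx.export(model, dummy_input, path)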
def bilstm_train(self, numEpochs, batch_size, save_file, lr):
    # note: despite the name, this trains the CNN model
    print('training .....')
    # set up loss function -- cross-entropy loss
    loss_func = nn.CrossEntropyLoss()
    net = CNN(embed_dim=100)
    # net.load_state_dict(torch.load('model_50.pth'))
    # SGD with momentum used for optimization
    optimization = torch.optim.SGD(net.parameters(), lr=lr, momentum=0.9)
    net.cuda()
    loss_func.cuda()
    train_losses = []
    test_losses = []
    for epoch in range(numEpochs):
        # training set -- perform model training
        epoch_training_loss = 0.0
        num_batches = 0
        pbar = tqdm(range(0, len(self.train_seqs), batch_size))
        for batch_num in pbar:
            # split training data into inputs and labels
            end = min(batch_num + batch_size, len(self.train_seqs))
            raw_inputs = self.train_seqs[batch_num:end]
            labels_ = self.train_labels[batch_num:end]
            inputs_ = self.get_embedding(raw_inputs)
            inputs = torch.from_numpy(inputs_).float().cuda()
            labels = torch.from_numpy(labels_).cuda()
            # make gradients zero for parameters 'W', 'b'
            optimization.zero_grad()
            # forward, backward pass with parameter update
            forward_output = net(inputs)
            loss = loss_func(forward_output, labels)
            loss.backward()
            optimization.step()
            # accumulate loss
            epoch_training_loss += loss.item()
            num_batches += 1
            pbar.set_description("processing batch %s" % str(batch_num))
        print("epoch: ", epoch, ", loss: ", epoch_training_loss / num_batches)
        # train_loss = self.test(net, batch_size=256, test_data=self.train_seqs, test_label=self.train_labels)
        test_loss = self.test(net, batch_size=256, test_data=self.test_seqs, test_label=self.test_labels)
        # train_losses.append(train_loss)
        test_losses.append(test_loss)
        # if epoch % 10 == 0:
        #     save_path = save_file + 'model3_' + str(epoch) + '.pth'
        #     torch.save(net.state_dict(), save_path)

    # with open('train_loss_1.p', 'wb') as fin:
    #     pickle.dump(train_losses, fin)
    with open('test_loss_1.p', 'wb') as fin:
        pickle.dump(test_losses, fin)
def train():
    fluid.enable_dygraph(device)
    processor = SentaProcessor(data_dir=args.data_dir,
                               vocab_path=args.vocab_path,
                               random_seed=args.random_seed)
    num_labels = len(processor.get_labels())

    num_train_examples = processor.get_num_examples(phase="train")
    max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count

    train_data_generator = processor.data_generator(
        batch_size=args.batch_size,
        padding_size=args.padding_size,
        places=device,
        phase='train',
        epoch=args.epoch,
        shuffle=False)

    eval_data_generator = processor.data_generator(
        batch_size=args.batch_size,
        padding_size=args.padding_size,
        places=device,
        phase='dev',
        epoch=args.epoch,
        shuffle=False)

    if args.model_type == 'cnn_net':
        model = CNN(args.vocab_size, args.batch_size, args.padding_size)
    elif args.model_type == 'bow_net':
        model = BOW(args.vocab_size, args.batch_size, args.padding_size)
    elif args.model_type == 'gru_net':
        model = GRU(args.vocab_size, args.batch_size, args.padding_size)
    elif args.model_type == 'bigru_net':
        model = BiGRU(args.vocab_size, args.batch_size, args.padding_size)

    optimizer = fluid.optimizer.Adagrad(learning_rate=args.lr,
                                        parameter_list=model.parameters())

    inputs = [Input([None, None], 'int64', name='doc')]
    labels = [Input([None, 1], 'int64', name='label')]

    model.prepare(optimizer,
                  CrossEntropy(),
                  Accuracy(topk=(1, )),
                  inputs,
                  labels,
                  device=device)

    model.fit(train_data=train_data_generator,
              eval_data=eval_data_generator,
              batch_size=args.batch_size,
              epochs=args.epoch,
              save_dir=args.checkpoints,
              eval_freq=args.eval_freq,
              save_freq=args.save_freq)
def init_model(nfm=32, res_blocks=1, in_frames=2, batch_size=2, epoch_to_load=None):
    resnet = ResNet(nfm * 2, res_blocks)
    if torch.cuda.is_available():
        resnet = resnet.cuda()
    my_unet = U_Net(nfm, resnet, 1, 1)
    discriminator = CNN((in_frames + 1) * 3, nfm, 512)

    if epoch_to_load is not None:
        my_unet = torch.load('unet_epoch_{}'.format(epoch_to_load))
        discriminator = torch.load('D_epoch_{}'.format(epoch_to_load))

    if torch.cuda.is_available():
        my_unet, discriminator = my_unet.cuda(), discriminator.cuda()

    Unet_optim = torch.optim.Adam(my_unet.parameters(), lr=0.002)
    D_optim = torch.optim.Adam(discriminator.parameters(), lr=0.002)

    return {'Unet': my_unet,
            'Discriminator': discriminator,
            'Unet_optimizer': Unet_optim,
            'Discriminator_optimizer': D_optim}
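# A minimal usage sketch for `init_model`; the variable names on the left are
# illustrative, while the dict keys come from the function above:
parts = init_model(nfm=32, res_blocks=1, in_frames=2)
unet, disc = parts['Unet'], parts['Discriminator']
unet_optim, disc_optim = parts['Unet_optimizer'], parts['Discriminator_optimizer']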
def main(args):
    if not os.path.exists(args.logdir):
        os.makedirs(args.logdir)
    logger = get_logger(os.path.join(args.logdir, 'train_source.log'))
    logger.info(args)

    # data
    source_transform = transforms.Compose([transforms.ToTensor()])
    source_dataset_train = SVHN('./input', 'train',
                                transform=source_transform, download=True)
    source_dataset_test = SVHN('./input', 'test',
                               transform=source_transform, download=True)
    source_train_loader = DataLoader(source_dataset_train, args.batch_size,
                                     shuffle=True, drop_last=True,
                                     num_workers=args.n_workers)
    source_test_loader = DataLoader(source_dataset_test, args.batch_size,
                                    shuffle=False, num_workers=args.n_workers)

    # train source CNN
    source_cnn = CNN(in_channels=args.in_channels).to(args.device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(source_cnn.parameters(), lr=args.lr,
                           weight_decay=args.weight_decay)
    source_cnn = train_source_cnn(source_cnn, source_train_loader,
                                  source_test_loader, criterion, optimizer,
                                  args=args)
pred = output.data.max(1, keepdim=True)[1]  # get the index of the max log-probability
correct += pred.eq(target.data.view_as(pred)).cpu().sum().item()

test_loss /= len(test_loader.dataset)
accuracy = 100. * correct / len(test_loader.dataset)
accuracy_list.append(accuracy)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    test_loss, correct, len(test_loader.dataset), accuracy))

n_features = 8  # number of feature maps

model_cnn = CNN(input_size, n_features, output_size)
optimizer = optim.SGD(model_cnn.parameters(), lr=0.01, momentum=0.5)
print('Number of parameters: {}'.format(get_n_params(model_cnn)))

for epoch in range(0, 1):
    train(epoch, model_cnn)
    test(model_cnn)

print("Multiple hidden layers CNN model:")
print()

model_cnn = deepCNN(input_size, n_features, output_size)
optimizer = optim.SGD(model_cnn.parameters(), lr=0.01, momentum=0.5)
print('Number of parameters: {}'.format(get_n_params(model_cnn)))

for epoch in range(0, 1):
    train(epoch, model_cnn)
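# `get_n_params` is not shown in this excerpt; a minimal sketch of a parameter
# counter with the same call signature (an assumption about the original helper):
def get_n_params(model):
    # sum over every parameter tensor's element count
    return sum(p.numel() for p in model.parameters())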
class Solver(object):
    def __init__(self, config, data_loader):
        self.config = config
        self.data_loader = data_loader

    def build(self, is_train):
        if torch.cuda.is_available():
            self.model = nn.DataParallel(CNN(self.config)).cuda()
        else:
            self.model = CNN(self.config)
        self.loss_fn = self.config.loss_fn()
        if is_train:
            self.model.train()
            self.optimizer = self.config.optimizer(self.model.parameters(),
                                                   lr=self.config.lr)
        else:
            if torch.cuda.is_available():
                self.model = self.model.module
            self.model.eval()

    def save(self, ckpt_path):
        """Save model parameters"""
        print('Save parameters at ', ckpt_path)
        if torch.cuda.is_available():
            torch.save(self.model.module.state_dict(), ckpt_path)
        else:
            torch.save(self.model.state_dict(), ckpt_path)

    def load(self, ckpt_path=None, epoch=None):
        """Load model parameters"""
        if not (ckpt_path or epoch):
            epoch = self.config.epochs
        if epoch:
            ckpt_path = os.path.join(self.config.save_dir, f'epoch-{epoch}.pkl')
        print('Load parameters from ', ckpt_path)
        print(self.model)
        self.model.load_state_dict(torch.load(ckpt_path))

    def train_once(self):
        loss_history = []
        for batch_i, batch in enumerate(tqdm(self.data_loader)):
            text, label = batch.text, batch.label
            if torch.cuda.is_available():
                text = text.cuda()
                label = label.cuda()
            text.data.t_()
            logit = self.model(text)
            average_batch_loss = self.loss_fn(logit, label)
            loss_history.append(average_batch_loss.item())
            self.optimizer.zero_grad()
            average_batch_loss.backward()
            self.optimizer.step()
        epoch_loss = np.mean(loss_history)
        return epoch_loss

    def train(self):
        """Train model with training data"""
        for epoch in tqdm(range(self.config.epochs)):
            loss_history = []
            for batch_i, batch in enumerate(tqdm(self.data_loader)):
                # text: [max_seq_len, batch_size]
                # label: [batch_size]
                text, label = batch.text, batch.label
                if torch.cuda.is_available():
                    text = text.cuda()
                    label = label.cuda()
                # [batch_size, max_seq_len]
                text.data.t_()
                # [batch_size, 2]
                logit = self.model(text)
                # Calculate loss
                average_batch_loss = self.loss_fn(logit, label)  # [1]
                loss_history.append(average_batch_loss.item())  # Tensor -> float
                # Flush out remaining gradient
                self.optimizer.zero_grad()
                # Backpropagation
                average_batch_loss.backward()
                # Gradient descent
                self.optimizer.step()

            # Log intermediate loss
            if (epoch + 1) % self.config.log_every_epoch == 0:
                epoch_loss = np.mean(loss_history)
                log_str = f'Epoch {epoch + 1} | loss: {epoch_loss:.4f}\n'
                print(log_str)

            # Save model parameters
            if (epoch + 1) % self.config.save_every_epoch == 0:
                ckpt_path = os.path.join(self.config.save_dir,
                                         f'epoch-{epoch+1}.pkl')
                self.save(ckpt_path)

    def eval(self):
        """Evaluate model on text data"""
        n_total_data = 0
        n_correct = 0
        loss_history = []
        for _, batch in enumerate(tqdm(self.data_loader)):
            # text: [max_seq_len, batch_size]
            # label: [batch_size]
            text, label = batch.text, batch.label
            if torch.cuda.is_available():
                text = text.cuda()
                label = label.cuda()
            # [batch_size, max_seq_len]
            text.data.t_()
            # [batch_size, 2]
            logit = self.model(text)
            # Calculate loss
            average_batch_loss = self.loss_fn(logit, label)  # [1]
            loss_history.append(average_batch_loss.item())  # Tensor -> float
            # Calculate accuracy
            n_total_data += len(label)
            # [batch_size]
            _, prediction = logit.max(1)
            n_correct += (prediction == label).sum().item()
        epoch_loss = np.mean(loss_history)
        accuracy = n_correct / float(n_total_data)
        print(f'Loss: {epoch_loss:.2f}')
        print(f'Accuracy: {accuracy}')
        return epoch_loss, accuracy

    def inference(self, text):
        text = Variable(torch.LongTensor([text]))
        # [batch_size, 2]
        logit = self.model(text)
        _, prediction = torch.max(logit, dim=1)
        return prediction

    def train_eval(self):
        # Set this variable to your MLflow server's DNS name or IP
        mlflow_server = '172.23.147.124'
        # Tracking URI
        mlflow_tracking_URI = 'http://' + mlflow_server + ':5000'
        print("MLflow Tracking URI: %s" % mlflow_tracking_URI)

        with mlflow.start_run():
            for key, value in vars(self.config).items():
                mlflow.log_param(key, value)
            # output_dir = 'mlflow_logs'
            # if not os.path.exists(output_dir):
            #     os.mkdir(output_dir)
            for epoch in tqdm(range(self.config.epochs)):
                # print out the active run
                print("Active Run ID: %s, Epoch: %s \n" % (mlflow.active_run(), epoch))

                train_loss = self.train_once()
                mlflow.log_metric('train_loss', train_loss)

                val_loss, val_acc = self.eval()
                mlflow.log_metric('val_loss', val_loss)
                mlflow.log_metric('val_acc', val_acc)

            # Finish run
            mlflow.end_run(status='FINISHED')
def main(args):
    '''
    main function - describes the whole model pipeline
    - it should refer to separate train and validation functions
    - loads datasets, initializes models and hyperparameters
    - sets the learning rate and optimizer, passes arguments to the training loop
    '''
    # load hyperparameters
    with open(args.config_file, 'r') as f:
        hyps = json.load(f)

    # why do we need to transform images?
    # - images differ from each other in size
    # - random cropping can help because it lets the model see the different
    #   ways an object can be represented
    # - normalisation speeds up training and makes pre-trained models easier to use
    transform = transforms.Compose([
        transforms.Resize((256, 256)),
        # transforms.RandomCrop((x, x)),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])

    # load the dataset
    dataset = cats_dogs_dataset(args.data_dir, args.annotations_file, transform=transform)

    # split dataset into training and validation (25k = 20k + 5k)
    train_set, validation_set = torch.utils.data.random_split(dataset, [20000, 5000])

    # train loader
    train_loader = DataLoader(dataset=train_set,
                              shuffle=bool(hyps['shuffle']),
                              batch_size=hyps['batch_size'],
                              num_workers=hyps['num_workers'],
                              pin_memory=bool(hyps['pin_memory']))
    # val loader
    validation_loader = DataLoader(dataset=validation_set,
                                   shuffle=bool(hyps['shuffle']),
                                   batch_size=hyps['batch_size'],
                                   num_workers=hyps['num_workers'],
                                   pin_memory=bool(hyps['pin_memory']))

    # initialize the model and move it to the GPU
    model = CNN().to(device)
    # print(model)

    # fine-tune only the classifier head; freeze everything else
    for name, param in model.named_parameters():
        if 'classifier' in name:
            param.requires_grad = True
        else:
            param.requires_grad = False

    # define loss (binary cross-entropy for the binary classification task)
    criterion = nn.BCELoss()

    # define optimizer (SGD) on model parameters with the specified learning rate
    optimizer = torch.optim.SGD(model.parameters(), lr=hyps['learning_rate'])

    train(model, criterion, optimizer, train_loader, validation_loader, hyps['num_epochs'])
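# The `train` function called above is not part of this excerpt. A minimal
# sketch of a loop with that signature, reusing the same global `device`; it
# assumes the model ends in a sigmoid (BCELoss expects probabilities) and
# outputs shape [batch, 1] -- both assumptions, not facts from the original:
def train(model, criterion, optimizer, train_loader, validation_loader, num_epochs):
    for epoch in range(num_epochs):
        model.train()
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            loss = criterion(model(images).squeeze(1), labels.float())
            loss.backward()
            optimizer.step()
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for images, labels in validation_loader:
                images, labels = images.to(device), labels.to(device)
                val_loss += criterion(model(images).squeeze(1), labels.float()).item()
        print(f'epoch {epoch + 1}: val loss {val_loss / len(validation_loader):.4f}')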
class Solver(object):
    def __init__(self, config, data_loader):
        self.config = config
        self.data_loader = data_loader

    def build(self, is_train):
        self.model = CNN(self.config)
        self.loss_fn = self.config.loss_fn()
        if is_train:
            self.model.train()
            self.optimizer = self.config.optimizer(self.model.parameters(),
                                                   lr=self.config.lr)
        else:
            self.model.eval()

    def train(self):
        for epoch in tqdm(range(self.config.epochs)):
            loss_history = []
            for batch_i, batch in enumerate(tqdm(self.data_loader)):
                # text: [max_seq_len, batch_size]
                # label: [batch_size]
                text, label = batch.text, batch.label
                # [batch_size, max_seq_len]
                text.data.t_()
                # [batch_size, 2]
                logit = self.model(text)
                # Calculate loss
                average_batch_loss = self.loss_fn(logit, label)  # [1]
                loss_history.append(average_batch_loss.item())  # Tensor -> float
                # Flush out remaining gradient
                self.optimizer.zero_grad()
                # Backpropagation
                average_batch_loss.backward()
                # Gradient descent
                self.optimizer.step()

            # Log intermediate loss
            if (epoch + 1) % self.config.log_every_epoch == 0:
                epoch_loss = np.mean(loss_history)
                log_str = f'Epoch {epoch + 1} | loss: {epoch_loss:.2f}\n'
                print(log_str)

            # Save model parameters
            if (epoch + 1) % self.config.save_every_epoch == 0:
                ckpt_path = os.path.join(self.config.save_dir, f'epoch-{epoch+1}.pkl')
                print('Save parameters at ', ckpt_path)
                torch.save(self.model.state_dict(), ckpt_path)

    def eval(self, epoch=None):
        # Load model parameters
        if not isinstance(epoch, int):
            epoch = self.config.epochs
        ckpt_path = os.path.join(self.config.save_dir, f'epoch-{epoch}.pkl')
        print('Load parameters from ', ckpt_path)
        self.model.load_state_dict(torch.load(ckpt_path))

        loss_history = []
        for _, batch in tqdm(enumerate(self.data_loader)):
            # text: [max_seq_len, batch_size]
            # label: [batch_size]
            text, label = batch.text, batch.label
            # [batch_size, max_seq_len]
            text.data.t_()
            # [batch_size, 2]
            logit = self.model(text)
            # Calculate loss
            average_batch_loss = self.loss_fn(logit, label)  # [1]
            loss_history.append(average_batch_loss.item())  # Tensor -> float
        epoch_loss = np.mean(loss_history)
        print(f'Loss: {epoch_loss:.2f}')
    shuffle=True)
# Set up the iteration: dataset = the data, batch_size = number of samples per
# training step, num_workers = number of CPU threads used for loading data,
# shuffle = whether to shuffle the data order

if MODEL == 'CNN':
    from models import CNN
    model = CNN()
elif MODEL == 'MLP':
    from models import MLP
    model = MLP()
else:
    raise NotImplementedError("You need to choose among [CNN, MLP].")

loss = nn.CrossEntropyLoss()  # declare CrossEntropyLoss as the loss object
# CrossEntropyLoss applies softmax internally before computing the loss.

optim = torch.optim.Adam(model.parameters(), lr=2e-4, betas=(0.5, 0.99))
# declare the optimizer. lr: learning rate, i.e. how strongly the gradient is
# applied when the weights are updated.
# beta1 and beta2 are the exponential decay rates for Adam's first and second
# moment estimates (it is eps, not the betas, that keeps the denominator nonzero).

EPOCHS = 5  # how many passes over the whole dataset to train
total_step = 0  # counts the total number of training steps
list_loss = list()  # list for storing losses

for epoch in trange(EPOCHS):  # run for EPOCHS iterations
    for i, data in enumerate(data_loader):  # iterate over data_loader with index i
        total_step += 1  # advance the step counter
        input, label = data[0], data[1]  # unpack inputs and labels from the batch
        # input shape = [32, 1, 28, 28]  [batch size, channel, height, width]
        input = input.view(
n_epochs = 5000
batch_size_train = 32
batch_size_test = 1000
learning_rate = 0.01
momentum = 0.5
weight_decay = .001
random_seed = 1

torch.backends.cudnn.enabled = True
torch.manual_seed(random_seed)

network = CNN()
network.cuda()

gpu_available = "GPU available?: " + str(torch.cuda.is_available())
using_cuda = "Network using cuda?: " + str(next(network.parameters()).is_cuda)
print(gpu_available)
print(using_cuda)

logging.info("\n\n------------------------------------------------------")
logging.info(gpu_available)
logging.info(using_cuda)
logging.info(
    f"""\n\nNetwork Details:
    n_epochs = {n_epochs}
    batch_size_train = {batch_size_train}
    batch_size_test = {batch_size_test}
    learning_rate = {learning_rate}
    momentum = {momentum}
class TextClassifier:
    def __init__(self, paths, batch_size=6, iterations=50, initial_lr=0.003,
                 hidden_size=256, dropout=0.2, kernel_sz=3):
        self.use_cuda = torch.cuda.is_available()
        self.device = torch.device('cuda:0' if self.use_cuda else 'cpu')
        self.data = DataReader(paths)
        self.data.set_training_data(batch_size, 'cuda:0' if self.use_cuda else 'cpu')
        self.train_batch_loader = BatchGenerator(self.data.train_data, 'Sentence', 'Label')
        self.val_batch_loader = BatchGenerator(self.data.val_data, 'Sentence', 'Label')
        self.test_batch_loader = BatchGenerator(self.data.test_data, 'Sentence', 'Label')

        # Store hyperparameters
        self.batch_size = batch_size
        self.iterations = iterations
        self.initial_lr = initial_lr
        self.kernel_sz = kernel_sz

        # Create model
        emb_size, emb_dim = self.data.TEXT.vocab.vectors.size()
        self.cnn_model = CNN(emb_size=emb_size,
                             emb_dimension=emb_dim,
                             n_out=len(self.data.LABEL.vocab),
                             dropout=dropout,
                             kernel_sz=kernel_sz,
                             stride=1,
                             padding=0,
                             out_filters=hidden_size,
                             pretrained_emb=self.data.TEXT.vocab.vectors)
        if self.use_cuda:
            self.cnn_model.cuda()

    def train(self):
        train_loss_hist = []
        val_loss_hist = []
        train_acc_hist = []
        val_acc_hist = []
        test_acc_hist = []
        best_model = 0.0
        # plain SGD keeps no internal state, so the optimizer can be built once
        # rather than once per iteration
        optimizer = optim.SGD(self.cnn_model.parameters(), lr=self.initial_lr)

        for itr in range(self.iterations):
            print("\nIteration: " + str(itr + 1))
            self.cnn_model.train()
            total_loss = 0.0
            total_acc = 0.0
            steps = 0

            # Iterating over the batch generator with a plain for loop drops the
            # target variable (y), so loop over the length and pull batches manually.
            data_iter = iter(self.train_batch_loader)
            for i in range(len(self.train_batch_loader)):
                ((x_batch, x_len_batch), y_batch) = next(data_iter)
                # if torch.min(x_len_batch) > self.kernel_sz:
                optimizer.zero_grad()
                loss, logits = self.cnn_model.forward(x_batch, y_batch)
                acc = torch.sum(torch.argmax(logits, dim=1) == y_batch)
                total_loss += loss.item()
                total_acc += acc.item()
                steps += 1
                loss.backward()
                optimizer.step()

            train_loss_hist.append(total_loss / steps)
            train_acc_hist.append(total_acc / len(self.data.trainds))

            val_loss, val_acc = self.eval_model(self.val_batch_loader, len(self.data.valds))
            val_loss_hist.append(val_loss)
            val_acc_hist.append(val_acc)

            if best_model < val_acc:
                best_model = val_acc
                test_loss, test_acc = self.eval_model(self.test_batch_loader, len(self.data.testds))

            print("Train: {Loss: " + str(total_loss / steps) +
                  ", Acc: " + str(total_acc / len(self.data.trainds)) + " }")
            print("Val: {Loss: " + str(val_loss) + ", Acc: " + str(val_acc) + " }")
            # test_loss, test_acc = self.eval_model(self.test_batch_loader, len(self.data.testds))
            test_acc_hist.append(test_acc)

        return train_loss_hist, train_acc_hist, val_loss_hist, val_acc_hist, test_acc

    def eval_model(self, batch_loader, N):
        self.cnn_model.eval()
        total_loss = 0.0
        total_acc = 0.0
        steps = 0
        batch_iter = iter(batch_loader)
        with torch.no_grad():
            for i in range(len(batch_loader)):
                ((x_batch, x_len_batch), y_batch) = next(batch_iter)
                loss, logits = self.cnn_model(x_batch, y_batch)
                acc = torch.sum(torch.argmax(logits, dim=1) == y_batch)
                total_loss += loss.item()
                total_acc += acc.item()
                steps += 1
        return (total_loss / steps), (total_acc / N)
def hp_grid_search(model_type, lr_list, momentum_list, reg_list, batch_size_list,
                   train_ds, valid_ds, optimizer, epochs, loss_type_list=["l1"],
                   save_all_plots="No", save_final_plot="No",
                   final_plot_prefix=None, return_all_loss=False):
    '''
    model_type (str): which model to initialize and test
    lr_list (list of numeric): list of learning rates
    momentum_list (list of numeric): list of momentums
    reg_list (list of numeric): list of regularization penalties
    batch_size_list (list of numeric): list of batch sizes
    train_ds: training dataset after using WaldoDataset
    valid_ds: validation dataset after using WaldoDataset
    loss_type_list (list of str): list of losses if you want to try more than one
    save_all_plots (str): do you want to save every plot? Defaults to "No"
    save_final_plot (str): set to "Yes" to save just the final plot; defaults to "No".
        The final plot is saved automatically if save_all_plots == "Yes"
    final_plot_prefix (str): prefix for the final plot name
    '''
    i = 0
    all_loss_train = []
    all_loss_valid = []
    for lr in lr_list:
        for r in reg_list:
            for m in momentum_list:
                for b in batch_size_list:
                    for loss_type in loss_type_list:
                        print('HP ITERATION: ', i)
                        i += 1
                        print('learning_rate: ', lr)
                        print('regularization: ', r)
                        print('momentum: ', m)
                        print('batch_size: ', b)
                        print('loss type: ', loss_type)
                        param_str = "{0}_{1}_{2}_{3}_{4}_{5}".format(
                            model_type, lr, r, m, b, loss_type)
                        print(param_str)

                        if loss_type == "l1":
                            criterion = nn.L1Loss()
                        if loss_type == "l2":
                            criterion = nn.MSELoss()
                        if model_type == "SimpleCNN":
                            model = CNN()

                        train_dl = DataLoader(train_ds, batch_size=b, shuffle=True)
                        valid_dl = DataLoader(valid_ds, batch_size=b)
                        optimizer = torch.optim.SGD(model.parameters(), lr,
                                                    momentum=m, weight_decay=r)
                        train_loss, valid_loss = train(model_type=model_type,
                                                       model=model,
                                                       optimizer=optimizer,
                                                       train_dl=train_dl,
                                                       valid_dl=valid_dl,
                                                       epochs=epochs,
                                                       criterion=criterion,
                                                       return_loss=True,
                                                       plot=True,
                                                       verbose=True)
                        all_loss_train.append(train_loss)
                        all_loss_valid.append(valid_loss)

                        plt.plot(valid_loss)
                        plt.title('Validation Loss')
                        plt.xlabel('Epoch')
                        plt.ylabel('Loss')
                        if save_all_plots == "Yes":
                            print('./figures/V_{0}.png'.format(param_str))
                            plt.savefig('./figures/V_{0}.png'.format(param_str))
                        plt.show()

                        plt.plot(train_loss)
                        plt.title('Training Loss')
                        plt.xlabel('Epoch')
                        plt.ylabel('Loss')
                        if save_all_plots == "Yes":
                            plt.savefig('./figures/T_{0}.png'.format(param_str))
                        plt.show()

    for pt in all_loss_train:
        plt.plot(pt)
    plt.title('All Plots Training')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    if save_final_plot == "Yes":
        plt.savefig('./figures/{0} All Training Loss.png'.format(final_plot_prefix))
    plt.show()

    for pv in all_loss_valid:
        plt.plot(pv)
    plt.title('All Plots Validation')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    if save_final_plot == "Yes":
        plt.savefig('./figures/{0} All Validation Loss.png'.format(final_plot_prefix))
    plt.show()

    if return_all_loss:
        return all_loss_train, all_loss_valid
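# A minimal usage sketch for `hp_grid_search`; `train_ds` and `valid_ds` are
# assumed to be WaldoDataset instances prepared elsewhere, and the value lists
# are illustrative, not from the original code:
train_losses, valid_losses = hp_grid_search(
    model_type="SimpleCNN",
    lr_list=[0.01, 0.001],
    momentum_list=[0.9],
    reg_list=[0.0, 1e-4],
    batch_size_list=[32],
    train_ds=train_ds,
    valid_ds=valid_ds,
    optimizer=None,  # rebuilt inside the search for each combination
    epochs=10,
    loss_type_list=["l1", "l2"],
    return_all_loss=True)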
class Solver(object):
    def __init__(self, config, data_loader):
        self.config = config
        self.data_loader = data_loader

    def build(self, is_train):
        self.model = CNN(self.config)
        self.loss_fn = self.config.loss_fn()
        if is_train:
            self.model.train()
            self.optimizer = self.config.optimizer(self.model.parameters(),
                                                   lr=self.config.lr)
        else:
            self.model.eval()

    def save(self, ckpt_path):
        """Save model parameters"""
        print('Save parameters at ', ckpt_path)
        torch.save(self.model.state_dict(), ckpt_path)

    def load(self, ckpt_path=None, epoch=None):
        """Load model parameters"""
        if not (ckpt_path or epoch):
            epoch = self.config.epochs
        if epoch:
            ckpt_path = os.path.join(self.config.save_dir, f'epoch-{epoch}.pkl')
        print('Load parameters from ', ckpt_path)
        self.model.load_state_dict(torch.load(ckpt_path))

    def train(self):
        """Train model with training data"""
        for epoch in tqdm(range(self.config.epochs)):
            loss_history = []
            for batch_i, batch in enumerate(tqdm(self.data_loader)):
                # text: [max_seq_len, batch_size]
                # label: [batch_size]
                text, label = batch.text, batch.label
                # [batch_size, max_seq_len]
                text.data.t_()
                # [batch_size, 2]
                logit = self.model(text)
                # Calculate loss
                average_batch_loss = self.loss_fn(logit, label)  # [1]
                loss_history.append(average_batch_loss.item())  # Tensor -> float
                # Flush out remaining gradient
                self.optimizer.zero_grad()
                # Backpropagation
                average_batch_loss.backward()
                # Gradient descent
                self.optimizer.step()

            # Log intermediate loss
            if (epoch + 1) % self.config.log_every_epoch == 0:
                epoch_loss = np.mean(loss_history)
                log_str = f'Epoch {epoch + 1} | loss: {epoch_loss:.2f}\n'
                print(log_str)

            # Save model parameters
            if (epoch + 1) % self.config.save_every_epoch == 0:
                ckpt_path = os.path.join(self.config.save_dir, f'epoch-{epoch+1}.pkl')
                self.save(ckpt_path)

    def eval(self):
        """Evaluate model on text data"""
        n_total_data = 0
        n_correct = 0
        loss_history = []
        for _, batch in enumerate(tqdm(self.data_loader)):
            # text: [max_seq_len, batch_size]
            # label: [batch_size]
            text, label = batch.text, batch.label
            # [batch_size, max_seq_len]
            text.data.t_()
            # [batch_size, 2]
            logit = self.model(text)
            # Calculate loss
            average_batch_loss = self.loss_fn(logit, label)  # [1]
            loss_history.append(average_batch_loss.item())  # Tensor -> float
            # Calculate accuracy
            n_total_data += len(label)
            # [batch_size]
            _, prediction = logit.max(1)
            n_correct += (prediction == label).sum().item()
        epoch_loss = np.mean(loss_history)
        accuracy = n_correct / n_total_data
        print(f'Loss: {epoch_loss:.2f}')
        print(f'Accuracy: {accuracy}')

    def inference(self, text):
        text = Variable(torch.LongTensor([text]))
        # [batch_size, 2]
        logit = self.model(text)
        _, prediction = torch.max(logit, dim=1)
        return prediction
def train(model_name="LSTM", params=None, embedding="Random"): # Parameters to tune print(params) batch_size = params["batch_size"] num_epochs = params["num_epochs"] oversample = params["oversample"] soft_labels = params["soft_labels"] if model_name == "LSTM": learning_rate = params["learning_rate"] hidden_dim = params["hidden_dim"] num_layers = params["num_layers"] dropout = params["dropout"] combine = embedding == "Both" embedding_dim = 300 if combine: embedding = "Random" if model_name == "Bert": learning_rate = params["learning_rate"] num_warmup_steps = params["num_warmup_steps"] num_total_steps = params["num_total_steps"] embedding = "None" # Constants test_percentage = 0.1 val_percentage = 0.2 # Load data torch.manual_seed(42) dataset = Dataset("../data/cleaned_tweets_orig.csv", use_embedding=embedding, embedd_dim=embedding_dim, for_bert=(model_name == "Bert"), combine=combine) train_data, val_test_data = split_dataset(dataset, test_percentage + val_percentage) val_data, test_data = split_dataset( val_test_data, test_percentage / (test_percentage + val_percentage)) train_loader, val_loader, weights = load_data(oversample, train_data, val_data, batch_size) # Define model if model_name == "CNN": vocab_size = len(dataset.vocab) model = CNN(vocab_size, embedding_dim=embedding_dim, combine=params["combine"], n_filters=params["filters"]) elif model_name == "LSTM": vocab_size = len(dataset.vocab) model = LSTM(vocab_size, embedding_dim, batch_size=batch_size, hidden_dim=hidden_dim, lstm_num_layers=num_layers, combine=combine, dropout=dropout) elif model_name == "Bert": model = BertForSequenceClassification.from_pretrained( "bert-base-uncased", num_labels=3) train_loader, val_loader, weights = load_data(oversample, train_data, val_data, batch_size, collate_fn=bert_collate) if not model_name == "Bert": model.embedding.weight.data.copy_(dataset.vocab.vectors) if combine: model.embedding_glove.weight.data.copy_(dataset.glove.vectors) # cuda device = torch.device("cuda" if torch.cuda.is_available() else "cpu") model.to(device) # optimiser scheduler = None optimizer = optim.Adam(model.parameters(), lr=params["learning_rate"]) if model_name == "Bert": optimizer = AdamW(model.parameters(), lr=learning_rate, correct_bias=False) # Linear scheduler for adaptive lr scheduler = WarmupLinearSchedule(optimizer, warmup_steps=num_warmup_steps, t_total=num_total_steps) # weighted cross entropy loss, by class counts of other classess weights = torch.tensor([0.9414, 0.2242, 0.8344], device=device) if soft_labels: criterion = weighted_soft_cross_entropy else: criterion = nn.CrossEntropyLoss(weight=weights) eval_criterion = nn.CrossEntropyLoss(weight=weights) for epoch in range(num_epochs): # train epoch_loss, epoch_acc = train_epoch(model, train_loader, optimizer, criterion, device, scheduler=scheduler, weights=weights) # realtime feel print(f'Epoch: {epoch+1}') print( f'\tTrain Loss: {epoch_loss:.5f} | Train Acc: {epoch_acc*100:.2f}%' ) # Compute F1 score on validation set - this is what we optimise during tuning loss, acc, predictions, ground_truth = evaluate_epoch(model, val_loader, eval_criterion, device, is_final=True) val_f1 = f1_score(y_true=ground_truth, y_pred=predictions, average="macro") print("Done") return val_f1
class Trainer:
    """Trainer."""

    def __init__(self, _hparams):
        utils.set_seed(_hparams.fixed_seed)

        self.train_loader = get_train_loader(_hparams)
        self.val_loader = get_val_loader(_hparams)

        self.encoder = CNN().to(DEVICE)
        self.decoder = RNN(fea_dim=_hparams.fea_dim,
                           embed_dim=_hparams.embed_dim,
                           hid_dim=_hparams.hid_dim,
                           max_sen_len=_hparams.max_sen_len,
                           vocab_pkl=_hparams.vocab_pkl).to(DEVICE)

        self.loss_fn = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.get_params(), lr=_hparams.lr)

        self.writer = SummaryWriter()
        self.max_sen_len = _hparams.max_sen_len
        self.val_cap = _hparams.val_cap
        self.ft_encoder_lr = _hparams.ft_encoder_lr
        self.ft_decoder_lr = _hparams.ft_decoder_lr
        self.best_CIDEr = 0

    def fine_tune_encoder(self, fine_tune_epochs, val_interval, save_path, val_path):
        print('*' * 20, 'fine tune encoder for', fine_tune_epochs, 'epochs', '*' * 20)
        self.encoder.fine_tune()
        self.optimizer = torch.optim.Adam([
            {'params': self.encoder.parameters(), 'lr': self.ft_encoder_lr},
            {'params': self.decoder.parameters(), 'lr': self.ft_decoder_lr},
        ])
        self.training(fine_tune_epochs, val_interval, save_path, val_path)
        self.encoder.froze()
        print('*' * 20, 'fine tune encoder complete', '*' * 20)

    def get_params(self):
        """
        All parameters the model needs to optimize. The encoder is currently
        designed not to be trained, so its parameters are not included.
        """
        return list(self.decoder.parameters())

    def training(self, max_epochs, val_interval, save_path, val_path):
        """
        Train.
        :param val_path: path for saving the sentences generated during validation
        :param save_path: path for saving the model
        :param val_interval: validation interval
        :param max_epochs: maximum number of training epochs
        """
        print('*' * 20, 'train', '*' * 20)
        for epoch in range(max_epochs):
            self.set_train()

            epoch_loss = 0
            epoch_steps = len(self.train_loader)
            for step, (img, cap, cap_len) in tqdm(enumerate(self.train_loader)):
                # batch_size * 3 * 224 * 224
                img = img.to(DEVICE)
                cap = cap.to(DEVICE)

                self.optimizer.zero_grad()

                features = self.encoder.forward(img)
                outputs = self.decoder.forward(features, cap)

                outputs = pack_padded_sequence(outputs, cap_len - 1, batch_first=True)[0]
                targets = pack_padded_sequence(cap[:, 1:], cap_len - 1, batch_first=True)[0]

                train_loss = self.loss_fn(outputs, targets)
                epoch_loss += train_loss.item()
                train_loss.backward()
                self.optimizer.step()

            epoch_loss /= epoch_steps
            self.writer.add_scalar('epoch_loss', epoch_loss, epoch)
            print('epoch_loss: {}, epoch: {}'.format(epoch_loss, epoch))
            if (epoch + 1) % val_interval == 0:
                CIDEr = self.validating(epoch, val_path)
                if self.best_CIDEr <= CIDEr:
                    self.best_CIDEr = CIDEr
                    self.save_model(save_path, epoch)

    def save_model(self, save_path, train_epoch):
        """
        Save the best model.
        :param save_path: path of the saved model file
        :param train_epoch: current training epoch
        """
        model_state_dict = {
            'encoder_state_dict': self.encoder.state_dict(),
            'decoder_state_dict': self.decoder.state_dict(),
            'tran_epoch': train_epoch,
        }
        print('*' * 20, 'save model to: ', save_path, '*' * 20)
        torch.save(model_state_dict, save_path)

    def validating(self, train_epoch, val_path):
        """
        Validate.
        :param val_path: path for saving the sentences generated during validation
        :param train_epoch: current training epoch
        """
        print('*' * 20, 'validate', '*' * 20)
        self.set_eval()
        sen_json = []
        with torch.no_grad():
            for val_step, (img, img_id) in tqdm(enumerate(self.val_loader)):
                img = img.to(DEVICE)
                features = self.encoder.forward(img)
                sens, _ = self.decoder.sample(features)
                sen_json.append({'image_id': int(img_id), 'caption': sens[0]})

        with open(val_path, 'w') as f:
            json.dump(sen_json, f)

        result = coco_eval(self.val_cap, val_path)
        scores = {}
        for metric, score in result:
            scores[metric] = score
            self.writer.add_scalar(metric, score, train_epoch)

        return scores['CIDEr']

    def set_train(self):
        self.encoder.train()
        self.decoder.train()

    def set_eval(self):
        self.encoder.eval()
        self.decoder.eval()
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import Dataset, DataLoader

if __name__ == '__main__':
    opt = Training_options().parse()
    if opt.model == 'cnn':
        from models import CNN
        net = CNN(opt)
    train_predictors, train_predictands = assemble_predictors_predictands(opt, train=True)
    train_dataset = ENSODataset(train_predictors, train_predictands)
    trainloader = DataLoader(train_dataset, batch_size=opt.batch_size)
    optimizer = optim.Adam(net.parameters(), lr=opt.lr)

    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    net = net.to(device)
    best_loss = np.infty
    train_losses = []
    net.train()
    criterion = nn.MSELoss()
    for epoch in range(opt.epoch):
        running_loss = 0.0
        for i, data in enumerate(trainloader):
            batch_predictors, batch_predictands = data
            batch_predictands = batch_predictands.to(device)
            batch_predictors = batch_predictors.to(device)

            optimizer.zero_grad()
            predictions = net(batch_predictors).squeeze()
            loss = criterion(predictions, batch_predictands.squeeze())
            # (assumed continuation: standard backward pass and parameter update)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
if args.init_emb:
    assert emb.shape[1] == args.emb_dim
    emb = torch.Tensor(emb)
else:
    emb = None

if args.model == "cnn":
    model = CNN(len(data["word2idx"]), args.emb_dim, args.out_dim,
                args.window_dim, len(data["lbl2idx"]), args.dp, emb)

if args.fix_emb:
    model.embedding.weight.requires_grad = False

loss = torch.nn.CrossEntropyLoss()
optim = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.l2)

if args.cuda:
    model.cuda()

trainModel(args, model, loss, optim, trainData, valData)

if args.submit:
    # load the best model saved during training
    model.load_state_dict(
        torch.load(args.path_savedir + "{}_{}.model".format(args.model, args.epochs)))
    model.eval()
    preds_val = predict(model, valData)
    batch_size=batch_size,
    shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

model = CNN()
if torch.cuda.is_available():
    model.cuda()

criterion = nn.CrossEntropyLoss()
learning_rate = 0.1
writer.add_scalar("Learning_Rate", learning_rate)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

train(train_loader, test_loader, model, optimizer, criterion)

# torch.save(model, "./CNN.pt")
# model2 = torch.load("./CNN.pt")

# torch.save(model.state_dict(), "./CNN_State.pt")
# model2 = CNN()
# model2.load_state_dict(torch.load("./CNN_State.pt"))

# torch.save({
#     "Learning Rate": learning_rate,
#     "model_state_dict": model.state_dict(),
#     "optimizer_state_dict": optimizer.state_dict(),
#     "batch size": batch_size,
#     "number of epochs": num_epochs
# }, "./checkpoint.pt")
def train():
    transforms = Compose([ToTensor()])
    train_dataset = CaptchaData('./data/train', transform=transforms)
    train_data_loader = DataLoader(train_dataset, batch_size=batch_size,
                                   num_workers=0, shuffle=True, drop_last=True)
    test_data = CaptchaData('./data/test', transform=transforms)
    test_data_loader = DataLoader(test_data, batch_size=batch_size,
                                  num_workers=0, shuffle=True, drop_last=True)
    cnn = CNN()
    if torch.cuda.is_available():
        cnn.cuda()
    if restor:
        cnn.load_state_dict(torch.load(model_path))
        # freezing_layers = list(cnn.named_parameters())[:10]
        # for param in freezing_layers:
        #     param[1].requires_grad = False
        #     print('freezing layer:', param[0])

    optimizer = torch.optim.Adam(cnn.parameters(), lr=base_lr)
    criterion = nn.MultiLabelSoftMarginLoss()

    for epoch in range(max_epoch):
        start_ = time.time()

        loss_history = []
        acc_history = []
        cnn.train()
        for img, target in train_data_loader:
            if torch.cuda.is_available():
                img = img.cuda()
                target = target.cuda()
            output = cnn(img)

            loss = criterion(output, target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            acc = calculat_acc(output, target)
            acc_history.append(float(acc))
            loss_history.append(float(loss))
        print('train_loss: {:.4}|train_acc: {:.4}'.format(
            torch.mean(torch.Tensor(loss_history)),
            torch.mean(torch.Tensor(acc_history)),
        ))

        loss_history = []
        acc_history = []
        cnn.eval()
        for img, target in test_data_loader:
            if torch.cuda.is_available():
                img = img.cuda()
                target = target.cuda()
            output = cnn(img)
            # compute the loss on the test batch as well
            loss = criterion(output, target)

            acc = calculat_acc(output, target)
            acc_history.append(float(acc))
            loss_history.append(float(loss))
        print('test_loss: {:.4}|test_acc: {:.4}'.format(
            torch.mean(torch.Tensor(loss_history)),
            torch.mean(torch.Tensor(acc_history)),
        ))
        print('epoch: {}|time: {:.4f}'.format(epoch, time.time() - start_))

        torch.save(cnn.state_dict(), model_path)
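# `calculat_acc` is not shown in this excerpt. A sketch of one common
# definition for multi-label captcha output: reshape to
# (batch, num_char, num_class), take the argmax per character position, and
# count a sample as correct only if every character matches. The reshape
# dimensions are assumptions, not values from the original code:
def calculat_acc(output, target, num_char=4, num_class=36):
    output = output.view(-1, num_char, num_class).argmax(dim=2)
    target = target.view(-1, num_char, num_class).argmax(dim=2)
    correct = (output == target).all(dim=1).float()
    return correct.mean()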
          'pretrained_word_embeddings_file': pretrained_word_embeddings_file,
          'transform_train': transform_train,
          'transform_val': transform_val,
          'WEIGHT_DECAY': WEIGHT_DECAY,
          'ADAM_FLAG': ADAM_FLAG,
          'RNN_DROPOUT': RNN_DROPOUT,
          'CNN_DROPOUT': CNN_DROPOUT,
          'GRAD_CLIP': GRAD_CLIP}

print('Initializing models...')
encoder = CNN(NO_WORD_EMBEDDINGS, pretrained_cnn_dir, freeze=True,
              dropout_prob=CNN_DROPOUT, model_name='resnet152')
decoder = RNN(VOCAB_SIZE, NO_WORD_EMBEDDINGS, hidden_size=HIDDEN_SIZE,
              num_layers=NUM_LAYERS,
              pre_trained_file=pretrained_word_embeddings_file,
              freeze=False, dropout_prob=RNN_DROPOUT)
params['encoder'] = encoder
params['decoder'] = decoder
encoder.cuda()
decoder.cuda()

print('Initializing optimizer...')
model_paras = list(encoder.parameters()) + list(decoder.parameters())
optimizer = optim.Adam(model_paras, lr=LR, weight_decay=WEIGHT_DECAY)
params['optimizer'] = optimizer

pickle.dump(params, open(init_params_file, 'wb'))

# initialize accumulators
current_epoch = 1
batch_step_count = 1
time_used_global = 0.0
checkpoint = 1

# load latest model to resume training
model = gmsCNN(kernelg=args.kernelg,
               kernels=args.kernels,
               kernel=args.kernel,
               num_filters=args.num_filters,
               rate=args.rate)

fname = "models/gmsCNN_" + args.data + str(args.kernel) + "_" + str(
    args.kernelg) + "_" + str(args.kernels) + "_" + str(
        args.num_filters) + "_" + str(args.batch_size) + "_" + str(
            args.rate) + ".model"

if args.gpu:
    model = model.cuda()

# Training setup
L = t.nn.CrossEntropyLoss()
optimizer = t.optim.Adam(model.parameters(), lr=args.learn_rate)

if not os.path.exists("models"):
    os.makedirs("models")

# load a pre-existing model to continue training
if os.path.exists(fname):
    model.load_state_dict(t.load(fname))
    print("Successfully loaded previous model " + str(fname))

# start with a model defined on 0
# train_mix, test, train_data, train_labels = dataFetch()
# # select only 0 category
# train_dataset = customDataset(train_data[0], train_labels[0])
# # define train and test as DataLoaders
    ]))
water = datasets.ImageFolder(root=WATER_DIRECTORY,
                             transform=transforms.Compose([
                                 transforms.ToTensor()
                             ]))
ship_loader = torch.utils.data.DataLoader(dataset=ship,
                                          batch_size=BATCH_SIZE,
                                          shuffle=True)
water_loader = torch.utils.data.DataLoader(dataset=water,
                                           batch_size=BATCH_SIZE,
                                           shuffle=True)

### INIT MODEL
device = torch.device("cpu")  # change if on GPU; also need to use .cuda()
model = CNN().to(device)

### MSE LOSS AND ADAM OPTIMIZER
# size_average/reduce are deprecated; the defaults are equivalent (mean reduction)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, amsgrad=True)

### TRAIN (SHIP == 1, WATER == 0)
for epoch in range(EPOCHS):
    print(f"EPOCH {epoch}")
    correct, total = 0, 0
    for idx, ((positive, _), (negative, __)) in enumerate(zip(ship_loader, water_loader)):
        # training on ship batch
        out_positive = model(positive)
        loss = criterion(out_positive, torch.ones(1))
        optimizer.zero_grad()
        loss.sum().backward()
        optimizer.step()
                     train=False,
                     download=False,
                     transform=transforms.ToTensor())

'''model setup'''
model = CNN(arch, channels, kernels, num_class, input_shape,  # cnn parameters
            hidden_dims, activation, p_drop, batchnorm)       # mlp parameters
optimizer = optim.Adam(model.parameters())
criterion = nn.CrossEntropyLoss()
print(f'The Model:\n{model}')

'''train the model'''
(train_loss, train_acc, test_loss, test_acc) = \
    model_train(model, trainer, optimizer, criterion,
                tester=tester, batch_size=500, epochs=epochs)

'''plot the performance'''
performance_plot(train_loss, test_loss, 0.7, "loss", "Loss.jpeg")
performance_plot(train_acc, test_acc, 0.7, "accuracy", "Accuracy.jpeg")
# batch_size: the number of samples processed at once
# shuffle=True: whether to shuffle the data

if MODEL == 'CNN':
    from models import CNN
    model = CNN()
elif MODEL == 'MLP':
    from models import MLP
    model = MLP()
else:
    raise NotImplementedError("You need to choose among [CNN, MLP].")

loss = nn.CrossEntropyLoss()
optim = torch.optim.Adam(model.parameters(), lr=2e-4, betas=(0.5, 0.99), eps=1e-8)
# parameters: the weights to optimize; lr: a coefficient applied to the
# gradient, since the raw gradient values are too large to apply directly

EPOCHS = 1  # how many passes over the whole dataset to train
total_step = 0
list_loss = list()

for epoch in range(EPOCHS):
    for i, data in enumerate(data_loader):  # enumerate yields an index alongside each batch
        total_step = total_step + 1
        input, label = data[0], data[1]
        # input shape [32, 1, 28, 28]: batch size, channel, height, width
        # flatten to [batch_size, channel * height * width] so the MLP can consume it
        # (view vs reshape? check how they relate to memory layout)
        input = input.view(input.shape[0], -1) if MODEL == 'MLP' else input
        classification_results = model.forward(input)  # [batch size, 10]
        # subclasses of nn.Module can be called directly, omitting .forward()
def training_run_cnn(combination, criterion, train_loader, valid_loader, run):
    n_featuremap_1, n_featuremap_2, mode = combination
    model_path = "CNN_run_{}.pt".format(run)
    results[model_path] = dict()

    # initialize the network with the given configuration
    my_net = CNN(n_featuremap_1=n_featuremap_1, n_featuremap_2=n_featuremap_2)
    # initialize weights with the given mode
    my_net.apply(partial(init_weights, mode=mode))
    my_net.to(device)
    optimizer = torch.optim.Adam(my_net.parameters())

    for epoch in range(10):  # loop over the training dataset multiple times
        training_loss = .0
        pbar = tqdm(total=len(train_loader))  # per-epoch progress over batches
        for batch_idx, (x, target) in enumerate(train_loader):
            x, target = x.to(device), target.to(device)
            # zero the parameter gradients
            optimizer.zero_grad()
            # forward + backward + optimize
            outputs = my_net(x).view(-1, 1)
            loss = criterion(outputs, target.view(-1, 1))
            loss.backward()
            optimizer.step()
            if epoch == 9:
                # accumulate training loss in the last epoch
                training_loss += loss.item() * len(x)
            if batch_idx % 100 == 99:  # print every 100 mini-batches
                print("[ Epoch %d, Batch %2d] loss: %.3f"
                      % (epoch + 1, batch_idx + 1, loss.item()))
            pbar.update(1)

    # update results
    results[model_path]["training_loss"] = training_loss / len(train)
    print("Finished Training !")

    print("Start Evaluating !")
    # Validation loss
    valid_loss = .0
    correct = 0
    thres = 0.5
    with torch.no_grad():
        for batch_idx, (x, target) in enumerate(valid_loader):
            x, target = x.to(device), target.to(device)
            outputs = my_net(x).view(-1, 1)
            prediction = outputs >= thres
            correct += prediction.eq(target.view(-1, 1)).sum().item()
            loss = criterion(outputs, target.view(-1, 1))
            valid_loss += loss.item() * len(x)

    # update results
    results[model_path]["validation_loss"] = valid_loss / len(valid)
    results[model_path]["accuracy"] = correct / len(valid)

    # save model to disk
    torch.save(my_net.state_dict(), "./models/" + model_path)
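# `init_weights` is not included in this excerpt. A minimal sketch of a
# mode-switched initializer compatible with
# `my_net.apply(partial(init_weights, mode=...))`; the supported mode names
# are assumptions, not taken from the original code:
def init_weights(m, mode="xavier"):
    if isinstance(m, (torch.nn.Conv2d, torch.nn.Linear)):
        if mode == "xavier":
            torch.nn.init.xavier_uniform_(m.weight)
        elif mode == "kaiming":
            torch.nn.init.kaiming_uniform_(m.weight, nonlinearity="relu")
        if m.bias is not None:
            torch.nn.init.zeros_(m.bias)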
def main():
    torch.manual_seed(42)

    # Random
    #params = {'batch_size': 32, 'dropout': 0, 'hidden_dim': 128, 'learning_rate': 0.01,
    #          'num_epochs': 5, 'num_layers': 2, 'oversample': False, 'soft_labels': False}
    # Glove
    params = {
        'batch_size': 32,
        'dropout': 0,
        'hidden_dim': 128,
        'learning_rate': 0.001,
        'num_epochs': 5,
        'num_layers': 2,
        'oversample': False,
        'soft_labels': False
    }
    # Random
    #params = {'batch_size': 32, 'dropout': 0, 'hidden_dim': 256, 'learning_rate': 0.0001,
    #          'num_epochs': 5, 'num_layers': 3, 'oversample': False, 'soft_labels': False}

    # some params
    experiment_number = 1
    test_percentage = 0.1
    val_percentage = 0.2
    batch_size = params["batch_size"]
    num_epochs = 5  # params["num_epochs"]
    dropout = params["dropout"]
    embedding_dim = 300
    model_name = "CNN"  # 'Bert' # "LSTM"
    unsupervised = True
    embedding = "Glove"  # "Random" # "Both"
    soft_labels = False
    combine = embedding == "Both"

    # LSTM parameters
    if model_name == "LSTM":
        hidden_dim = params["hidden_dim"]
        num_layers = params["num_layers"]
    # Bert parameters
    num_warmup_steps = 100
    num_total_steps = 1000
    if model_name == "Bert":
        embedding = "None"
    if embedding == "Both":
        combine = True
        embedding = "Random"
    else:
        combine = False
    learning_rate = params["learning_rate"]  # 5e-5, 3e-5, 2e-5
    oversample_bool = False
    weighted_loss = True

    # load data
    dataset = Dataset("../data/cleaned_tweets_orig.csv",
                      use_embedding=embedding,
                      embedd_dim=embedding_dim,
                      combine=combine,
                      for_bert=(model_name == "Bert"))
    #dataset.oversample()
    train_data, val_test_data = split_dataset(dataset, test_percentage + val_percentage)
    val_data, test_data = split_dataset(
        val_test_data, test_percentage / (test_percentage + val_percentage))
    #save_data(train_data, 'train')
    #save_data(test_data, 'test')

    # define loaders
    if oversample_bool:
        weights, targets = get_loss_weights(train_data, return_targets=True)
        # approximate per-class sample counts used to build the oversampling weights
        class_sample_count = [1024 / 20, 13426, 2898 / 2]
        oversample_weights = 1 / torch.Tensor(class_sample_count)
        oversample_weights = oversample_weights[targets]
        # oversample_weights = torch.tensor([0.9414, 0.2242, 0.8344])
        sampler = torch.utils.data.sampler.WeightedRandomSampler(
            oversample_weights, len(oversample_weights))
        train_loader = torch.utils.data.DataLoader(train_data,
                                                   batch_size=batch_size,
                                                   collate_fn=my_collate,
                                                   sampler=sampler)
    else:
        train_loader = torch.utils.data.DataLoader(train_data,
                                                   batch_size=batch_size,
                                                   collate_fn=my_collate)
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=batch_size,
                                             collate_fn=my_collate)

    # define model
    if model_name == "CNN":
        vocab_size = len(dataset.vocab)
        model = CNN(vocab_size, embedding_dim, combine=combine)
    elif model_name == "LSTM":
        vocab_size = len(dataset.vocab)
        model = LSTM(vocab_size, embedding_dim,
                     batch_size=batch_size,
                     hidden_dim=hidden_dim,
                     lstm_num_layers=num_layers,
                     combine=combine,
                     dropout=dropout)
    elif model_name == "Bert":
        model = BertForSequenceClassification.from_pretrained(
            "bert-base-uncased", num_labels=3)
        train_loader = torch.utils.data.DataLoader(train_data,
                                                   batch_size=batch_size,
                                                   collate_fn=bert_collate)
        val_loader = torch.utils.data.DataLoader(val_data,
                                                 batch_size=batch_size,
                                                 collate_fn=bert_collate)

    # device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # LOSS: weighted cross-entropy loss, weighted by the counts of the other classes
    if weighted_loss:
        weights = torch.tensor([0.9414, 0.2242, 0.8344], device=device)
    else:
        weights = torch.ones(3, device=device)
    #weights = get_loss_weights(train_data).to(device)  # not to run again
    criterion = nn.CrossEntropyLoss(weight=weights)
    if soft_labels:
        criterion = weighted_soft_cross_entropy

    # latent model
    if unsupervised:
        vocab_size = len(dataset.vocab)
        criterion = nn.CrossEntropyLoss(weight=weights, reduction='none')
        model = Rationalisation_model(vocab_size,
                                      embedding_dim=embedding_dim,
                                      model=model_name,
                                      batch_size=batch_size,
                                      combine=combine,
                                      criterion=criterion)
    if not model_name == "Bert":
        model.embedding.weight.data.copy_(dataset.vocab.vectors)
        if combine:
            model.embedding_glove.weight.data.copy_(dataset.glove.vectors)

    # model to device
    model.to(device)

    # optimiser
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    if model_name == "Bert":
        optimizer = AdamW(model.parameters(), lr=learning_rate, correct_bias=False)
        # Linear scheduler for adaptive lr
        scheduler = WarmupLinearSchedule(optimizer,
                                         warmup_steps=num_warmup_steps,
                                         t_total=num_total_steps)
    else:
        scheduler = None

    plot_log = defaultdict(list)
    for epoch in range(num_epochs):
        # train and validate
        epoch_loss, epoch_acc = train_epoch(model, train_loader, optimizer,
                                            criterion, device,
                                            soft_labels=soft_labels,
                                            weights=weights,
                                            scheduler=scheduler,
                                            unsupervised=unsupervised)
        val_loss, val_acc = evaluate_epoch(model, val_loader, criterion, device,
                                           soft_labels=soft_labels,
                                           weights=weights,
                                           unsupervised=unsupervised)
        # save for plotting
        for name, point in zip(
                ["train_loss", "train_accuracy", "val_loss", "val_accuracy"],
                [epoch_loss, epoch_acc, val_loss, val_acc]):
            plot_log[name].append(point)
        # realtime feedback
        print(f'Epoch: {epoch+1}')
        print(f'\tTrain Loss: {epoch_loss:.5f} | Train Acc: {epoch_acc*100:.2f}%')
        print(f'\t Val. Loss: {val_loss:.5f} | Val. Acc: {val_acc*100:.2f}%')
        sample_sentences_and_z(model, train_loader, device, dataset.vocab)

    # save plots
    results_directory = f'plots/{experiment_number}'
    os.makedirs(results_directory, exist_ok=True)
    for name, data in plot_log.items():
        save_plot(data, name, results_directory)

    # save model
    torch.save(model, os.path.join(results_directory, 'model_cnn.pth'))

    # confusion matrix and related reports
    loss, acc, predictions, ground_truth = evaluate_epoch(model, val_loader,
                                                          criterion, device,
                                                          is_final=True,
                                                          soft_labels=soft_labels,
                                                          weights=weights,
                                                          unsupervised=unsupervised)
    conf_matrix = confusion_matrix(ground_truth, predictions)
    class_report = classification_report(ground_truth, predictions)
    print('\nFinal Loss and Accuracy\n----------------\n')
    print(f'\t Val. Loss: {loss:.5f} | Val. Acc: {acc*100:.2f}%')
    print('\nCONFUSION MATRIX\n----------------\n')
    print(conf_matrix)
    print('\nCLASSIFICATION REPORT\n----------------------\n')
    print(class_report)
    plot_confusion_matrix(ground_truth, predictions,
                          classes=["Hate speech", "Offensive", "Neither"],
                          normalize=False,
                          title='Confusion matrix')
    plt.show()
def train():
    transforms = Compose([Resize((height, width)), ToTensor()])
    train_dataset = CaptchaData(train_data_path,
                                num_class=len(alphabet),
                                num_char=int(numchar),
                                transform=transforms,
                                alphabet=alphabet)
    train_data_loader = DataLoader(train_dataset, batch_size=batch_size,
                                   num_workers=num_workers, shuffle=True,
                                   drop_last=True)
    test_data = CaptchaData(test_data_path,
                            num_class=len(alphabet),
                            num_char=int(numchar),
                            transform=transforms,
                            alphabet=alphabet)
    test_data_loader = DataLoader(test_data, batch_size=batch_size,
                                  num_workers=num_workers, shuffle=True,
                                  drop_last=True)
    cnn = CNN(num_class=len(alphabet), num_char=int(numchar),
              width=width, height=height)
    if use_gpu:
        cnn.cuda()
    optimizer = torch.optim.Adam(cnn.parameters(), lr=base_lr)
    criterion = nn.MultiLabelSoftMarginLoss()

    for epoch in range(max_epoch):
        start_ = time.time()

        loss_history = []
        acc_history = []
        cnn.train()
        for img, target in train_data_loader:
            if use_gpu:
                img = img.cuda()
                target = target.cuda()
            output = cnn(img)

            loss = criterion(output, target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            acc = calculat_acc(output, target)
            acc_history.append(float(acc))
            loss_history.append(float(loss))
        print('epoch:{},train_loss: {:.4}|train_acc: {:.4}'.format(
            epoch,
            torch.mean(torch.Tensor(loss_history)),
            torch.mean(torch.Tensor(acc_history)),
        ))

        loss_history = []
        acc_history = []
        cnn.eval()
        for img, target in test_data_loader:
            if use_gpu:
                img = img.cuda()
                target = target.cuda()
            output = cnn(img)
            # compute the loss on the test batch as well
            loss = criterion(output, target)

            acc = calculat_acc(output, target)
            acc_history.append(float(acc))
            loss_history.append(float(loss))
        print('test_loss: {:.4}|test_acc: {:.4}'.format(
            torch.mean(torch.Tensor(loss_history)),
            torch.mean(torch.Tensor(acc_history)),
        ))
        print('epoch: {}|time: {:.4f}'.format(epoch, time.time() - start_))

        torch.save(cnn.state_dict(),
                   os.path.join(model_path, "model_{}.path".format(epoch)))
def create_cifar_model(ema=False):
    model = CNN()
    if ema:
        # the EMA (teacher) copy is updated manually rather than by the
        # optimizer, so detach its parameters from the autograd graph
        for param in model.parameters():
            param.detach_()
    return model
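# A companion update step for the detached EMA model, in the standard
# mean-teacher style; the function name and decay value are assumptions, not
# from the original source:
def update_ema_model(model, ema_model, alpha=0.999):
    with torch.no_grad():
        for ema_p, p in zip(ema_model.parameters(), model.parameters()):
            # ema_p <- alpha * ema_p + (1 - alpha) * p
            ema_p.mul_(alpha).add_(p, alpha=1 - alpha)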
def main(args):
    args.logdir = args.logdir + args.mode
    if not os.path.exists(args.logdir):
        os.makedirs(args.logdir)
    logger = get_logger(os.path.join(args.logdir, 'train_source.log'))
    logger.info(args)

    # data
    # source_transform = transforms.Compose([transforms.ToTensor()])
    # source_dataset_train = SVHN(
    #     './input', 'train', transform=source_transform, download=True)
    # source_dataset_test = SVHN(
    #     './input', 'test', transform=source_transform, download=True)
    # source_train_loader = DataLoader(
    #     source_dataset_train, args.batch_size, shuffle=True,
    #     drop_last=True, num_workers=args.n_workers)
    # source_test_loader = DataLoader(
    #     source_dataset_test, args.batch_size, shuffle=False,
    #     num_workers=args.n_workers)

    source_dataset_name = 'MNIST'
    target_dataset_name = 'mnist_m'
    source_image_root = os.path.join('dataset', source_dataset_name)
    target_image_root = os.path.join('dataset', target_dataset_name)
    batch_size = 128
    image_size = 28

    img_transform_source = transforms.Compose([
        transforms.Resize(image_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307, ), std=(0.3081, ))
    ])

    dataset_source_train = datasets.MNIST(root='dataset',
                                          train=True,
                                          transform=img_transform_source,
                                          download=True)
    source_train_loader = torch.utils.data.DataLoader(
        dataset=dataset_source_train,
        batch_size=batch_size,
        shuffle=True,
        num_workers=8)

    dataset_source_test = datasets.MNIST(root='dataset',
                                         train=False,
                                         transform=img_transform_source)
    source_test_loader = torch.utils.data.DataLoader(
        dataset=dataset_source_test,
        batch_size=batch_size,
        shuffle=False,
        num_workers=8)

    # train source CNN
    source_cnn = CNN(in_channels=args.in_channels).to(args.device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(source_cnn.parameters(), lr=args.lr,
                           weight_decay=args.weight_decay)
    source_cnn = train_source_cnn(source_cnn, source_train_loader,
                                  source_test_loader, criterion, optimizer,
                                  args=args)