def train(model, train_dataset, valid_dataset, pad_index, epochs, batch_size,
          pool_size, optimizer, clip, log_every, valid_every):
    """Run the training loop for a code-search model.

    Iterates over ``train_dataset`` in mini-batches, optimizing ``model``
    with ``optimizer``, logging the mean loss every ``log_every`` batches
    and running validation every ``valid_every`` batches.  The checkpoint
    with the best validation MRR is written to ``log/code_search.pt``.

    Args:
        model: module whose forward pass returns the batch loss directly.
        train_dataset: indexable training set consumed via ``get_batch``.
        valid_dataset: validation set consumed via ``validate``.
        pad_index: padding-token index forwarded to the batch helpers.
        epochs: number of passes over the training data.
        batch_size: mini-batch size.
        pool_size: candidate-pool size used by ``validate``.
        optimizer: torch optimizer over ``model.parameters()``.
        clip: max gradient norm for clipping.
        log_every: logging period, in batches.
        valid_every: validation period, in batches.
    """
    print("start training")
    logger = buildLogger("log/train.log", "train")
    batch_step = 0  # number of batches processed across ALL epochs
    timestamp = datetime.now().strftime('%Y-%m-%d-%H:%M')
    tb_writer = SummaryWriter(f'log/{timestamp}')
    max_valid_mrr = 0
    for epoch in range(epochs):
        itr_start_time = time.time()
        index = 0
        losses = []
        while index < len(train_dataset):
            model.train()
            batch = get_batch(train_dataset, index, batch_size, pad_index)
            index += batch_size
            optimizer.zero_grad()  # clear stale gradients
            loss = model(*batch)   # forward pass returns the loss itself
            loss.backward()
            # Clip gradient norm to keep updates stable (exploding gradients).
            torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
            optimizer.step()  # update network parameters
            losses.append(loss.item())
            if batch_step % log_every == 0 and batch_step:
                # Mean loss over the window since the last log.
                # (Fixed message typo: "bath_step" -> "batch_step".)
                elapsed = time.time() - itr_start_time
                logger.info("epoch:{} batch_step:{} step_time:{:.3f}s loss:{:.3f}".format(
                    epoch, batch_step, elapsed, np.mean(losses)))
                if tb_writer:
                    tb_writer.add_scalar('loss', np.mean(losses),
                                         batch_step * batch_size)
                losses = []
                itr_start_time = time.time()
            if batch_step % valid_every == 0 and batch_step:
                print("validating....")
                # Renamed local from `map` so the builtin is not shadowed.
                acc, mrr, map_score, ndcg = validate(valid_dataset, model,
                                                     pool_size, pad_index)
                logger.info(f'acc:{acc},mrr:{mrr},map:{map_score},ndcg:{ndcg}')
                if tb_writer:
                    tb_writer.add_scalar('acc', acc, batch_step)
                    tb_writer.add_scalar('mrr', mrr, batch_step)
                    tb_writer.add_scalar('map', map_score, batch_step)
                    tb_writer.add_scalar('ndcg', ndcg, batch_step)
                if mrr > max_valid_mrr:
                    # New best validation MRR -> checkpoint the model.
                    logger.info(f"max_valid_mrr{mrr}")
                    max_valid_mrr = mrr
                    torch.save(model.state_dict(), "log/code_search.pt")
            batch_step += 1
def _setup(self, config):
    """Load the dataset named by the CLI args and prepare train/test splits,
    hyper-parameters and a logger for SVM tuning.

    Args:
        config: hyper-parameter dict with keys "c" and "gamma".
    """
    self.name = args.Dataset_name
    data = self._get_dataset(self.name)
    self.c = config["c"]
    self.gamma = config["gamma"]
    # Load the entire dataset as a single batch; fetch it ONCE.  The
    # original called next(iter(train_loader)) twice (once for inputs,
    # once for labels), which built a fresh iterator and loaded the whole
    # dataset from disk two times.
    train_loader = DataLoader(data, batch_size=len(data))
    inputs, targets = next(iter(train_loader))
    train = inputs.numpy()
    label = targets.numpy()
    # Flatten each sample to a 1-D feature vector for the SVM.
    train = train.reshape(train.shape[0], -1)
    self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
        train, label, test_size=0.2, random_state=42)
    # Header row of the results table written out later.
    self.seed_table = np.array(["", "c", "gamma", "accuracy"])
    loggername = '{}.log'.format(self.name)
    self.logger = utils.buildLogger(loggername)
    # Fixed message typo: "Date" -> "Data".
    self.logger.info("Data setup Done")
import time import numpy as np from gensim.models.word2vec import Word2Vec from torch.autograd import Variable from torch.utils.data import DataLoader import os import sys from config import batch_size, hidden_dim, feature_dim, use_gpu, epochs, gap from model import rnnModel from tqdm import tqdm from utils import buildLogger, getBatch if __name__ == '__main__': train_dataset = pd.read_pickle('dataset/train.pkl') model = rnnModel(hidden_dim, feature_dim, batch_size) logger = buildLogger("log/train.log", "train") if use_gpu: model.cuda() parameters = model.parameters() optimizer = torch.optim.Adamax(parameters) loss_function = torch.nn.MSELoss() total_len = len(train_dataset) print("start training...") for epoch in range(epochs): start_time = time.time() total_loss = 0 for i in tqdm(range(0, len(train_dataset) - batch_size, batch_size)):
def _setup(self, config):
    """Prepare MNIST loaders, the simplenet model, optimizer and scheduler
    for one tuning trial.

    Args:
        config: hyper-parameter dict with keys "lr", "momentum",
            "weight_decay" and "factor".
    """
    torch.cuda.manual_seed(1)
    # Standard MNIST mean/std normalization.
    preprocess = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307, ), (0.3081, )),
    ])
    dataset = torchvision.datasets.MNIST('~/data', train=True,
                                         transform=preprocess)
    # Random 80/20 train/validation split over the full training set.
    permutation = np.random.permutation(len(dataset))
    n_valid = int(len(dataset) * 0.2)
    valid_idx, train_idx = permutation[:n_valid], permutation[n_valid:]
    self.trainset_ld = DataLoader(dataset, batch_size=256,
                                  sampler=SubsetRandomSampler(train_idx),
                                  num_workers=4)
    self.validset_ld = DataLoader(dataset, batch_size=256,
                                  sampler=SubsetRandomSampler(valid_idx),
                                  num_workers=4)
    self.modelname = './{}.pth.tar'.format("simplenet")
    self.logger = utils.buildLogger(self.modelname.replace("pth.tar", "log"))
    # ---- hyperparameters ----
    lr = config["lr"]
    momentum = config["momentum"]
    weight_decay = config["weight_decay"]
    factor = config["factor"]
    self.epochID = 0
    self.loss = F.nll_loss
    self.accuracy = -999999999999.0
    # -------------------- SETTINGS: NETWORK ARCHITECTURE
    self.model = simplenet().cuda()
    self.logger.info("Build Model Done")
    # -------------------- SETTINGS: OPTIMIZER & SCHEDULER --------------------
    trainable = filter(lambda p: p.requires_grad, self.model.parameters())
    self.optimizer = optim.SGD(trainable, lr=lr, momentum=momentum,
                               weight_decay=weight_decay, nesterov=False)
    self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        self.optimizer, factor=factor, patience=10, mode='min')
    self.logger.info("Build Optimizer Done")
def _setup(self, config):
    """Build dataset loaders, model, optimizer and scheduler for one
    hyper-parameter configuration of the architecture-comparison trainable.

    Args:
        config: hyper-parameter dict with keys "lr", "momentum",
            "weight_decay" and "factor".

    Raises:
        ValueError: if ``args.Network_name`` is not a supported architecture.
    """
    # Fixed seeds so the split and initialization are reproducible.
    random.seed(50)
    np.random.seed(50)
    torch.cuda.manual_seed_all(50)
    torch.manual_seed(50)
    self.total_time = time.time()
    self.name = args.Dataset_name
    nnArchitecture = args.Network_name
    dataset, num_class, input_size = self._get_dataset(self.name)
    # Random 80/20 train/validation split.
    num_total = len(dataset)
    shuffle = np.random.permutation(num_total)
    split_val = int(num_total * 0.2)
    train_idx, valid_idx = shuffle[split_val:], shuffle[:split_val]
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)
    self.trainset_ld = DataLoader(dataset, batch_size=256,
                                  sampler=train_sampler, num_workers=4)
    self.validset_ld = DataLoader(dataset, batch_size=256,
                                  sampler=valid_sampler, num_workers=4)
    self.modelname = '{}--{}.pth.tar'.format(self.name, nnArchitecture)
    loggername = self.modelname.replace("pth.tar", "log")
    self.logger = utils.buildLogger(loggername)
    # Header row of the per-epoch results table.
    self.seed_table = np.array([
        "", "epoch", "lr", "momentum", "weight_decay", "factor", "outLoss",
        "accuracy"
    ])
    # ---- hyperparameters ----
    self.lr = config["lr"]
    self.momentum = config["momentum"]
    self.weight_decay = config["weight_decay"]
    self.factor = config["factor"]
    self.epochID = 0
    self.loss = nn.CrossEntropyLoss()
    self.accuracy = -999999999999.0  # sentinel: best accuracy seen so far
    # -------------------- SETTINGS: NETWORK ARCHITECTURE
    # Dispatch table instead of an if/elif chain; raise instead of the
    # original `assert 0`, which is silently stripped under `python -O`.
    architectures = {
        'Vgg11': Vgg11,
        'Resnet18': Resnet18,
        'MobileNet': MobileNet,
        'MobileNet_V2': MobileNet_V2,
    }
    if nnArchitecture not in architectures:
        raise ValueError(
            "unknown network architecture: {}".format(nnArchitecture))
    self.model = architectures[nnArchitecture](num_class, input_size).cuda()
    self.model = torch.nn.DataParallel(self.model).cuda()
    self.logger.info("Build Model Done")
    # -------------------- SETTINGS: OPTIMIZER & SCHEDULER --------------------
    self.optimizer = optim.SGD(filter(lambda x: x.requires_grad,
                                      self.model.parameters()),
                               lr=self.lr, momentum=self.momentum,
                               weight_decay=self.weight_decay,
                               nesterov=False)
    self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        self.optimizer, factor=self.factor, patience=10, mode='min')
    self.logger.info("Build Optimizer Done")
def main():
    """Train and validate a CLI-selected network, recording per-epoch
    metrics to TensorBoard, a log file and a CSV table.

    Raises:
        ValueError: if ``--Network_name`` is not a supported architecture.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--Dataset_name', type=str, default='')
    parser.add_argument('--Network_name', type=str, default='')
    # BUG FIX: the original used type=float with default=''.  argparse
    # applies the type converter to string defaults, so float('') raised
    # ValueError whenever one of these options was omitted.  Mark them
    # required to keep the "must be supplied" contract explicit.
    parser.add_argument('--lr', type=float, required=True)
    parser.add_argument('--momentum', type=float, required=True)
    parser.add_argument('--factor', type=float, required=True)
    parser.add_argument('--weight_decay', type=float, required=True)
    args, unparsed = parser.parse_known_args()
    name = args.Dataset_name
    nnArchitecture = args.Network_name
    dataset, num_class, input_size = get_dataset(name)
    # Random 80/20 train/validation split.
    num_total = len(dataset)
    shuffle = np.random.permutation(num_total)
    split_val = int(num_total * 0.2)
    train_idx, valid_idx = shuffle[split_val:], shuffle[:split_val]
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)
    trainset_ld = DataLoader(dataset, batch_size=256, sampler=train_sampler,
                             num_workers=4)
    validset_ld = DataLoader(dataset, batch_size=256, sampler=valid_sampler,
                             num_workers=4)
    modelname = '{}--{}--{:.6f}.pth.tar'.format(name, nnArchitecture,
                                                args.weight_decay)
    dirpath = os.path.join("./test_nn/", modelname.replace(".pth.tar", ""))
    if not os.path.exists(dirpath):
        os.makedirs(dirpath)
    loggername = os.path.join(dirpath, modelname.replace("pth.tar", "log"))
    logger = utils.buildLogger(loggername)
    writer = SummaryWriter(dirpath)
    # Header row; one data row is appended per epoch below.
    seed_table = np.array(
        [["train_accuracy", "train_loss", "valid_accuracy", "valid_loss"]])
    # ---- hyperparameters ----
    lr = args.lr
    momentum = args.momentum
    weight_decay = args.weight_decay
    factor = args.factor
    epoch = 50
    loss = nn.CrossEntropyLoss()
    # -------------------- SETTINGS: NETWORK ARCHITECTURE
    if nnArchitecture == 'Vgg11':
        model = Vgg11(num_class, input_size).cuda()
    elif nnArchitecture == 'Resnet18':
        model = Resnet18(num_class, input_size).cuda()
    elif nnArchitecture == 'MobileNet_V2':
        model = MobileNet_V2(num_class, input_size).cuda()
    else:
        # Raise instead of `assert 0` (asserts vanish under `python -O`).
        raise ValueError(
            "unknown network architecture: {}".format(nnArchitecture))
    model = torch.nn.DataParallel(model).cuda()
    logger.info("Build Model Done")
    # -------------------- SETTINGS: OPTIMIZER & SCHEDULER --------------------
    optimizer = optim.SGD(filter(lambda x: x.requires_grad,
                                 model.parameters()),
                          lr=lr, momentum=momentum,
                          weight_decay=weight_decay, nesterov=False)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=factor,
                                                     patience=10, mode='min')
    logger.info("Build Optimizer Done")
    for epochID in range(0, epoch):
        # ---- training pass ----
        model.train()
        losstra = 0
        losstraNorm = 0
        correct = 0
        num_samples = 0
        for batchID, (input, target) in enumerate(trainset_ld):
            # BUG FIX: `.cuda(async=True)` is a SyntaxError on Python >= 3.7
            # (`async` became a keyword); the supported spelling is
            # non_blocking=True.  The deprecated Variable wrapper (a no-op
            # since PyTorch 0.4) is dropped as well.
            varInput = input.cuda(non_blocking=True)
            varTarget = target.cuda(non_blocking=True)
            varOutput = model(varInput)
            lossvalue = loss(varOutput, varTarget)
            pred = varOutput.argmax(1)
            correct += (pred == varTarget).sum().cpu()
            losstra += lossvalue.item()
            losstraNorm += 1
            num_samples += len(input)
            optimizer.zero_grad()
            lossvalue.backward()
            # Clip element-wise gradient values to stabilize training.
            torch.nn.utils.clip_grad_value_(model.parameters(), 10)
            optimizer.step()
        train_outLoss = losstra / losstraNorm
        train_accuracy = correct.item() / num_samples
        # ---- validation pass ----
        model.eval()
        lossVal = 0
        lossValNorm = 0
        correct = 0
        num_samples = 0
        for batchID, (input, target) in enumerate(validset_ld):
            with torch.no_grad():
                varInput = input.cuda(non_blocking=True)
                varTarget = target.cuda(non_blocking=True)
                varOutput = model(varInput)
                losstensor = loss(varOutput, varTarget)
                pred = varOutput.argmax(1)
                correct += (pred == varTarget).sum().cpu()
                lossVal += losstensor.item()
                lossValNorm += 1
                num_samples += len(input)
        valid_outLoss = lossVal / lossValNorm
        valid_accuracy = correct.item() / num_samples
        writer.add_scalar('train_accuracy', train_accuracy, epochID)
        writer.add_scalar('train_loss', train_outLoss, epochID)
        writer.add_scalar('valid_accuracy', valid_accuracy, epochID)
        writer.add_scalar('valid_loss', valid_outLoss, epochID)
        # The deprecated `epoch=` kwarg (removed in newer torch) is dropped;
        # ReduceLROnPlateau only needs the monitored metric.
        scheduler.step(valid_outLoss)
        logger.info('Epoch [' + str(epochID + 1) +
                    '] loss= {:.5f}'.format(valid_outLoss) +
                    ' ---- accuracy= {:.5f}'.format(valid_accuracy) +
                    ' ---- model: {}'.format(modelname))
        seed_table = np.append(seed_table, [[
            str(train_accuracy),
            str(train_outLoss),
            str(valid_accuracy),
            str(valid_outLoss)
        ]], axis=0)
        # Rewrite the CSV every epoch so partial results survive a crash.
        np.savetxt(os.path.join(dirpath, "seed(50).csv"), seed_table,
                   delimiter=',', fmt="%s")
def main():
    """Train simplenet on MNIST for a fixed number of epochs, validating
    after each epoch and tracking the best validation accuracy."""
    torch.cuda.manual_seed(1)
    data_transforms = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))])
    dataset = torchvision.datasets.MNIST('~/data', train=True,
                                         transform=data_transforms)
    # Random 80/20 train/validation split over the full training set.
    num_total = len(dataset)
    shuffle = np.random.permutation(num_total)
    split_val = int(num_total * 0.2)
    train_idx, valid_idx = shuffle[split_val:], shuffle[:split_val]
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)
    trainset_ld = DataLoader(dataset, batch_size=256, sampler=train_sampler,
                             num_workers=4)
    validset_ld = DataLoader(dataset, batch_size=256, sampler=valid_sampler,
                             num_workers=4)
    modelname = './{}.pth.tar'.format("simplenet")
    loggername = modelname.replace("pth.tar", "log")
    logger = utils.buildLogger(loggername)
    # ---- hyperparameters ----
    lr = 0.1
    momentum = 0.99
    weight_decay = 0.1
    factor = 0.999
    epoch = 10
    loss = F.nll_loss
    # BUG FIX: the original reused a single `accuracy` variable and then
    # tested `if accuracy > accuracy` (always False) followed by
    # `accuracy = accuracy` (a no-op), so the best score was never tracked
    # and the success log never fired.  Track the best value separately.
    best_accuracy = -999999999999.0
    # -------------------- SETTINGS: NETWORK ARCHITECTURE
    model = simplenet().cuda()
    model = torch.nn.DataParallel(model).cuda()
    logger.info("Build Model Done")
    # -------------------- SETTINGS: OPTIMIZER & SCHEDULER --------------------
    optimizer = optim.SGD(filter(lambda x: x.requires_grad,
                                 model.parameters()),
                          lr=lr, momentum=momentum,
                          weight_decay=weight_decay, nesterov=False)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=factor,
                                                     patience=10, mode='min')
    logger.info("Build Optimizer Done")
    for epochID in range(0, epoch):
        # ---- training pass ----
        model.train()
        for batchID, (input, target) in enumerate(trainset_ld):
            # BUG FIX: `.cuda(async=True)` is a SyntaxError on Python >= 3.7
            # (`async` is a keyword); use non_blocking=True.  The deprecated
            # Variable wrapper (no-op since PyTorch 0.4) is dropped too.
            varInput = input.cuda(non_blocking=True)
            varTarget = target.cuda(non_blocking=True)
            varOutput = model(varInput)
            lossvalue = loss(varOutput, varTarget)
            optimizer.zero_grad()
            lossvalue.backward()
            optimizer.step()
        start_time = time.time()  # NOTE: times the validation pass only
        # ---- validation pass ----
        model.eval()
        lossVal = 0
        lossValNorm = 0
        correct = 0
        num_samples = 0
        for batchID, (input, target) in enumerate(validset_ld):
            with torch.no_grad():
                varInput = input.cuda(non_blocking=True)
                varTarget = target.cuda(non_blocking=True)
                varOutput = model(varInput)
                losstensor = loss(varOutput, varTarget)
                pred = varOutput.argmax(1)
                correct += (pred == varTarget).sum().cpu()
                lossVal += losstensor.item()
                lossValNorm += 1
                num_samples += len(input)
        outLoss = lossVal / lossValNorm
        accuracy = correct.item() / num_samples
        # Deprecated `epoch=` kwarg (removed in newer torch) is dropped.
        scheduler.step(outLoss)
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            # NOTE(review): the "[save]" tag suggests a checkpoint should be
            # written here, but the original had no torch.save call --
            # confirm intended behavior before adding one.
            logger.info('Epoch [' + str(epochID + 1) +
                        '] [save] loss= {:.5f}'.format(outLoss) +
                        ' ---- accuracy= {:.5f}'.format(accuracy) +
                        ' ---- best_accuracy= {:.5f}'.format(best_accuracy) +
                        ' ---- model: {}'.format(modelname) +
                        ' ---- time: {:.1f} s'.format(
                            (time.time() - start_time)))