def check_new_settings(self, settings):
    if settings:
        self.settings = apply_new_settings(settings, self.settings)
        print("[{}] New settings have taken effect".format(datetime.datetime.now()))
    else:
        print("[{}] Using default settings".format(datetime.datetime.now()))
    print_settings(self.settings)
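The snippet above relies on two helpers that are not shown. As a rough, hypothetical sketch (the real `apply_new_settings` and `print_settings` may differ), they could merge an override dict into the current settings and dump the result:

import datetime

# Hypothetical sketch only: these helpers are not defined in the snippet above.
def apply_new_settings(new_settings, current_settings):
    # Merge the override values into a copy of the current settings.
    merged = dict(current_settings)
    merged.update(new_settings)
    return merged

def print_settings(settings):
    # Dump each setting as "key: value", one per line.
    for key, value in sorted(settings.items()):
        print("  {}: {}".format(key, value))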
def main(_):
    utils.print_settings(FLAGS)
    logging.info('#' * 60)
    logging.info('Current mode is {0}'.format(FLAGS.mode))
    logging.info('#' * 60 + '\n')
    if FLAGS.mode == 1:
        vae_net = vae.VariationalAutoEncoder(FLAGS, data['doc_contents'], data['vocab'])
        vae_net.pretrain()
    elif FLAGS.mode == 2:
        nrtm = models.NRTM(FLAGS, data['doc_contents'], data['train_links_neg'],
                           data['train_labels_neg'], data['test_links'],
                           data['test_links_hit'], data['vocab'], data['links'])
        nrtm.load_model(FLAGS.pretrain_dir)
        nrtm.train()
y_train = to_categorical(np.asarray(train_labels))
y_test = test_labels

# Make and print the settings for the DL model
max_len = utils.get_max_len_info(train_data)
emb_types = ['keras', 'glove', 'random']
trainable = True
plot = True
shuffle = False
epochs = 50
batch_size = 256
embedding_dim = 300
hidden_units = 256
dropout = 0.3

for emb_type in emb_types:
    utils.print_settings(max_len, embedding_dim, hidden_units, epochs,
                         batch_size, dropout, emb_type, trainable)
    if shuffle:
        print("DATA HAS BEEN SHUFFLED.")
    else:
        print("Data is in its normal order (NO shuffling).")

    # List of the models to be analysed
    models = ['Standard', 'LSTM', 'Attention']

    # Run model
    run_dl_analysis(train_data, test_data, y_train, y_test, path, shuffle,
                    max_len, emb_type, trainable, plot, models, epochs,
                    batch_size, embedding_dim, hidden_units, dropout)
def main_worker(gpu, ngpus_per_node, args):
    global best_acc1
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(
            backend=args.dist_backend,
            init_method=args.dist_url,
            world_size=args.world_size,
            rank=args.rank,
        )

    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch]()

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    elif torch.cuda.device_count() == 1:
        model = model.cuda()
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith("alexnet") or args.arch.startswith("vgg"):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)
    optimizer = torch.optim.SGD(
        model.parameters(),
        args.lr,
        momentum=args.momentum,
        weight_decay=args.weight_decay,
    )

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = "cuda:{}".format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint["epoch"]
            best_acc1 = checkpoint["best_acc1"]
            if args.gpu is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint["state_dict"])
            optimizer.load_state_dict(checkpoint["optimizer"])
            print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint["epoch"]))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # print settings
    print_settings(model, args)

    cudnn.benchmark = True  # for fast run

    # Data loading code
    traindir = os.path.join(IMAGENET_PATH, "train")
    valdir = os.path.join(IMAGENET_PATH, "val")
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]),
    )

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=(train_sampler is None),
        num_workers=args.workers,
        pin_memory=True,
        sampler=train_sampler,
    )

    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(
            valdir,
            transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                normalize,
            ]),
        ),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True,
    )

    if args.evaluate:
        validate(val_loader, model, criterion, args)
        return

    # recording settings
    models_path = "../logs/models/{}/".format(args.exp_name)
    tb_path = "../logs/tb/{}".format(args.exp_name)  # TB: tensorboard

    # tensorboardX
    writer = SummaryWriter(log_dir=tb_path)

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)  # decay by 10 every 20 epoch

        # train for one epoch
        train_time = time.time()
        loss, acc1, acc5 = train(train_loader, model, criterion, optimizer, epoch, args)
        mins = (time.time() - train_time) / 60
        print("Training time: {:.4f}mins".format(mins))

        # record the values in tensorboard
        writer.add_scalar("loss/train", loss, epoch + 1)  # average loss
        writer.add_scalar("acc1/train", acc1, epoch + 1)  # average acc@1
        writer.add_scalar("acc5/train", acc5, epoch + 1)  # average acc@5

        # evaluate on validation set
        val_time = time.time()
        loss, acc1, acc5 = validate(val_loader, model, criterion, args)
        mins = (time.time() - val_time) / 60
        print("Validation time: {:.4f}mins".format(mins))

        # record the values in tensorboard
        writer.add_scalar("loss/val", loss, epoch + 1)  # average loss
        writer.add_scalar("acc1/val", acc1, epoch + 1)  # average acc@1
        writer.add_scalar("acc5/val", acc5, epoch + 1)  # average acc@5

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        if not args.multiprocessing_distributed or (
                args.multiprocessing_distributed and args.rank % ngpus_per_node == 0):
            save_checkpoint(
                {
                    "epoch": epoch + 1,
                    "arch": args.arch,
                    "state_dict": model.state_dict(),
                    "best_acc1": best_acc1,
                    "optimizer": optimizer.state_dict(),
                },
                is_best,
                models_path,
                epoch + 1,
            )
            if (epoch + 1) % 10 == 0:  # save model every 10 epochs
                save_model(
                    {
                        "epoch": epoch + 1,
                        "arch": args.arch,
                        "state_dict": model.state_dict(),
                        "best_acc1": best_acc1,
                        "optimizer": optimizer.state_dict(),
                    },
                    models_path,
                    epoch + 1,
                )

    writer.close()  # close tensorboardX writer
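In the training worker above, `print_settings(model, args)` is only called, never defined. A minimal sketch of what such a helper might do (an assumption, not taken from the source) is to echo the model architecture and the parsed arguments before training starts:

# Assumed helper, not shown in the original script: echo the run configuration.
def print_settings(model, args):
    print("=" * 60)
    print("Model architecture:")
    print(model)
    print("-" * 60)
    print("Settings:")
    for name, value in sorted(vars(args).items()):
        print("  {}: {}".format(name, value))
    print("=" * 60)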
def main():
    args = parser.parse_args()
    if args.exp_name == "":
        print("ERROR: USE '--exp-name' or '-n' option to define this experiment's name.")
        sys.exit()

    # directories settings
    os.makedirs("../logs/outputs", exist_ok=True)
    os.makedirs("../logs/models/{}".format(args.exp_name), exist_ok=True)

    OUTPUT_PATH = "../logs/outputs/{}.log".format(args.exp_name)
    MODEL_PATH = "../logs/models/{}/".format(args.exp_name)
    if not args.resume and os.path.exists(OUTPUT_PATH):
        print("ERROR: This '--exp-name' is already used. Use another name for this experiment.")
        sys.exit()

    # recording outputs
    sys.stdout = open(OUTPUT_PATH, "a")
    sys.stderr = open(OUTPUT_PATH, "a")

    # tensorboardX
    writer = SummaryWriter(log_dir="../logs/tb/{}".format(args.exp_name))

    # cuda settings
    args.cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda:0" if args.cuda else "cpu")
    print("device: {}".format(device))

    # for fast training
    cudnn.benchmark = True

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    # data settings
    trainloader, testloader = load_data(batch_size=args.batch_size)

    # Model, Criterion, Optimizer
    model = load_model(args.arch)  # remember the number of final outputs is 16.
    model.to(device)
    criterion = nn.CrossEntropyLoss().to(device)
    optimizer = optim.SGD(
        model.parameters(), lr=args.lr, momentum=0.9, weight_decay=args.weight_decay
    )

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint["epoch"]
            model.load_state_dict(checkpoint["state_dict"])
            optimizer.load_state_dict(checkpoint["optimizer"])
            print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint["epoch"]))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # print settings
    print_settings(model, args)

    # training
    sigma_blur = args.sigma  # save sigma of blur (for reversed-single-step)
    print("Start Training...")
    train_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):  # loop over the dataset multiple times
        # blur settings
        if args.mode == "normal":
            args.sigma = 0
        elif args.mode == "multi-steps-cbt":
            # args.sigma = adjust_sigma(epoch, args)  # sigma decay every 5 epoch
            args.sigma = adjust_multi_steps_cbt(args.sigma, epoch, args.cbt_rate)
        elif args.mode == "multi-steps":
            args.sigma = adjust_multi_steps(epoch)
        elif args.mode == "single-step":
            if epoch >= args.epochs // 2:
                args.sigma = 0  # no blur
        elif args.mode == "fixed-single-step":
            if epoch >= args.epochs // 2:
                args.sigma = 0  # no blur
                # fix parameters of 1st Conv layer
                model.features[0].weight.requires_grad = False
                model.features[0].bias.requires_grad = False
        elif args.mode == "reversed-single-step":
            if epoch < args.epochs // 2:
                args.sigma = 0
            else:
                args.sigma = sigma_blur

        adjust_learning_rate(optimizer, epoch, args)  # decay by 10 every 20 epoch

        # ===== train mode =====
        train_acc = AverageMeter("train_acc", ":6.2f")
        train_loss = AverageMeter("train_loss", ":.4e")
        model.train()
        for data in trainloader:
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data[0], data[1].to(device)

            # Blur images
            if args.mode == "mix":
                half1, half2 = inputs.chunk(2)
                # blur first half images
                half1 = GaussianBlurAll(half1, args.sigma)
                inputs = torch.cat((half1, half2))
            elif args.mode == "random-mix":
                half1, half2 = inputs.chunk(2)
                # blur first half images
                half1 = RandomGaussianBlurAll(half1, args.min_sigma, args.max_sigma)
                inputs = torch.cat((half1, half2))
            else:
                inputs = GaussianBlurAll(inputs, args.sigma)
            inputs = inputs.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + record
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            acc1 = accuracy(outputs, labels, topk=(1,))
            train_loss.update(loss.item(), inputs.size(0))
            train_acc.update(acc1[0], inputs.size(0))

            # backward + optimize
            loss.backward()
            optimizer.step()

        # record the values in tensorboard
        writer.add_scalar("loss/train", train_loss.avg, epoch + 1)  # average loss
        writer.add_scalar("acc/train", train_acc.avg, epoch + 1)  # average acc

        # ===== val mode =====
        val_acc = AverageMeter("val_acc", ":6.2f")
        val_loss = AverageMeter("val_loss", ":.4e")
        model.eval()
        with torch.no_grad():
            for data in testloader:
                inputs, labels = data[0], data[1].to(device)
                if args.blur_val:
                    inputs = GaussianBlurAll(inputs, args.sigma)
                inputs = inputs.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                acc1 = accuracy(outputs, labels, topk=(1,))
                val_loss.update(loss.item(), inputs.size(0))
                val_acc.update(acc1[0], inputs.size(0))

        # record the values in tensorboard
        writer.add_scalar("loss/val", val_loss.avg, epoch + 1)  # average loss
        writer.add_scalar("acc/val", val_acc.avg, epoch + 1)  # average acc

        # ===== save the model =====
        if (epoch + 1) % 10 == 0:
            save_model(
                {
                    "epoch": epoch + 1,
                    "arch": args.arch,
                    "val_loss": val_loss.avg,
                    "val_acc": val_acc.avg,
                    "state_dict": model.state_dict(),
                    "optimizer": optimizer.state_dict(),
                },
                MODEL_PATH,
                epoch + 1,
            )

    print("Finished Training")
    print("Training time elapsed: {:.4f}mins".format((time.time() - train_time) / 60))
    print()

    writer.close()  # close tensorboardX writer
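The training loop above calls `GaussianBlurAll`, which is imported from elsewhere in the project. A minimal sketch, assuming it applies the same Gaussian filter to every image in a batch tensor and returns the input unchanged when `sigma` is 0, might look like this (the project's actual implementation may differ):

import torch
from scipy.ndimage import gaussian_filter

# Sketch under assumptions; the project's real GaussianBlurAll may differ.
def GaussianBlurAll(imgs, sigma):
    """Blur a batch tensor of shape (N, C, H, W) with a fixed sigma."""
    if sigma == 0:
        return imgs  # no blur
    imgs_np = imgs.cpu().numpy()
    # Filter only the spatial dimensions (H, W) of each image.
    blurred = gaussian_filter(imgs_np, sigma=(0, 0, sigma, sigma))
    return torch.from_numpy(blurred)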
default=10, type=int, help='sequence length')
args = parser.parse_args()

embedding_dim = args.dim
batch_size = args.batch
epochs = args.epochs
dropout = args.dropout
learning_rate = args.learning
dense_units = args.dense
seq_length = args.seq
use_lstm = args.lstm

utils.print_settings(embedding_dim, epochs, batch_size, dropout,
                     learning_rate, dense_units, seq_length, use_lstm)

path = os.getcwd()[:os.getcwd().rfind("/")]
emoji_positive = path + "/res/emoji_positive_samples.txt"
emoji_negative = path + "/res/emoji_negative_samples.txt"
emoji_freq = path + "/res/emoji_frequencies.txt"
emoji2vec_visualization = path + "/models/emoji_emb_viz_%dd.csv" % embedding_dim
emoji2vec_weights = path + "/models/weights_%dd.h5" % embedding_dim
emoji2vec_embeddings = path + "/models/emoji_embeddings_%dd.txt" % embedding_dim
glove_filename = path + "/res/glove/" + "glove.6B.%dd.txt" % embedding_dim


# Visualize the TSNE representation of the emoji embeddings
def visualize_emoji_embeddings(top=300):
    # Get the most popular emojis and only plot those
    popular_emojis = [line.split()[0]