def __init__(self, vocab_size, device=DEVICE):
    """Bidirectional LSTM encoder over word embeddings.

    vocab_size: size of the embedding vocabulary.
    device: torch device the sub-modules are placed on.
    """
    super().__init__()
    self.vocab_size = vocab_size
    self.device = device

    # Embedding layer plus dropout applied to the embedded sequence.
    self.embed = nn.Embedding(
        num_embeddings=vocab_size,
        embedding_dim=D_WORD).to(self.device)
    self.emb_dropout = nn.Dropout(P_DROP).to(self.device)

    # PyTorch warns when dropout is set on a single-layer LSTM
    # (it only applies between stacked layers); suppress that warning.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        self.rnn = nn.LSTM(
            input_size=D_WORD,
            hidden_size=D_HIDDEN // 2,  # bidirectional doubles this back to D_HIDDEN
            batch_first=True,
            dropout=P_DROP,
            bidirectional=True).to(self.device)

    # Learnable initial hidden and cell states, shared by every sequence.
    self.hidden0 = nn.Parameter(
        torch.randn(D_HIDDEN // 2).to(self.device), requires_grad=True)
    self.cell0 = nn.Parameter(
        torch.randn(D_HIDDEN // 2).to(self.device), requires_grad=True)

    p_trainable, p_non_trainable = count_params(self)
    print(
        f'Text encoder params: trainable {p_trainable} - non_trainable {p_non_trainable}'
    )
def __init__(self):
    """Project generator features to a 3-channel image bounded by tanh."""
    super().__init__()
    # 3x3 conv maps D_GF feature channels to RGB; Tanh keeps pixels in [-1, 1].
    to_rgb = conv3x3(D_GF, 3)
    self.img = nn.Sequential(to_rgb, nn.Tanh())
    p_trainable, p_non_trainable = count_params(self)
    print(
        f'Image output params: trainable {p_trainable} - non_trainable {p_non_trainable}'
    )
def __init__(self):
    """64x64 discriminator: 16x downscaling encoder followed by a logit head."""
    super().__init__()
    # Encoder reduces spatial resolution by a factor of 16 before scoring.
    self.encoder = downscale16_encoder_block()
    self.logit = DiscriminatorLogitBlock()
    p_trainable, p_non_trainable = count_params(self)
    print(
        f'Discriminator64 params: trainable {p_trainable} - non_trainable {p_non_trainable}'
    )
def __init__(self):
    """128x128 discriminator: extra downscale + conv stage over the 64px stack."""
    super().__init__()
    # 16x downscale, then a further 2x downscale with a channel expansion,
    # then a 3x3 conv bringing channels back to D_DF * 8 for the logit head.
    self.downscale_encoder_16 = downscale16_encoder_block()
    self.downscale_encoder_32 = downscale2_encoder_block(D_DF * 8, D_DF * 16)
    self.encoder32 = conv3x3_LReLU(D_DF * 16, D_DF * 8)
    self.logit = DiscriminatorLogitBlock()
    p_trainable, p_non_trainable = count_params(self)
    print(
        f'Discriminator128 params: trainable {p_trainable} - non_trainable {p_non_trainable}'
    )
def init_model(args, field, logger, world_size, device):
    """Instantiate args.model, move it to `device`, and wrap for DDP if needed.

    Returns the model with its trainable parameter list attached as
    `model.params` (used by the caller's optimizer setup).
    """
    logger.info(f'Initializing {args.model}')
    model_cls = getattr(models, args.model)
    model = model_cls(field, args)
    params = get_trainable_params(model)
    num_param = count_params(params)
    logger.info(f'{args.model} has {num_param:,} trainable parameters')
    model.to(device)
    # Multi-process training: wrap in DistributedDataParallel.
    if world_size > 1:
        logger.info(f'Wrapping model for distributed')
        model = DistributedDataParallel(model)
    model.params = params
    return model
def __init__(self, use_self_attention=False):
    """Refinement generator stage: residual stack + attention + 2x upsample.

    use_self_attention: if True, adds a self-attention block to the stage.
    """
    super().__init__()
    # RESIDUALS residual blocks operating on 2*D_GF channels.
    blocks = [Residual(D_GF * 2) for _ in range(RESIDUALS)]
    self.residuals = nn.Sequential(*blocks)
    self.attn = Attention(D_GF, D_HIDDEN)
    self.upsample = upsample_block(D_GF * 2, D_GF)
    self.use_self_attention = use_self_attention
    if self.use_self_attention:
        self.self_attn = self_attn_block()
    p_trainable, p_non_trainable = count_params(self)
    print(
        f'GeneratorN params: trainable {p_trainable} - non_trainable {p_non_trainable}'
    )
def init_model(world_size):
    """Build the QA network, load 'model.pth' if present, wrap for DDP.

    Returns the model with its trainable parameters attached as `model.params`.
    """
    model = MultitaskQuestionAnsweringNetwork()
    # Resume from a local checkpoint when one exists.
    has_checkpoint = os.path.isfile('model.pth')
    if has_checkpoint:
        print('load pretrained model')
        model.load_state_dict(torch.load('model.pth'))
    else:
        print('new model ')
    params = get_trainable_params(model)
    num_param = count_params(params)
    print(f'model has {num_param:,} parameters')
    # Multi-process training: wrap in DistributedDataParallel.
    if world_size > 1:
        print(f'Wrapping model for distributed')
        model = DistributedDataParallel(model)
    model.params = params
    return model
def __init__(self):
    """Initial generator stage: FC projection of (z, condition) + 4 upsamples."""
    super().__init__()
    self.d_gf = D_GF * 16
    # The FC output is doubled so GLU can halve it back; reshaped later
    # by the forward pass into a 4x4 feature map (presumably — confirm there).
    fc_out = self.d_gf * 4 * 4 * 2
    self.fc = nn.Sequential(
        nn.Linear(D_Z + D_COND, fc_out, bias=False),
        nn.BatchNorm1d(fc_out),
        nn.modules.activation.GLU(dim=1))
    # Four upsampling stages, halving the channel count at each stage.
    stages = [
        upsample_block(self.d_gf // (2 ** i), self.d_gf // (2 ** (i + 1)))
        for i in range(4)
    ]
    self.upsample_steps = nn.Sequential(*stages)
    p_trainable, p_non_trainable = count_params(self)
    print(
        f'Generator0 params: trainable {p_trainable} - non_trainable {p_non_trainable}'
    )
def init_model(args, field, logger, world_size):
    """Instantiate args.model, optionally on GPU, and wrap for DDP if needed.

    Returns the model with its trainable parameter list attached as
    `model.params`.
    """
    logger.info(f'Initializing {args.model}')
    model_cls = getattr(models, args.model)
    model = model_cls(field, args)  # construct the model
    params = get_trainable_params(model)
    num_param = count_params(params)  # count trainable parameters
    logger.info(f'{args.model} has {num_param:,} parameters')
    # GPU selection: a first gpu id of -1 (or lower) means CPU-only.
    if args.gpus[0] > -1:
        model.cuda()
    # Multi-process training: wrap in DistributedDataParallel.
    if world_size > 1:
        logger.info(f'Wrapping model for distributed')
        model = DistributedDataParallel(model)
    model.params = params
    return model
def __init__(self, device=DEVICE):
    """Image encoder: frozen Inception V3 with trainable projection heads."""
    super().__init__()
    self.device = device
    # Pretrained Inception V3, used in eval mode as a fixed feature extractor.
    self.inception_model = torchvision.models.inception_v3(
        pretrained=True).to(self.device).eval()
    # Freeze Inception V3 parameters
    freeze_params_(self.inception_model)
    # 768: the dimension of mixed_6e layer's sub-regions (768 x 289 [number of sub-regions, 17 x 17])
    self.local_proj = conv1x1(768, D_HIDDEN).to(self.device)
    # 2048: the dimension of last average pool's output
    self.global_proj = nn.Linear(2048, D_HIDDEN).to(self.device)
    # Uniform initialization for both trainable projection heads.
    for proj in (self.local_proj, self.global_proj):
        proj.weight.data.uniform_(-IMG_WEIGHT_INIT_RANGE,
                                  IMG_WEIGHT_INIT_RANGE)
    p_trainable, p_non_trainable = count_params(self)
    print(
        f'Image encoder params: trainable {p_trainable} - non_trainable {p_non_trainable}'
    )
def __init__(self, embedding_dim, input_dims, hidden_dim, num_slots,
             encoder='cswm', cnn_size='small', decoder='broadcast',
             trans_model='gnn', identity_action=False, residual=False,
             canonical=False):
    """Object-centric model: slot encoder + transition model + decoder.

    Args:
        embedding_dim: per-slot latent dimensionality.
        input_dims: input shape, indexed as (channels, *spatial dims).
        hidden_dim: hidden size (stored as `self.hidden_dims`).
        num_slots: number of object slots.
        encoder: encoder variant ('cswm' is the only one handled here).
        cnn_size: encoder CNN size preset.
        decoder: 'broadcast' or 'cnn'.
        trans_model: 'gnn' or 'attention'.
        identity_action / residual / canonical: behavior flags forwarded
            to sub-modules or stored for later use.
    """
    super(NodModel, self).__init__()
    self.embedding_dim = embedding_dim
    self.input_dims = input_dims
    self.hidden_dims = hidden_dim
    self.num_slots = num_slots
    self.identity_action_flag = identity_action
    self.canonical = canonical

    if encoder == 'cswm':
        self.encoder = modules.EncoderCSWM(
            input_dims=self.input_dims,
            embedding_dim=self.embedding_dim,
            num_objects=self.num_slots,
            cnn_size=cnn_size)

    if trans_model == 'gnn':
        self.transition_model = modules.TransitionGNN(
            input_dim=self.embedding_dim,
            hidden_dim=512,
            action_dim=12,
            num_objects=self.num_slots,
            residual=residual)
    elif trans_model == 'attention':
        self.transition_model = attention.MultiHeadCondAttention(
            n_head=5,
            input_feature_dim=self.embedding_dim + 12,
            out_dim=self.embedding_dim,
            dim_k=128,
            dim_v=128)

    if decoder == 'broadcast':
        self.decoder = spd.BroadcastDecoder(
            latent_dim=self.embedding_dim,
            output_dim=4,  # 3 rgb channels and one mask
            hidden_channels=32,
            num_layers=4,
            img_dims=self.input_dims[1:],  # width and height of square image
            act_fn='elu')
    elif decoder == 'cnn':
        # BUG FIX: the original did `out_shape = self.input_dims` and then
        # mutated out_shape[0], which aliased and silently changed
        # self.input_dims as well. Copy before mutating.
        out_shape = list(self.input_dims)
        out_shape[0] += 1  # one extra output channel for the mask
        self.decoder = modules.DecoderCNNMedium(
            input_dim=self.embedding_dim,
            hidden_dim=32,
            num_objects=self.num_slots,
            output_size=out_shape)

    print('Number of params in encoder ', util.count_params(self.encoder))
    print('Number of params in transition model ',
          util.count_params(self.transition_model))
    print('Number of params in decoder ', util.count_params(self.decoder))

    self.l2_loss = nn.MSELoss(reduction="mean")
# Build the object-centric model from CLI arguments; the shape of the first
# observation image (minus the batch dimension) fixes the network input shape.
input_shape = obs['image1'].size()[1:]
model = nod.NodModel(
    embedding_dim=args.embedding_dim,
    input_dims=input_shape,
    hidden_dim=args.hidden_dim,
    num_slots=args.num_slots,
    encoder=args.encoder,
    cnn_size=args.cnn_size,
    trans_model=args.trans_model,
    decoder=args.decoder,
    identity_action=args.identity_action,
    residual=args.residual,
    canonical=args.canonical_rep)
model.to(device)
print('Number of parameters in model', util.count_params(model))

# Either resume from a checkpoint or start from freshly initialized weights.
if args.checkpoint_path is not None:
    print("Loading model from %s" % args.checkpoint_path)
    util.custom_load(model, path=args.checkpoint_path)
else:
    print("Initialising random weights")
    model.apply(util.weights_init)

optimizer = torch.optim.Adam(
    model.parameters(), lr=args.learning_rate)
# scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=200, gamma=0.4)

# Timestamp captured here — presumably used below for run naming/logging;
# confirm against the rest of the script.
now = datetime.datetime.now()
def training(config, cla):
    """TF1 training loop with PIT or fixed-permutation label assignment.

    Builds the model and two optimizer paths (standard PIT loss vs. a
    fixed-permutation loss), restores from checkpoint, then alternates
    train/valid epochs. Tracks the per-utterance permutation chosen at
    each step and writes it to tr_perm.csv after every epoch. Halves the
    learning rate after `half_lr_patience` epochs without SDR improvement.
    """
    g_global_step = tf.Variable(0, trainable=False, name=config['model']['type']+"_global_step")
    glr = config['optimizer']['lr']
    sess = tf.Session()
    # build model
    G = hparams.get_model(config['model']['type'])(config, sess)
    ## update params: only variables belonging to this model type are trained.
    G_vars = [var for var in tf.trainable_variables() if config['model']['type'] in var.name]
    util.count_params(G_vars, config['model']['type'])
    util.total_params()
    g_learning_rate = tf.placeholder(tf.float32, [])
    g_ozer = hparams.get_optimizer(config['optimizer']['type'])(learn_rate=g_learning_rate)
    # Two update ops: one for the PIT loss, one for the fixed-permutation loss.
    g_grad = g_ozer.compute_gradients(G.loss, G_vars)
    g_update = g_ozer.apply_gradients(g_grad, global_step=g_global_step)
    g_grad_fix = g_ozer.compute_gradients(G.loss_fix, G_vars)
    g_update_fix = g_ozer.apply_gradients(g_grad_fix, global_step=g_global_step)
    ## restore from checkpoint
    G_save_path = os.path.join(config['training']['path'], 'generat.ckpt')
    sess.run(tf.global_variables_initializer())
    G.load(G_save_path)
    history_file = os.path.join(config['training']['path'], 'history.txt')
    tr_dataset = get_dataset(config, 'tr')
    cv_dataset = get_dataset(config, 'cv')
    # get_iterator() may return None, in which case get_next() generators
    # are used instead (see the hasattr checks below).
    tr_next = tr_dataset.get_iterator()
    cv_next = cv_dataset.get_iterator()
    valid_best_sdr = float('-inf')
    valid_wait = 0
    if config['training']['perm_path'] != None:
        fixed_perm_list = util.read_pretrained_perm(config['training']['perm_path'], tr_dataset.file_base)
    # Resume bookkeeping: 20000 appears to be the training-set size here
    # (confirm); the starting epoch is derived from the restored global step.
    last_step = sess.run(g_global_step)
    tr_audio_perm = {i: [] for i in range(20000)} if last_step == 0 else util.load_perm(config, 'tr', last_step, tr_dataset, 20000)
    for epoch in range(last_step // (20000 // config['training']['batch_size']) + 1, config['training']['num_epochs'] + 1):
        tr_loss = tr_size = tr_sdr = 0.0
        util.myprint(history_file, '-' * 20 + ' epoch {} '.format(epoch) + '-' * 20)
        ## training data initial
        if hasattr(tr_dataset, 'iterator'):
            sess.run(tr_dataset.iterator.initializer)
        else:
            tr_gen = tr_dataset.get_next()
        while True:
            try:
                feed_audio, audio_idx = sess.run(tr_next) if tr_next != None else next(tr_gen)
                if config['training']['pit'] == True:
                    # Permutation-invariant training: the graph picks the
                    # best label permutation per utterance.
                    g_loss, g_sdr, g_curr_step, _, g_perm_idx = sess.run(
                        fetches=[G.loss, G.sdr, g_global_step, g_update, G.perm_idxs],
                        feed_dict={G.audios: feed_audio, g_learning_rate: glr})
                elif config['training']['perm_path'] != None:
                    # Fixed assignment: permutations loaded from perm_path,
                    # indexed by the utterance ids of this batch.
                    fixed_perm = np.take(fixed_perm_list, audio_idx, axis=0)
                    g_loss, g_sdr, g_curr_step, _, g_perm_idx = sess.run(
                        fetches=[G.loss_fix, G.sdr_fix, g_global_step, g_update_fix, G.perm_idxs_fix],
                        feed_dict={G.audios: feed_audio, g_learning_rate: glr, G.fixed_perm: fixed_perm})
                tr_loss += g_loss
                tr_sdr += g_sdr
                tr_size += 1
                print('Train step {}: {} = {:5f}, sdr = {:5f}, lr = {}'.
                      format(g_curr_step, config['training']['loss'], g_loss, g_sdr, glr), end='\r')
                # record label assignment
                for _i, _id in enumerate(audio_idx):
                    tr_audio_perm[_id].append(g_perm_idx[_i].tolist())
            except (tf.errors.OutOfRangeError, StopIteration):
                # End of epoch: log the last step and the epoch means.
                util.myprint(history_file, 'Train step {}: {} = {:5f}, sdr = {:5f}, lr = {}'.
                             format(g_curr_step, config['training']['loss'], g_loss, g_sdr, glr))
                util.myprint(history_file, 'mean {} = {:5f} , mean sdr = {:5f}, lr = {}'.
                             format(config['training']['loss'], tr_loss/tr_size, tr_sdr/tr_size, glr))
                break
        ## valid iteration
        if hasattr(cv_dataset, 'iterator'):
            sess.run(cv_dataset.iterator.initializer)
        else:
            cv_gen = cv_dataset.get_next()
        cv_loss = cv_size = cv_sdr = 0.0
        while True:
            try:
                feed_audio, audio_idx = sess.run(cv_next) if cv_next != None else next(cv_gen)
                g_loss, g_sdr = sess.run(fetches=[G.loss, G.sdr], feed_dict={G.audios: feed_audio})
                cv_loss += g_loss
                cv_sdr += g_sdr
                cv_size += 1
            except (tf.errors.OutOfRangeError, StopIteration):
                curr_loss = cv_loss/cv_size
                curr_sdr = cv_sdr/cv_size
                util.myprint(history_file, 'Valid '+ config['training']['loss'] +' = {:5f}, sdr = {}'.\
                             format(curr_loss, curr_sdr))
                ## save model for every improve of the best valid score
                ## or last epoch
                if curr_sdr > valid_best_sdr or epoch == config['training']['num_epochs']:
                    util.myprint(history_file, 'Save Model')
                    valid_wait = 0
                    valid_best_sdr = curr_sdr
                    G.save(G_save_path, g_curr_step)
                else:
                    # No improvement: halve the learning rate after
                    # half_lr_patience stagnant epochs.
                    valid_wait += 1
                    if valid_wait == config['training']['half_lr_patience']:
                        glr /= 2; valid_wait = 0
                break
        # Persist the per-utterance permutation history for this epoch.
        util.write(os.path.join(config['training']['path'], 'tr_perm.csv'), tr_dataset.file_base, tr_audio_perm, epoch, config['training']['n_speaker'])
def training(config, cla):
    """TF1 training loop (plain variant, no permutation tracking).

    Builds the model and a single optimizer path, restores from checkpoint,
    then alternates train/valid epochs. Saves on every validation-SDR
    improvement (and on the final epoch); halves the learning rate after
    `half_lr_patience` epochs without improvement.
    """
    g_global_step = tf.Variable(0, trainable=False, name=config['model']['type'] + "_global_step")
    glr = config['optimizer']['lr']
    sess = tf.Session()
    # build model
    G = hparams.get_model(config['model']['type'])(config, sess)
    ## update params: only variables belonging to this model type are trained.
    G_vars = [
        var for var in tf.trainable_variables()
        if config['model']['type'] in var.name
    ]
    util.count_params(G_vars, config['model']['type'])
    util.total_params()
    g_learning_rate = tf.placeholder(tf.float32, [])
    g_ozer = hparams.get_optimizer(
        config['optimizer']['type'])(learn_rate=g_learning_rate)
    g_grad = g_ozer.compute_gradients(G.loss, G_vars)
    g_update = g_ozer.apply_gradients(g_grad, global_step=g_global_step)
    ## restore from checkpoint
    G_save_path = os.path.join(config['training']['path'], 'generat.ckpt')
    sess.run(tf.global_variables_initializer())
    G.load(G_save_path)
    history_file = os.path.join(config['training']['path'], 'history.txt')
    tr_dataset = get_dataset(config, 'tr')
    cv_dataset = get_dataset(config, 'cv')
    # get_iterator() may return None, in which case get_next() generators
    # are used instead (see the hasattr checks below).
    tr_next = tr_dataset.get_iterator()
    cv_next = cv_dataset.get_iterator()
    valid_best_sdr = float('-inf')
    valid_wait = 0
    for epoch in range(1, config['training']['num_epochs'] + 1):
        tr_loss = tr_size = tr_sdr = 0.0
        util.myprint(history_file, '-' * 20 + ' epoch {} '.format(epoch) + '-' * 20)
        ## training data initial
        if hasattr(tr_dataset, 'iterator'):
            sess.run(tr_dataset.iterator.initializer)
        else:
            tr_gen = tr_dataset.get_next()
        while True:
            try:
                feed_audio, audio_idx = sess.run(
                    tr_next) if tr_next != None else next(tr_gen)
                g_loss, g_sdr, g_curr_step, _ = sess.run(
                    fetches=[G.loss, G.sdr, g_global_step, g_update],
                    feed_dict={
                        G.audios: feed_audio,
                        g_learning_rate: glr
                    })
                tr_loss += g_loss
                tr_sdr += g_sdr
                tr_size += 1
                print('Train step {}: {} = {:5f}, sdr = {:5f}, lr = {}'.format(
                    g_curr_step, config['training']['loss'], g_loss, g_sdr, glr),
                    end='\r')
            except (tf.errors.OutOfRangeError, StopIteration):
                # End of epoch: log the last step and the epoch means.
                util.myprint(
                    history_file,
                    'Train step {}: {} = {:5f}, sdr = {:5f}, lr = {}'.format(
                        g_curr_step, config['training']['loss'], g_loss, g_sdr, glr))
                util.myprint(
                    history_file,
                    'mean {} = {:5f} , mean sdr = {:5f}, lr = {}'.format(
                        config['training']['loss'], tr_loss / tr_size, tr_sdr / tr_size, glr))
                break
        ## valid iteration
        if hasattr(cv_dataset, 'iterator'):
            sess.run(cv_dataset.iterator.initializer)
        else:
            cv_gen = cv_dataset.get_next()
        cv_loss = cv_size = cv_sdr = 0.0
        while True:
            try:
                feed_audio, audio_idx = sess.run(
                    cv_next) if cv_next != None else next(cv_gen)
                g_loss, g_sdr = sess.run(fetches=[G.loss, G.sdr],
                                         feed_dict={G.audios: feed_audio})
                cv_loss += g_loss
                cv_sdr += g_sdr
                cv_size += 1
            except (tf.errors.OutOfRangeError, StopIteration):
                curr_loss = cv_loss / cv_size
                curr_sdr = cv_sdr / cv_size
                util.myprint(history_file, 'Valid '+ config['training']['loss'] +' = {:5f}, sdr = {}'.\
                             format(curr_loss, curr_sdr))
                ## save model for every improve of the best valid score
                ## or last epoch
                if curr_sdr > valid_best_sdr or epoch == config['training'][
                        'num_epochs']:
                    util.myprint(history_file, 'Save Model')
                    valid_wait = 0
                    valid_best_sdr = curr_sdr
                    G.save(G_save_path, g_curr_step)
                else:
                    # No improvement: halve the learning rate after
                    # half_lr_patience stagnant epochs.
                    valid_wait += 1
                    if valid_wait == config['training']['half_lr_patience']:
                        glr /= 2
                        valid_wait = 0
                break
def main():
    """Train one GNN model per dataset target, with resume/checkpoint support.

    For each target feature of the chosen dataset: load data, build the
    model/optimizer, optionally restore the best checkpoint, run the epoch
    loop (with a linear LR decay inside the schedule window), and finally
    evaluate the best checkpoint on the test split.

    NOTE(review): relies on module-level `args`/`best_er1` globals and on
    `best_er1` being initialized elsewhere before the first comparison.
    """
    global args, best_er1
    args = parser.parse_args()
    # Check if CUDA is enabled
    args.cuda = not args.no_cuda and torch.cuda.is_available()
    for tgt_idx, tgt in enumerate(dataset_targets[args.dataset]):
        print("Training a model for {}".format(tgt))
        # Load data: fall back to per-dataset defaults when CLI paths/types
        # are not given.
        root = args.dataset_path if args.dataset_path else dataset_paths[
            args.dataset]
        task_type = args.dataset_type if args.dataset_type else dataset_types[
            args.dataset]
        if args.resume:
            # Checkpoint directory template, specialized per target feature.
            resume_dir = args.resume.format(dataset=args.dataset,
                                            model=args.model,
                                            layers=args.layers,
                                            feature=tgt)
        #end if
        Model_Class = model_dict[args.model]
        print("Preparing dataset")
        node_features, edge_features, target_features, task_type, train_loader, valid_loader, test_loader = read_dataset(
            args.dataset, root, args.batch_size, args.prefetch)
        # Define model and optimizer
        print('\tCreate model')
        hidden_state_size = args.hidden
        # target_features=1: each loop iteration trains a single-target model.
        model = Model_Class(node_features=node_features,
                            edge_features=edge_features,
                            target_features=1,
                            hidden_features=hidden_state_size,
                            num_layers=args.layers,
                            dropout=0.5,
                            type=task_type,
                            s2s_processing_steps=args.s2s)
        print("#Parameters: {param_count}".format(
            param_count=count_params(model)))
        print('Optimizer')
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.weight_decay)
        criterion, evaluation, metric_name, metric_compare, metric_best = get_metric_by_task_type(
            task_type, target_features)
        print('Logger')
        logger = Logger(
            args.log_path.format(dataset=args.dataset,
                                 model=args.model,
                                 layers=args.layers,
                                 feature=tgt))
        # Per-epoch LR decrement applied inside the (schedule[0], schedule[1])
        # fraction of the epoch range.
        lr_step = (args.lr - args.lr * args.lr_decay) / (
            args.epochs * args.schedule[1] - args.epochs * args.schedule[0])
        # get the best checkpoint if available without training
        if args.resume:
            checkpoint_dir = resume_dir
            best_model_file = os.path.join(checkpoint_dir, 'model_best.pth')
            if not os.path.isdir(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            if os.path.isfile(best_model_file):
                print("=> loading best model '{}'".format(best_model_file))
                checkpoint = torch.load(best_model_file)
                args.start_epoch = checkpoint['epoch']
                best_acc1 = checkpoint['best_er1']
                model.load_state_dict(checkpoint['state_dict'])
                if args.cuda:
                    model.cuda()
                optimizer.load_state_dict(checkpoint['optimizer'])
                print("=> loaded best model '{}' (epoch {})".format(
                    best_model_file, checkpoint['epoch']))
            else:
                print("=> no best model found at '{}'".format(best_model_file))
        print('Check cuda')
        if args.cuda:
            print('\t* Cuda')
            model = model.cuda()
            criterion = criterion.cuda()
        # Epoch for loop
        for epoch in range(0, args.epochs):
            try:
                # Linear LR decay only inside the scheduled window.
                if epoch > args.epochs * args.schedule[
                        0] and epoch < args.epochs * args.schedule[1]:
                    args.lr -= lr_step
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = args.lr
                #end if
                # train for one epoch
                train(train_loader,
                      model,
                      criterion,
                      optimizer,
                      epoch,
                      evaluation,
                      logger,
                      target_range=(tgt_idx, ),
                      tgt_name=tgt,
                      metric_name=metric_name,
                      cuda=args.cuda,
                      log_interval=args.log_interval)
                # evaluate on test set
                er1 = validate(valid_loader,
                               model,
                               criterion,
                               evaluation,
                               logger,
                               target_range=(tgt_idx, ),
                               tgt_name=tgt,
                               metric_name=metric_name,
                               cuda=args.cuda,
                               log_interval=args.log_interval)
                is_best = metric_compare(er1, best_er1)
                best_er1 = metric_best(er1, best_er1)
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'state_dict': model.state_dict(),
                        'best_er1': best_er1,
                        'optimizer': optimizer.state_dict(),
                    },
                    is_best=is_best,
                    directory=resume_dir)
                # Logger step
                logger.log_value('learning_rate', args.lr).step()
            except KeyboardInterrupt:
                # Allow manual interruption of this target's training; the
                # best-checkpoint evaluation below still runs.
                break
            #end try
        #end for
        # get the best checkpoint and test it with test set
        if args.resume:
            checkpoint_dir = resume_dir
            best_model_file = os.path.join(checkpoint_dir, 'model_best.pth')
            if not os.path.isdir(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            if os.path.isfile(best_model_file):
                print("=> loading best model '{}'".format(best_model_file))
                checkpoint = torch.load(best_model_file)
                args.start_epoch = checkpoint['epoch']
                best_acc1 = checkpoint['best_er1']
                model.load_state_dict(checkpoint['state_dict'])
                if args.cuda:
                    model.cuda()
                optimizer.load_state_dict(checkpoint['optimizer'])
                print("=> loaded best model '{}' (epoch {})".format(
                    best_model_file, checkpoint['epoch']))
            else:
                print("=> no best model found at '{}'".format(best_model_file))
            #end if
        #end if
        # (For testing)
        validate(test_loader,
                 model,
                 criterion,
                 evaluation,
                 target_range=(tgt_idx, ),
                 tgt_name=tgt,
                 metric_name=metric_name,
                 cuda=args.cuda,
                 log_interval=args.log_interval)
def go(options):
    """Train autoencoder variants on random inputs and record test losses.

    For each model style, trains on random batches, evaluates mean
    reconstruction loss per epoch, plots all curves into
    'loss-curves.pdf', and pickles the curves to 'results.pkl'.
    """
    marker = itertools.cycle((',', '+', '.', 'o', '*'))

    SIZE = options.size
    EPOCHS = options.epochs
    BATCH_SIZE = options.batch_size
    TRAIN_SIZE = 60000 // BATCH_SIZE
    TEST_SIZE = 10000 // BATCH_SIZE
    CUDA = options.cuda

    # BUG FIX: `results` was re-initialized inside the model loop, so the
    # pickle only ever contained the last model's curve. Collect all here.
    results = {}

    # for modelname in ['relu', 'sigmoid', 'relu-lambda', 'sigmoid-lambda', 'relu-sigloss', 'sigmoid-sigloss', 'bn-relu', 'relu-bn', 'sigmoid-bn']:
    for modelname in ['relu-lambda', 'relu', 'relu-bn']:  # , 'linear-bn']:
        print('testing model ', modelname)
        model = load_model(modelname, size=SIZE, mult=options.mult)
        print(util.count_params(model), ' parameters')
        if CUDA:
            model.cuda()

        criterion = nn.MSELoss(size_average=False)
        optimizer = optim.Adam(model.parameters(), lr=options.learning_rate)
        accuracies = []

        for e in tqdm.trange(EPOCHS):
            for i in range(TRAIN_SIZE):
                # Autoencoder target is the input itself (random batch).
                x = torch.rand(BATCH_SIZE, SIZE)
                if CUDA:
                    x = x.cuda()
                x = Variable(x)
                x.requires_grad = False

                optimizer.zero_grad()
                y = model(x)
                loss = criterion(y, x)

                # Optional extra penalties on intermediate activations.
                if 'lambda' in modelname:
                    lloss = sum(loss_terms(model, x))
                    loss = loss + options.lambd * lloss
                if 'sigloss' in modelname:
                    lterms = loss_terms(model, x)
                    lloss = (1.0 / len(lterms)) * sum(
                        [nn.functional.sigmoid(l) for l in lterms])
                    loss = loss + options.lambd * lloss

                loss.backward()
                optimizer.step()

            # Per-epoch evaluation on fresh random batches.
            sm = 0
            for i in range(TEST_SIZE):
                x = torch.rand(BATCH_SIZE, SIZE)  # was `+SIZE` (stray unary plus)
                if CUDA:
                    x = x.cuda()
                x = Variable(x)
                x.requires_grad = False
                y = model(x)
                loss = criterion(y, x)
                sm += float(loss.data[0])
            accuracies.append(sm / (TEST_SIZE * BATCH_SIZE))

        plt.plot(accuracies, label=modelname, marker=next(marker))
        results[modelname] = list(accuracies)

    plt.title('lambda ' + str(options.lambd))
    plt.legend()
    plt.savefig('loss-curves.pdf')

    # BUG FIX: close the pickle file deterministically
    # (was pickle.dump(results, open('results.pkl', 'wb'))).
    with open('results.pkl', 'wb') as f:
        pickle.dump(results, f)
def go_learnrate(options):
    """Sweep Adam learning rates; plot and pickle per-rate accuracy curves.

    NOTE(review): `trainloader`/`testloader` are not defined in this
    function — presumably module-level globals; verify before running.
    """
    marker = itertools.cycle((',', '+', '.', 'o', '*'))

    SHAPE = options.size
    EPOCHS = options.epochs
    BATCH_SIZE = options.batch_size
    CUDA = options.cuda

    w = SummaryWriter()

    # BUG FIX: `results` was re-initialized inside the learning-rate loop,
    # so the pickle only ever contained the last rate's curve.
    results = {}

    for learnrate in [
            0.000001, 0.000005, 0.00001, 0.00005, 0.0001, 0.0005, 0.001,
            0.005, 0.01, 0.05
    ]:
        print('testing learning rate ', learnrate)
        model = load_model('relu', False)
        print(util.count_params(model), ' parameters')
        if CUDA:
            model.cuda()

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=learnrate)
        accuracies = []

        for e in tqdm.trange(EPOCHS):
            for i, data in enumerate(trainloader, 0):
                # get the inputs
                inputs, labels = data
                if CUDA:
                    inputs, labels = inputs.cuda(), labels.cuda()
                # wrap them in Variable
                inputs, labels = Variable(inputs), Variable(labels)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward + backward + optimize
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

            # Per-epoch accuracy on the test set.
            correct = 0
            total = 0
            for i, data in enumerate(testloader):
                inputs, labels = data
                if CUDA:
                    inputs, labels = inputs.cuda(), labels.cuda()
                inputs, labels = Variable(inputs), Variable(labels)
                outputs = model(inputs)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels.data).sum()
            accuracies.append(correct / total)

        plt.plot(accuracies, label=str(learnrate), marker=next(marker))
        results[str(learnrate)] = list(accuracies)

    plt.title('learning rates ')
    plt.legend()
    plt.savefig('loss-curves-lr.pdf')

    # BUG FIX: close the pickle file deterministically.
    with open('results-lr.pkl', 'wb') as f:
        pickle.dump(results, f)
def go_learnrate(options):
    """Sweep Adam learning rates on CIFAR-10; plot and pickle accuracy curves.

    Downloads CIFAR-10, trains a fresh model per learning rate, records
    per-epoch test accuracy, plots every curve into 'loss-curves-lr.pdf',
    and pickles all curves to 'results-lr.pkl'.
    """
    marker = itertools.cycle((',', '+', '.', 'o', '*'))

    EPOCHS = options.epochs
    BATCH_SIZE = options.batch_size
    CUDA = options.cuda

    w = SummaryWriter()

    # Set up the dataset
    normalize = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    train = torchvision.datasets.CIFAR10(root=options.data,
                                         train=True,
                                         download=True,
                                         transform=normalize)
    trainloader = torch.utils.data.DataLoader(train,
                                              batch_size=BATCH_SIZE,
                                              shuffle=True,
                                              num_workers=2)
    test = torchvision.datasets.CIFAR10(root=options.data,
                                        train=False,
                                        download=True,
                                        transform=normalize)
    testloader = torch.utils.data.DataLoader(test,
                                             batch_size=BATCH_SIZE,
                                             shuffle=False,
                                             num_workers=2)
    classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse',
               'ship', 'truck')

    # BUG FIX: `results` was re-initialized inside the learning-rate loop,
    # so the pickle only ever contained the last rate's curve.
    results = {}

    for learnrate in [
            0.000001, 0.000005, 0.00001, 0.00005, 0.0001, 0.0005, 0.001,
            0.005, 0.01, 0.05
    ]:
        print('testing learning rate ', learnrate)
        model = load_model('relu', False)
        print(util.count_params(model), ' parameters')
        if CUDA:
            model.cuda()

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=learnrate)
        accuracies = []

        for e in tqdm.trange(EPOCHS):
            for i, data in enumerate(trainloader, 0):
                # get the inputs
                inputs, labels = data
                if CUDA:
                    inputs, labels = inputs.cuda(), labels.cuda()
                # wrap them in Variable
                inputs, labels = Variable(inputs), Variable(labels)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward + backward + optimize
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

            # Per-epoch accuracy on the test set.
            correct = 0
            total = 0
            for i, data in enumerate(testloader):
                inputs, labels = data
                if CUDA:
                    inputs, labels = inputs.cuda(), labels.cuda()
                inputs, labels = Variable(inputs), Variable(labels)
                outputs = model(inputs)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels.data).sum()
            accuracies.append(correct / total)

        plt.plot(accuracies, label=str(learnrate), marker=next(marker))
        results[str(learnrate)] = list(accuracies)

    plt.title('learning rates ')
    plt.legend()
    plt.savefig('loss-curves-lr.pdf')

    # BUG FIX: close the pickle file deterministically.
    with open('results-lr.pkl', 'wb') as f:
        pickle.dump(results, f)