class bgrl(Method):
    def __init__(self, input_shape, vol):
        super(bgrl, self).__init__(500)
        self.input_shape = input_shape
        self.vol = vol
        self.gamma = 1.0  # control the effect of softmax
        self.losses = np.zeros((self.max_iter, ))
        self.vals = np.zeros((self.max_iter, ))
        self.device = torch.device("cuda")
        self.mu = MLP(input_shape, hidden_dim=64, num_outputs=1).to(device=self.device)
        self.nu = MLP(input_shape, hidden_dim=64, num_outputs=1).to(device=self.device)
        self.tf_optim = Adam(list(self.mu.parameters()) + list(self.nu.parameters()), lr=0.002)

    def update_parameters(self, As, Bs, shuffle=True):
        if shuffle:
            np.random.shuffle(As)
            np.random.shuffle(Bs)
        As = torch.FloatTensor(As).to(self.device)
        Bs = torch.FloatTensor(Bs).to(self.device)
        VAs = self.mu(As)
        VBs = self.nu(Bs)
        cost = torch.norm(As - Bs, p=2, dim=-1)
        damping = VAs.squeeze() - VBs.squeeze() - cost
        damping = self.gamma * torch.exp(damping / self.gamma)
        loss = -VAs.mean() + VBs.mean() + damping.mean()
        self.tf_optim.zero_grad()
        loss.backward()
        self.tf_optim.step()
        return loss.item()

    def estimate(self, As, Bs):
        As = torch.FloatTensor(As).to(self.device)
        Bs = torch.FloatTensor(Bs).to(self.device)
        VAs = self.mu(As)
        VBs = self.nu(Bs)
        rv = torch.abs(VAs.mean() - VBs.mean())
        return rv.squeeze().detach().cpu().numpy()

    def train(self, As, Bs):
        for i in range(self.max_iter):
            loss = self.update_parameters(As, Bs)
            self.losses[i] = loss
            self.vals[i] = self.estimate(As, Bs)
def mpl(root, path_train, path_test):
    data_set_train = dataset_MLP(root + path_train, train=True)
    data_set_test = dataset_MLP(root + path_test, train=False)
    trainloader = DataLoader(data_set_train, batch_size=1000, shuffle=True)
    testloader = DataLoader(data_set_test, batch_size=1000)
    model = MLP()
    criterion = t.nn.CrossEntropyLoss()
    lr = 0.01
    optimizer = t.optim.SGD(model.parameters(), lr, momentum=0.4)
    for epoch in range(240):
        for _, (data, label) in enumerate(trainloader):
            model.train()
            optimizer.zero_grad()
            score = model(data)
            loss = criterion(score, label)
            loss.backward()
            optimizer.step()
        print("Epoch:%d loss:%f" % (epoch, loss.item()))
    res = []
    for _, data in enumerate(testloader):
        model.eval()
        predict = model(data)
        predict = predict.detach().numpy().tolist()
        res += predict
    res = np.array(res)
    ans = np.argmax(res, axis=1)
    data_set_test.save_res(ans, "./images/res_MLP.csv")
def main(dataset, dim, layers, lr, reg, epochs, batchsize):
    n_user = overlap_user(dataset)
    print(n_user)
    logging.info(str(n_user))
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    mf_s, mf_t = load_model(dataset, dim)
    mapping = MLP(dim, layers)
    mf_s = mf_s.to(device)
    mf_t = mf_t.to(device)
    mapping = mapping.to(device)
    opt = torch.optim.Adam(mapping.parameters(), lr=lr, weight_decay=reg)
    mse_loss = nn.MSELoss()
    start = time()
    for epoch in range(epochs):
        loss_sum = 0
        for users in batch_user(n_user, batchsize):
            us = torch.tensor(users).long()
            us = us.to(device)
            u = mf_s.get_embed(us)
            y = mf_t.get_embed(us)
            loss = train(mapping, opt, mse_loss, u, y)
            loss_sum += loss
        print('Epoch %d [%.1f] loss = %f' % (epoch, time()-start, loss_sum))
        logging.info('Epoch %d [%.1f] loss = %f' % (epoch, time()-start, loss_sum))
        start = time()
    mfile = 'pretrain/%s/Mapping.pth.tar' % dataset
    torch.save(mapping.state_dict(), mfile)
    print('save [%.1f]' % (time()-start))
    logging.info('save [%.1f]' % (time()-start))
def train_and_send(global_model_weights, current_epoch, IDS_df):
    device = 'cpu'
    if torch.cuda.is_available():
        device = 'cuda'

    # Defining the DNN model
    input_size = model_input_size
    model = MLP(input_size)
    model.load_state_dict(torch.load(global_model_weights))
    model.to(device)

    # Cross Entropy Loss
    error = nn.CrossEntropyLoss().to(device)

    # Adam Optimizer
    learning_rate = 0.001
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0.01)

    model, loss = train_model_stratified(model, optimizer, error, device, current_epoch, IDS_df)

    # Encode model weights and send
    model.to('cpu')
    model_str = encode_weights(model)
    remote_mqttclient.publish(TRAINED_MODEL_TOPIC, payload=model_str, qos=2, retain=False)
    remote_mqttclient.publish(TRAINED_LOSS_TOPIC, payload=str(loss), qos=2, retain=False)
class wgan(Method):
    def __init__(self, input_shape, vol):
        super(wgan, self).__init__(2000)
        self.input_shape = input_shape
        self.vol = vol
        self.clamp_max = 0.01
        self.losses = np.zeros((self.max_iter, ))
        self.vals = np.zeros((self.max_iter, ))
        self.device = torch.device("cuda")
        self.disc = MLP(input_shape, hidden_dim=64, num_outputs=1).to(device=self.device)
        self.disc_optim = Adam(self.disc.parameters(), lr=0.002)

    def update_parameters(self, As, Bs, shuffle=True):
        if shuffle:
            np.random.shuffle(As)
            np.random.shuffle(Bs)
        As = torch.FloatTensor(As).to(self.device)
        Bs = torch.FloatTensor(Bs).to(self.device)
        VAs = self.disc(As)
        VBs = self.disc(Bs)
        loss1 = VAs.mean()
        loss2 = -VBs.mean()
        self.disc_optim.zero_grad()
        loss1.backward()
        loss2.backward()
        self.disc_optim.step()
        for p in self.disc.parameters():
            p.data.clamp_(-self.clamp_max, self.clamp_max)
        return (loss1 + loss2).item()

    def estimate(self, As, Bs):
        As = torch.FloatTensor(As).to(self.device)
        Bs = torch.FloatTensor(Bs).to(self.device)
        VAs = self.disc(As)
        VBs = self.disc(Bs)
        rv = torch.abs(VAs.mean() - VBs.mean())
        return rv.squeeze().detach().cpu().numpy()

    def train(self, As, Bs):
        for i in range(self.max_iter):
            loss = self.update_parameters(As, Bs)
            self.losses[i] = loss
            self.vals[i] = self.estimate(As, Bs)
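# Usage sketch (an illustration, not part of the original file): both estimators
# above share the same Method interface, so either can be trained on a pair of
# empirical samples and queried for its distance estimate. Assumes the
# numpy/torch/Adam imports the classes rely on and a CUDA device, as both
# constructors do; vol=1.0 is a placeholder value for the unused vol argument.
As = np.random.randn(256, 2).astype(np.float32)          # draws from N(0, I)
Bs = (np.random.randn(256, 2) + 2.0).astype(np.float32)  # draws from N(2, I)
for est in (bgrl(input_shape=2, vol=1.0), wgan(input_shape=2, vol=1.0)):
    est.train(As, Bs)
    print(type(est).__name__, 'final estimate:', est.vals[-1])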
def main(args):
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # # Build data loader
    # dataset, targets = load_dataset()
    # np.save("__cache_dataset.npy", dataset)
    # np.save("__cache_targets.npy", targets)
    # return
    dataset = np.load("__cache_dataset.npy")
    targets = np.load("__cache_targets.npy")

    # Build the models
    mlp = MLP(args.input_size, args.output_size)
    mlp.load_state_dict(
        torch.load('_backup_model_statedict/mlp_100_4000_PReLU_ae_dd_final.pkl'))
    if torch.cuda.is_available():
        mlp.cuda()

    # Loss and Optimizer
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adagrad(mlp.parameters())

    # Train the Models
    total_loss = []
    print(len(dataset))
    print(len(targets))
    sm = 100  # start saving models after 100 epochs
    for epoch in range(args.num_epochs):
        print("epoch" + str(epoch))
        avg_loss = 0
        for i in range(0, len(dataset), args.batch_size):
            # Forward, Backward and Optimize
            mlp.zero_grad()
            bi, bt = get_input(i, dataset, targets, args.batch_size)
            bi = to_var(bi)
            bt = to_var(bt)
            bo = mlp(bi)
            loss = criterion(bo, bt)
            avg_loss = avg_loss + loss.item()
            loss.backward()
            optimizer.step()
        print("--average loss:")
        print(avg_loss / (len(dataset) / args.batch_size))
        total_loss.append(avg_loss / (len(dataset) / args.batch_size))
        # Save the models
        if epoch == sm:
            model_path = 'mlp_100_4000_PReLU_ae_dd' + str(sm) + '.pkl'
            torch.save(mlp.state_dict(), os.path.join(args.model_path, model_path))
            sm = sm + 50  # save a model every 50 epochs from epoch 100 onwards

    torch.save(total_loss, 'total_loss.dat')
    model_path = 'mlp_100_4000_PReLU_ae_dd_final.pkl'
    torch.save(mlp.state_dict(), os.path.join(args.model_path, model_path))
def gpu_thread(load, memory_queue, process_queue, common_dict, worker):
    # the only thread that has access to the gpu; it performs all the NN computation
    import psutil
    p = psutil.Process()
    p.cpu_affinity([worker])
    import signal
    signal.signal(signal.SIGINT, signal.SIG_IGN)
    try:
        print('process started with pid: {} on core {}'.format(os.getpid(), worker), flush=True)
        model = MLP(parameters.OBS_SPACE, parameters.ACTION_SPACE)
        model.to(parameters.DEVICE)
        # optimizer = optim.Adam(model.parameters(), lr=5e-5)
        # optimizer = optim.SGD(model.parameters(), lr=3e-2)
        optimizer = optim.RMSprop(model.parameters(), lr=1e-4)
        epochs = 0
        if load:
            checkpoint = torch.load('./model/walker.pt')
            model.load_state_dict(checkpoint['model_state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
            epochs = checkpoint['epochs']
        observations = torch.Tensor([]).to(parameters.DEVICE)
        rewards = torch.Tensor([]).to(parameters.DEVICE)
        actions = torch.Tensor([]).to(parameters.DEVICE)
        probs = torch.Tensor([]).to(parameters.DEVICE)
        common_dict['epoch'] = epochs
        while True:
            memory_full, observations, rewards, actions, probs = \
                destack_memory(memory_queue, observations, rewards, actions, probs)
            destack_process(model, process_queue, common_dict)
            if len(observations) > parameters.MAXLEN or memory_full:
                epochs += 1
                print('-' * 60 + '\n epoch ' + str(epochs) + '\n' + '-' * 60)
                run_epoch(epochs, model, optimizer, observations, rewards, actions, probs)
                observations = torch.Tensor([]).to(parameters.DEVICE)
                rewards = torch.Tensor([]).to(parameters.DEVICE)
                actions = torch.Tensor([]).to(parameters.DEVICE)
                probs = torch.Tensor([]).to(parameters.DEVICE)
                torch.save(
                    {'model_state_dict': model.state_dict(),
                     'optimizer_state_dict': optimizer.state_dict(),
                     'epochs': epochs},
                    './model/walker.pt')
                common_dict['epoch'] = epochs
    except Exception as e:
        print(e)
        print('saving before interruption', flush=True)
        torch.save(
            {'model_state_dict': model.state_dict(),
             'optimizer_state_dict': optimizer.state_dict(),
             'epochs': epochs},
            './model/walker.pt')
def main(opt):
    train_dataset = BADataset(opt.dataroot, opt.L, True, False, False)
    train_dataloader = BADataloader(train_dataset, batch_size=opt.batchSize,
                                    shuffle=True, num_workers=opt.workers, drop_last=True)

    valid_dataset = BADataset(opt.dataroot, opt.L, False, True, False)
    valid_dataloader = BADataloader(valid_dataset, batch_size=opt.batchSize,
                                    shuffle=True, num_workers=opt.workers, drop_last=True)

    test_dataset = BADataset(opt.dataroot, opt.L, False, False, True)
    test_dataloader = BADataloader(test_dataset, batch_size=opt.batchSize,
                                   shuffle=True, num_workers=opt.workers, drop_last=True)

    all_dataset = BADataset(opt.dataroot, opt.L, False, False, False)
    all_dataloader = BADataloader(all_dataset, batch_size=opt.batchSize,
                                  shuffle=False, num_workers=opt.workers, drop_last=False)

    opt.n_edge_types = train_dataset.n_edge_types
    opt.n_node = train_dataset.n_node

    net = MLP(opt)
    net.double()
    print(net)

    criterion = nn.BCELoss()
    if opt.cuda:
        net.cuda()
        criterion.cuda()

    optimizer = optim.Adam(net.parameters(), lr=opt.lr)
    early_stopping = EarlyStopping(patience=opt.patience, verbose=True)

    os.makedirs(OutputDir, exist_ok=True)
    train_loss_ls = []
    valid_loss_ls = []
    test_loss_ls = []

    for epoch in range(0, opt.niter):
        train_loss = train(epoch, train_dataloader, net, criterion, optimizer, opt)
        valid_loss = valid(valid_dataloader, net, criterion, opt)
        test_loss = test(test_dataloader, net, criterion, opt)

        train_loss_ls.append(train_loss)
        valid_loss_ls.append(valid_loss)
        test_loss_ls.append(test_loss)

        early_stopping(valid_loss, net, OutputDir)
        if early_stopping.early_stop:
            print("Early stopping")
            break

    df = pd.DataFrame({'epoch': [i for i in range(1, len(train_loss_ls) + 1)],
                       'train_loss': train_loss_ls,
                       'valid_loss': valid_loss_ls,
                       'test_loss': test_loss_ls})
    df.to_csv(OutputDir + '/loss.csv', index=False)

    net.load_state_dict(torch.load(OutputDir + '/checkpoint.pt'))
    inference(all_dataloader, net, criterion, opt, OutputDir)
def train(FLAGS):
    """Train our embeddings."""

    # Get data loaders
    print("==> Reading and processing the data ... ", end="")
    train_loader, test_loader, num_unique_words = process_data(
        data_dir=FLAGS.data_dir,
        data_file=FLAGS.data_file,
        vocab_size=FLAGS.vocab_size,
        window_size=FLAGS.window_size,
        split_ratio=FLAGS.split_ratio,
        batch_size=FLAGS.batch_size,
    )
    print("[COMPLETE]")

    # Initialize model, criterion, loss
    print("==> Initializing model components ... ", end="")
    model = MLP(
        D_in=num_unique_words,
        embedding_dim=FLAGS.embedding_dim,
        num_hidden_units=FLAGS.num_hidden_units,
        window_size=FLAGS.window_size,
    )
    # Objective
    criterion = torch.nn.CrossEntropyLoss()
    # Optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=FLAGS.lr)
    print("[COMPLETE]")

    # Train the model
    print("==> Training the model ... [IN PROGRESS]")
    model = training_procedure(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        train_loader=train_loader,
        test_loader=test_loader,
        num_epochs=FLAGS.num_epochs,
        learning_rate=FLAGS.lr,
        decay_rate=FLAGS.decay_rate,
        max_grad_norm=FLAGS.max_grad_norm,
    )
    print("\n[COMPLETE]")

    # Save the model
    print("==> Saving the model ... [IN PROGRESS]")
    torch.save(model, os.path.join(basedir, FLAGS.data_dir, "model.pt"))
    print("\n[COMPLETE]")
def main():
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)

    data = locate('get_{}'.format(args.dataset))(args)
    train_data, val_data, test_data = data
    if args.dataset == 'mnist':
        model = MLP(args)
    elif args.dataset in ('cifar10', 'cifar100'):
        model = Resnet18(args)
    else:
        raise Exception('unsupported dataset: {}'.format(args.dataset))

    weight_arch = data_selection(data[0])
    architect = Architect(model, weight_arch, args)

    train_loader = DataLoader(train_data, batch_size=args.batch_size, shuffle=True, drop_last=True)
    val_loader = DataLoader(val_data, batch_size=64, shuffle=True, drop_last=False)
    test_loader = DataLoader(test_data, batch_size=64, shuffle=True, drop_last=False)

    optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)
    print(optimizer.state)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.n_epochs), eta_min=args.learning_rate_min)

    for epoch in range(args.n_epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        train_acc, train_obj = Train(train_loader, val_data, model, args,
                                     architect, weight_arch, optimizer)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(val_loader, model)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
def main():
    parser = ArgumentParser(description='train a MLP model')
    parser.add_argument('INPUT', type=str, help='path to input')
    parser.add_argument('EMBED', type=str, help='path to embedding')
    parser.add_argument('--gpu', '-g', default=-1, type=int, help='gpu number')
    args = parser.parse_args()

    word_to_id = word2id(args.INPUT)
    embedding = id2embedding(args.EMBED, word_to_id)
    train_loader = MyDataLoader(args.INPUT, word_to_id, batch_size=5000,
                                shuffle=True, num_workers=1)

    # create the model instance
    net = MLP(word_to_id, embedding)
    optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)
    gpu_id = args.gpu
    device = torch.device("cuda:{}".format(gpu_id) if gpu_id >= 0 else "cpu")
    net = net.to(device)

    epochs = 5
    log_interval = 10
    for epoch in range(1, epochs + 1):
        net.train()  # enable training mode (matters when using Dropout etc.)
        for batch_idx, (ids, mask, labels) in enumerate(train_loader):
            ids, mask, labels = ids.to(device), mask.to(device), labels.to(device)
            # reset the gradients first; otherwise gradients from past steps accumulate
            optimizer.zero_grad()
            output = net(ids, mask)
            output2 = F.softmax(output, dim=1)
            loss = F.binary_cross_entropy(output2[:, 1], labels.float())  # compute the loss
            loss.backward()
            optimizer.step()  # update the parameters
            # report intermediate progress
            if batch_idx % log_interval == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(ids), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), loss.item()))
def train_model(config, gpu_id, save_dir, exp_name):
    # Instantiating the model
    model_type = config.get('model_type', 'MLP')
    if model_type == "MLP":
        model = MLP(784, config["hidden_layers"], 10,
                    config["nonlinearity"], config["initialization"],
                    config["dropout"], verbose=True)
    elif model_type == "CNN":
        model = CNN(config["initialization"], config["is_batch_norm"], verbose=True)
    else:
        raise ValueError('config["model_type"] not supported : {}'.format(model_type))

    # Loading the MNIST dataset
    x_train, y_train, x_valid, y_valid, x_test, y_test = utils.load_mnist(
        config["data_file"], data_format=config["data_format"])
    if config['data_reduction'] != 1.:
        x_train, y_train = utils.reduce_trainset_size(x_train, y_train, config['data_reduction'])

    # If a GPU is available, sends model and dataset to the GPU
    if torch.cuda.is_available():
        model.cuda(gpu_id)
        x_train = torch.from_numpy(x_train).cuda(gpu_id)
        y_train = torch.from_numpy(y_train).cuda(gpu_id)
        x_valid = Variable(torch.from_numpy(x_valid), volatile=True).cuda(gpu_id)
        y_valid = Variable(torch.from_numpy(y_valid), volatile=True).cuda(gpu_id)
        x_test = Variable(torch.from_numpy(x_test), volatile=True).cuda(gpu_id)
        y_test = Variable(torch.from_numpy(y_test), volatile=True).cuda(gpu_id)
        print("Running on GPU")
    else:
        x_train = torch.from_numpy(x_train)
        y_train = torch.from_numpy(y_train)
        x_valid = Variable(torch.from_numpy(x_valid))
        y_valid = Variable(torch.from_numpy(y_valid))
        x_test = Variable(torch.from_numpy(x_test))
        y_test = Variable(torch.from_numpy(y_test))
        print("WATCH-OUT : torch.cuda.is_available() returned False. Running on CPU.")

    # Instantiate TensorDataset and DataLoader objects
    train_set = torch.utils.data.TensorDataset(x_train, y_train)
    loader = torch.utils.data.DataLoader(train_set, batch_size=config["mb_size"], shuffle=True)

    # Optimizer and Loss Function
    optimizer = optim.SGD(model.parameters(), lr=config['lr'], momentum=config['momentum'],
                          weight_decay=config['L2_hyperparam'] * (config['mb_size'] / x_train.size()[0]))
    loss_fn = nn.NLLLoss()

    # Records the model's performance
    train_tape = [[], []]
    valid_tape = [[], []]
    test_tape = [[], []]
    weights_tape = []

    def evaluate(data, labels):
        model.eval()
        if not isinstance(data, Variable):
            if torch.cuda.is_available():
                data = Variable(data, volatile=True).cuda(gpu_id)
                labels = Variable(labels, volatile=True).cuda(gpu_id)
            else:
                data = Variable(data)
                labels = Variable(labels)
        output = model(data)
        loss = loss_fn(output, labels)
        prediction = torch.max(output.data, 1)[1]
        accuracy = (prediction.eq(labels.data).sum() / labels.size(0)) * 100
        return loss.data[0], accuracy

    if not os.path.exists(os.path.join(save_dir, exp_name)):
        os.makedirs(os.path.join(save_dir, exp_name))

    # Record train accuracy
    train_loss, train_acc = evaluate(x_train, y_train)
    train_tape[0].append(train_loss)
    train_tape[1].append(train_acc)

    # Record valid accuracy
    valid_loss, valid_acc = evaluate(x_valid, y_valid)
    valid_tape[0].append(valid_loss)
    valid_tape[1].append(valid_acc)

    # Record test accuracy
    test_loss, test_acc = evaluate(x_test, y_test)
    test_tape[0].append(test_loss)
    test_tape[1].append(test_acc)

    # Record weights L2 norm
    weights_L2_norm = model.get_weights_L2_norm()
    weights_tape.append(float(weights_L2_norm.data.cpu().numpy()))

    print("BEFORE TRAINING \nLoss : {0:.3f} \nAcc : {1:.3f}".format(valid_loss, valid_acc))

    # TRAINING LOOP
    best_valid_acc = 0
    for epoch in range(1, config["max_epochs"]):
        start = time.time()
        model.train()
        for i, (x_batch, y_batch) in enumerate(loader):
            if torch.cuda.is_available():
                x_batch = Variable(x_batch).cuda(gpu_id)
                y_batch = Variable(y_batch).cuda(gpu_id)
            else:
                x_batch = Variable(x_batch)
                y_batch = Variable(y_batch)

            # Empties the gradients
            optimizer.zero_grad()

            # Feedforward through the model
            output = model(x_batch)

            # Computes the loss
            loss = loss_fn(output, y_batch)

            # Backpropagates to compute the gradients
            loss.backward()

            # Takes one training step
            optimizer.step()

        # Record weights L2 norm
        weights_L2_norm = model.get_weights_L2_norm()
        weights_tape.append(float(weights_L2_norm.data.cpu().numpy()))

        # Record train accuracy
        train_loss, train_acc = evaluate(x_train, y_train)
        train_tape[0].append(train_loss)
        train_tape[1].append(train_acc)

        # Record valid accuracy
        valid_loss, valid_acc = evaluate(x_valid, y_valid)
        valid_tape[0].append(valid_loss)
        valid_tape[1].append(valid_acc)

        # Record test accuracy
        test_loss, test_acc = evaluate(x_test, y_test)
        test_tape[0].append(test_loss)
        test_tape[1].append(test_acc)

        print("Epoch {0} \nLoss : {1:.3f} \nAcc : {2:.3f}".format(epoch, valid_loss, valid_acc))
        print("Time : {0:.2f}".format(time.time() - start))

        # Saves the model
        if valid_acc > best_valid_acc:
            print("NEW BEST MODEL")
            torch.save(model.state_dict(), os.path.join(save_dir, exp_name, "model"))
            best_valid_acc = valid_acc

        # Saves the graphs
        utils.save_results(train_tape, valid_tape, test_tape, weights_tape,
                           save_dir, exp_name, config)
        utils.update_comparative_chart(save_dir, config['show_test'])

    return
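# A hypothetical invocation of train_model above; the config keys mirror the
# ones the function reads, but every value here is a placeholder rather than a
# tuned setting from the original experiments.
config = {
    'model_type': 'MLP',
    'hidden_layers': [512, 256],
    'nonlinearity': 'relu',
    'initialization': 'glorot',
    'dropout': 0.5,
    'data_file': 'data/mnist.pkl',
    'data_format': 'flat',
    'data_reduction': 1.,
    'mb_size': 64,
    'lr': 0.02,
    'momentum': 0.9,
    'L2_hyperparam': 1e-4,
    'max_epochs': 50,
    'show_test': False,
}
train_model(config, gpu_id=0, save_dir='results', exp_name='mlp_baseline')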
def main():
    # check cuda
    device = f'cuda:{args.gpu}' if torch.cuda.is_available() and args.gpu >= 0 else 'cpu'

    # load data
    dataset = DglNodePropPredDataset(name=args.dataset)
    evaluator = Evaluator(name=args.dataset)

    split_idx = dataset.get_idx_split()
    # graph: DGLGraph object, labels: torch tensor of shape (num_nodes, num_tasks)
    g, labels = dataset[0]

    if args.dataset == 'ogbn-arxiv':
        g = dgl.to_bidirected(g, copy_ndata=True)
        feat = g.ndata['feat']
        feat = (feat - feat.mean(0)) / feat.std(0)
        g.ndata['feat'] = feat

    g = g.to(device)
    feats = g.ndata['feat']
    labels = labels.to(device)

    # load masks for train / validation / test
    train_idx = split_idx["train"].to(device)
    valid_idx = split_idx["valid"].to(device)
    test_idx = split_idx["test"].to(device)

    n_features = feats.size()[-1]
    n_classes = dataset.num_classes

    # load model
    if args.model == 'mlp':
        model = MLP(n_features, args.hid_dim, n_classes, args.num_layers, args.dropout)
    elif args.model == 'linear':
        model = MLPLinear(n_features, n_classes)
    else:
        raise NotImplementedError(f'Model {args.model} is not supported.')

    model = model.to(device)
    print(f'Model parameters: {sum(p.numel() for p in model.parameters())}')

    if args.pretrain:
        print('---------- Before ----------')
        model.load_state_dict(torch.load(f'base/{args.dataset}-{args.model}.pt'))
        model.eval()

        y_soft = model(feats).exp()

        y_pred = y_soft.argmax(dim=-1, keepdim=True)
        valid_acc = evaluate(y_pred, labels, valid_idx, evaluator)
        test_acc = evaluate(y_pred, labels, test_idx, evaluator)
        print(f'Valid acc: {valid_acc:.4f} | Test acc: {test_acc:.4f}')

        print('---------- Correct & Smoothing ----------')
        cs = CorrectAndSmooth(num_correction_layers=args.num_correction_layers,
                              correction_alpha=args.correction_alpha,
                              correction_adj=args.correction_adj,
                              num_smoothing_layers=args.num_smoothing_layers,
                              smoothing_alpha=args.smoothing_alpha,
                              smoothing_adj=args.smoothing_adj,
                              autoscale=args.autoscale,
                              scale=args.scale)

        mask_idx = torch.cat([train_idx, valid_idx])
        y_soft = cs.correct(g, y_soft, labels[mask_idx], mask_idx)
        y_soft = cs.smooth(g, y_soft, labels[mask_idx], mask_idx)
        y_pred = y_soft.argmax(dim=-1, keepdim=True)
        valid_acc = evaluate(y_pred, labels, valid_idx, evaluator)
        test_acc = evaluate(y_pred, labels, test_idx, evaluator)
        print(f'Valid acc: {valid_acc:.4f} | Test acc: {test_acc:.4f}')
    else:
        opt = optim.Adam(model.parameters(), lr=args.lr)

        best_acc = 0
        best_model = copy.deepcopy(model)

        # training
        print('---------- Training ----------')
        for i in range(args.epochs):
            model.train()
            opt.zero_grad()

            logits = model(feats)
            train_loss = F.nll_loss(logits[train_idx], labels.squeeze(1)[train_idx])
            train_loss.backward()

            opt.step()

            model.eval()
            with torch.no_grad():
                logits = model(feats)
                y_pred = logits.argmax(dim=-1, keepdim=True)

                train_acc = evaluate(y_pred, labels, train_idx, evaluator)
                valid_acc = evaluate(y_pred, labels, valid_idx, evaluator)

                print(f'Epoch {i} | Train loss: {train_loss.item():.4f} | '
                      f'Train acc: {train_acc:.4f} | Valid acc {valid_acc:.4f}')

                if valid_acc > best_acc:
                    best_acc = valid_acc
                    best_model = copy.deepcopy(model)

        # testing & saving model
        print('---------- Testing ----------')
        best_model.eval()
        logits = best_model(feats)
        y_pred = logits.argmax(dim=-1, keepdim=True)
        test_acc = evaluate(y_pred, labels, test_idx, evaluator)
        print(f'Test acc: {test_acc:.4f}')

        if not os.path.exists('base'):
            os.makedirs('base')
        torch.save(best_model.state_dict(), f'base/{args.dataset}-{args.model}.pt')
class PolicyGradient:
    def __init__(self, state_dim, device='cpu', gamma=0.99, lr=0.01, batch_size=5):
        self.gamma = gamma
        self.policy_net = MLP(state_dim)
        self.optimizer = torch.optim.RMSprop(self.policy_net.parameters(), lr=lr)
        self.batch_size = batch_size

    def choose_action(self, state):
        state = torch.from_numpy(state).float()
        state = Variable(state)
        probs = self.policy_net(state)
        m = Bernoulli(probs)
        action = m.sample()
        action = action.data.numpy().astype(int)[0]  # convert to a scalar
        return action

    def update(self, reward_pool, state_pool, action_pool):
        # Discount rewards
        running_add = 0
        for i in reversed(range(len(reward_pool))):
            if reward_pool[i] == 0:
                running_add = 0
            else:
                running_add = running_add * self.gamma + reward_pool[i]
                reward_pool[i] = running_add

        # Normalize rewards
        reward_mean = np.mean(reward_pool)
        reward_std = np.std(reward_pool)
        for i in range(len(reward_pool)):
            reward_pool[i] = (reward_pool[i] - reward_mean) / reward_std

        # Gradient Descent
        self.optimizer.zero_grad()
        for i in range(len(reward_pool)):
            state = state_pool[i]
            action = Variable(torch.FloatTensor([action_pool[i]]))
            reward = reward_pool[i]
            state = Variable(torch.from_numpy(state).float())
            probs = self.policy_net(state)
            m = Bernoulli(probs)
            loss = -m.log_prob(action) * reward  # negative score function x reward
            # print(loss)
            loss.backward()
        self.optimizer.step()

    def save_model(self, path):
        torch.save(self.policy_net.state_dict(), path)

    def load_model(self, path):
        self.policy_net.load_state_dict(torch.load(path))
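# A minimal interaction sketch for the PolicyGradient agent above, assuming a
# Gym CartPole environment and that MLP(state_dim) outputs a single Bernoulli
# probability; the episode loop and the gym import are illustrative, not
# original code.
import gym

env = gym.make('CartPole-v0')
agent = PolicyGradient(state_dim=env.observation_space.shape[0])
state_pool, action_pool, reward_pool = [], [], []
for episode in range(agent.batch_size):
    state = env.reset()
    done = False
    while not done:
        action = agent.choose_action(state)
        next_state, reward, done, _ = env.step(action)
        state_pool.append(state)
        action_pool.append(action)
        reward_pool.append(0 if done else reward)  # 0 marks an episode boundary
        state = next_state
agent.update(reward_pool, state_pool, action_pool)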
def train(FLAGS):
    """Train our embeddings."""

    # Get data loaders
    print("==> Reading and processing the data ... ", end="")
    train_loader, test_loader, num_unique_words, \
        num_unique_documents, word_to_idx = process_data(
            data_dir=FLAGS.data_dir,
            vocab_size=FLAGS.vocab_size,
            window_size=FLAGS.window_size,
            split_ratio=FLAGS.split_ratio,
            batch_size=FLAGS.batch_size,
        )
    print("[COMPLETE]")

    # Load pretrained GloVe embeddings for our vocab
    embedding_dir = os.path.join(basedir, "../../../../embeddings/glove")
    embedding_dim = 100
    embeddings = get_embeddings(
        embedding_dir=embedding_dir,
        embedding_dim=embedding_dim,
        words=word_to_idx.keys(),
    )

    # Initialize model, criterion, loss
    print("==> Initializing model components ... ", end="")
    model = MLP(
        D_in_words=num_unique_words,
        D_in_documents=num_unique_documents,
        embedding_dim=FLAGS.embedding_dim,
        num_hidden_units=FLAGS.num_hidden_units,
        window_size=FLAGS.window_size,
        embeddings=embeddings,
    )
    # Objective
    criterion = torch.nn.CrossEntropyLoss()
    # Optimizer
    # Only get the parameters with gradients (we freeze our GloVe embeddings)
    parameters = filter(lambda param: param.requires_grad, model.parameters())
    optimizer = torch.optim.Adam(parameters, lr=FLAGS.lr)
    print("[COMPLETE]")

    # Train the model
    print("==> Training the model ... [IN PROGRESS]")
    model = training_procedure(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        train_loader=train_loader,
        test_loader=test_loader,
        num_epochs=FLAGS.num_epochs,
        learning_rate=FLAGS.lr,
        decay_rate=FLAGS.decay_rate,
        max_grad_norm=FLAGS.max_grad_norm,
        log_every=FLAGS.log_every,
    )
    print("\n[COMPLETE]")

    # Save the model
    print("==> Saving the model ... [IN PROGRESS]")
    torch.save(model, os.path.join(basedir, FLAGS.data_dir, "model.pt"))
    print("\n[COMPLETE]")
class NonLocalTrainer(object):
    def __init__(self, args, trainLoader, testLoader):
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.out_path = args.out
        self.sigma = args.sigma
        self.beta = args.beta
        self.nClass = args.nClass
        self.model = MLP().to(self.device)
        self.optim = torch.optim.Adam(self.model.parameters(), lr=args.lr,
                                      weight_decay=args.weight_decay)
        self.criterion = nn.MSELoss()
        self.trainLoader = trainLoader
        self.testLoader = testLoader
        self.run_datetime = datetime.datetime.now()

        if not os.path.exists(self.out_path):
            os.makedirs(self.out_path)
        self.logger = Logger(self.out_path)
        with open(os.path.join(self.out_path, "para.json"), "w") as f:
            json.dump(args.__dict__, f)

        self.epoch = 0
        self.iteration = 0
        self.test_step = 0
        self.max_epoch = args.epochs
        self.val_interval = args.interval
        self.res = 0
        self.best_error = 1e7
        self.best_res_epoch = 0
        self.noiseMean = torch.zeros(args.batch_size, args.featureNums, 17, 17)
        self.noiseStd = torch.div(torch.ones(args.batch_size, args.featureNums, 17, 17), 1e3)

    def validate_one_epoch(self):
        self.model.eval()
        self.test_step += 1
        tsthreas = [0.1, 1, 10]
        tp = [0] * len(tsthreas)  # true positives
        tn = [0] * len(tsthreas)  # true negatives
        fp = [0] * len(tsthreas)  # false positives
        fn = [0] * len(tsthreas)  # false negatives
        ts = [0] * len(tsthreas)
        totalRegressionLoss = []
        total_error = 0
        total_count = 0
        p_error = 0
        p_count = 0
        largeGapCount = 0
        largeGap = 0
        for batch_idx, (data, target, _, _, _, _) in tqdm.tqdm(
                enumerate(self.testLoader), total=len(self.testLoader),
                desc='Valid :', ncols=80, leave=False):
            gt_micaps = target.numpy()
            data, target = data.to(device=self.device), target.to(device=self.device)
            with torch.no_grad():
                predictValues = self.model(data)
                regressionLoss = self.criterion(predictValues, target)
                predictNumpy = predictValues.cpu().numpy()
                totalRegressionLoss.append(regressionLoss.item())
                # totalClassificationLoss.append(classificationLoss.item())
                # predicted = torch.argmax(preds, dim=1)
                # correct += (predicted == logits).sum().item()
                gapValues = np.abs(predictNumpy - gt_micaps)
                total_error += np.sum(gapValues)
                total_count += gt_micaps.shape[0]
                p_error += np.sum((gt_micaps > 0.01) * gapValues)
                p_count += np.sum(gt_micaps > 0.01)
                largeGap += np.sum((gapValues > 5) * gapValues)
                largeGapCount += np.sum(gapValues > 5)
                for i, threas in enumerate(tsthreas):
                    tp[i] += np.sum((gt_micaps >= threas) * (predictNumpy >= threas))
                    tn[i] += np.sum((gt_micaps < threas) * (predictNumpy < threas))
                    fp[i] += np.sum((gt_micaps < threas) * (predictNumpy >= threas))
                    fn[i] += np.sum((gt_micaps >= threas) * (predictNumpy < threas))
        for i, _ in enumerate(tsthreas):
            ts[i] += round(tp[i] / (tp[i] + fp[i] + fn[i]), 5)
        totalAverageError = round(total_error / total_count, 5)
        pAverageError = round(p_error / p_count, 5)
        totalLoss = np.sum(totalRegressionLoss)
        largeGapRatio = round(largeGapCount / total_count, 5)
        largeGapMae = round(largeGap / largeGapCount, 5)
        info = {"test_regression_loss": totalLoss,
                "ts_score": ts,
                "aver_gap": totalAverageError,
                "aver_p_gap": pAverageError,
                "large_gap_ratio": largeGapRatio,
                "large_gap_mae": largeGapMae}
        print("========================== Epoch {} Test Result Show ==========================".format(self.epoch + 1))
        print(info)
        # for tag, value in info.items():
        #     self.logger.scalar_summary(tag, value, self.test_step)
        # if totalAverageError < self.best_error:
        #     self.best_error = totalAverageError
        #     self.best_res_epoch = self.epoch
        #     info["epoch"] = self.epoch
        #     info["modelParam"] = self.model.state_dict()
        #     info["optimParam"] = self.optim.state_dict()
        #     torch.save(info, os.path.join(self.out_path, str(self.epoch) + "_checkpoints.pth"))

    def train_one_epoch(self):
        self.model.train()
        for batch_idx, (data, target, _, _, _, _) in tqdm.tqdm(
                enumerate(self.trainLoader), total=len(self.trainLoader),
                desc='Train epoch=%d' % self.epoch, ncols=80, leave=False):
            iter_idx = batch_idx + self.epoch * len(self.trainLoader)
            # if (self.iteration != 0) and (iter_idx - 1) != self.iteration:
            #     continue
            self.iteration = iter_idx
            assert self.model.training
            self.optim.zero_grad()
            data = data.to(device=self.device)
            target = target.to(device=self.device)
            predictValues = self.model(data)
            regressionLoss = self.criterion(predictValues, target)
            regressionLoss.backward()
            # for named, param in self.model.named_parameters():
            #     print("Name : ", named)
            #     print(param.grad.data.sum())
            self.optim.step()
            regressionLossCpu = regressionLoss.item()
            self.logger.scalar_summary("train_regression_loss", regressionLossCpu, self.iteration + 1)
        for tag, value in self.model.named_parameters():
            self.logger.histo_summary(tag, value.data.cpu().numpy(), self.epoch + 1)
            self.logger.histo_summary(tag + '/grad', value.grad.data.cpu().numpy(), self.epoch + 1)

    def run(self):
        for epoch in range(self.max_epoch):
            self.epoch = epoch
            self.train_one_epoch()
            if (self.epoch + 1) % self.val_interval == 0:
                self.validate_one_epoch()
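# A hedged usage sketch for NonLocalTrainer above: given parsed args and the two
# loaders it expects, run() drives training with periodic validation. The args
# object and loaders here are assumptions, not original code.
trainer = NonLocalTrainer(args, trainLoader, testLoader)
trainer.run()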
X = (np.random.rand(10).reshape(-1, 1) - 1) / 2  # x between -0.5 and 0.
Y = f(X)
X = torch.from_numpy(X).type(torch.FloatTensor)
Y = torch.from_numpy(Y).type(torch.FloatTensor)
dataset = RegressionDataset(X, Y)

# Reproducibility
if args.seed is not None:
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)

net = MLP()
criterion = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=args.lr, weight_decay=args.wd)

# Load reference net if defined
if args.repulsive is not None:
    reference_net = model.MLP(dropout_rate=args.dropout_rate)
    reference_net.load_state_dict(torch.load(Path(args.repulsive)))

# Update of the network parameters
train_loader = DataLoader(dataset, batch_size=args.batch_size, shuffle=False)

# Sampling a repulsive bandwidth parameter
alpha = -3
beta = -0.5
bandwidth_repulsive = float(10**(alpha + (beta - alpha) * np.random.rand()))

# Preparation of the optimization
data_set_sizes.append(args.subset_fraction)
cloned_outputs.append(None)

# create training and validation loaders
session_train_loader = torch.utils.data.DataLoader(
    data,
    batch_size=args.batch_size,
    sampler=torch.utils.data.SubsetRandomSampler(sum(session_train_ids, [])))
session_val_loader = torch.utils.data.DataLoader(
    data,
    batch_size=args.batch_size,
    sampler=torch.utils.data.SubsetRandomSampler(sum(session_val_ids, [])))

optimizer = torch.optim.Adam(params=model.parameters(), lr=args.lr)
model_path_base = "base_{}".format(model_path)

# joint training on all base tasks
train_loss, val_loss = train_model(model, criterion, optimizer,
                                   session_train_loader, session_val_loader,
                                   task_id_dict=task_id_dict,
                                   outpath=model_path_base,
                                   device=device,
                                   store_model_internally=True)

# update omega values
plt.show()

# ---------------------------------------------
# --------------------- building model
# ---------------------------------------------
print(f"\t✅ building {args.network} model\n")
# ---------------------------------------------
if args.network == "mlp":
    net = MLP(input_neurons=mini_batch_inputs.shape[2]**2,
              output_neurons=mini_batch_labels.shape[1],
              learning_rate=args.learning_rate)
elif args.network == "cnn":
    net = CNN(input_channels=mini_batch_inputs.shape[1],
              output_neurons=mini_batch_labels.shape[1])
    optimizer = optim.Adam(net.parameters(), args.learning_rate)
else:
    print("[❌] Network Not Supported!")
    sys.exit(1)

# -----------------------------------------------------------
# --------------------- training and evaluating process
# -----------------------------------------------------------
print(f"\t✅ start training and evaluating process\n")
# -----------------------------------------------------------
valid_loss_min = np.Inf
criterion = torch.nn.CrossEntropyLoss()
start_time = time.time()
history = {
    "train_loss": [],
best_model = None
_dev_data_loader = DataLoader(dev_data, batch_size=32, shuffle=False)
for idx, params in enumerate(HYPER_PARA):
    BATCH_SIZE, DROP_RT, LR, EPOCHS = params
    best_roc = []
    best_prc = []
    data_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
    for run in range(1):
        model = MLP(NUM_ELEM, EMBEDDING_DIM, HIDDEN_DIM_ADD_ON, HIDDEN_DIM,
                    NUM_CLS, NUM_LYS, ADD_ON_FEATS, 100, DROP_RT)
        loss_func = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=LR)
        device = torch.device('cuda:0')
        model.to(device)
        last_roc = -1
        last_prc = -1
        epochs_no_imprv = 0
        for epoch in range(EPOCHS):
            model.train()
            epoch_loss = 0
            batch = tqdm(data_loader)
            for elem, label, lengths, feats in batch:
                optimizer.zero_grad()
                prediction = model(elem, lengths, feats)
                # loss = torch.mean(F.cross_entropy(prediction, label, reduction='none'))
def black_box_function(opt_param):
    mean_pure_ratio1 = 0
    mean_pure_ratio2 = 0

    print('building model...')
    cnn1 = MLP(n_outputs=num_classes)
    cnn1.cuda()
    print(cnn1.parameters)
    optimizer1 = torch.optim.Adam(cnn1.parameters(), lr=learning_rate)

    cnn2 = MLP(n_outputs=num_classes)
    cnn2.cuda()
    print(cnn2.parameters)
    optimizer2 = torch.optim.Adam(cnn2.parameters(), lr=learning_rate)

    rate_schedule = opt_param.copy()
    print('Schedule:', rate_schedule)

    epoch = 0
    train_acc1 = 0
    train_acc2 = 0
    # evaluate models with random weights
    test_acc1, test_acc2 = evaluate(test_loader, cnn1, cnn2)
    print('Epoch [%d/%d] Test Accuracy on the %s test images: Model1 %.4f %% Model2 %.4f %% Pure Ratio1 %.4f %% Pure Ratio2 %.4f %%'
          % (epoch + 1, args.n_epoch, len(test_dataset), test_acc1, test_acc2,
             mean_pure_ratio1, mean_pure_ratio2))
    # save results
    with open(txtfile, "a") as myfile:
        myfile.write(str(int(epoch)) + ' ' + str(train_acc1) + ' ' + str(train_acc2)
                     + ' ' + str(test_acc1) + " " + str(test_acc2)
                     + ' ' + str(mean_pure_ratio1) + ' ' + str(mean_pure_ratio2)
                     + ' ' + str(rate_schedule[epoch]) + "\n")

    # training
    for epoch in range(1, args.n_epoch):
        # train models
        cnn1.train()
        adjust_learning_rate(optimizer1, epoch)
        cnn2.train()
        adjust_learning_rate(optimizer2, epoch)
        train_acc1, train_acc2, pure_ratio_1_list, pure_ratio_2_list = train(
            train_loader, epoch, cnn1, optimizer1, cnn2, optimizer2, rate_schedule)
        # evaluate models
        test_acc1, test_acc2 = evaluate(test_loader, cnn1, cnn2)
        # save results
        mean_pure_ratio1 = sum(pure_ratio_1_list) / len(pure_ratio_1_list)
        mean_pure_ratio2 = sum(pure_ratio_2_list) / len(pure_ratio_2_list)
        print('Epoch [%d/%d] Test Accuracy on the %s test images: Model1 %.4f %% Model2 %.4f %%, Pure Ratio 1 %.4f %%, Pure Ratio 2 %.4f %%'
              % (epoch + 1, args.n_epoch, len(test_dataset), test_acc1, test_acc2,
                 mean_pure_ratio1, mean_pure_ratio2))
        with open(txtfile, "a") as myfile:
            myfile.write(str(int(epoch)) + ' ' + str(train_acc1) + ' ' + str(train_acc2)
                         + ' ' + str(test_acc1) + " " + str(test_acc2)
                         + ' ' + str(mean_pure_ratio1) + ' ' + str(mean_pure_ratio2)
                         + ' ' + str(rate_schedule[epoch]) + "\n")

    return (test_acc1 + test_acc2) / 200
class DQN:
    def __init__(self, n_states, n_actions, gamma=0.99, epsilon_start=0.9,
                 epsilon_end=0.05, epsilon_decay=200, memory_capacity=10000,
                 policy_lr=0.01, batch_size=128, device="cpu"):
        self.n_actions = n_actions  # total number of actions
        self.device = device  # device: cpu, gpu, etc.
        self.gamma = gamma  # discount factor for rewards
        # parameters for the epsilon-greedy policy
        self.actions_count = 0  # step counter driving the epsilon decay
        self.epsilon = 0
        self.epsilon_start = epsilon_start
        self.epsilon_end = epsilon_end
        self.epsilon_decay = epsilon_decay
        self.batch_size = batch_size
        self.policy_net = MLP(n_states, n_actions).to(self.device)
        self.target_net = MLP(n_states, n_actions).to(self.device)
        # target_net starts as an exact copy of policy_net's parameters
        self.target_net.load_state_dict(self.policy_net.state_dict())
        self.target_net.eval()  # disables BatchNormalization and Dropout
        # note the difference between parameters() and state_dict():
        # the former has requires_grad=True
        self.optimizer = optim.Adam(self.policy_net.parameters(), lr=policy_lr)
        self.loss = 0
        self.memory = ReplayBuffer(memory_capacity)

    def choose_action(self, state, train=True):
        '''Select an action.'''
        if train:
            self.epsilon = self.epsilon_end + (self.epsilon_start - self.epsilon_end) * \
                math.exp(-1. * self.actions_count / self.epsilon_decay)
            self.actions_count += 1
            if random.random() > self.epsilon:
                with torch.no_grad():
                    # convert to a tensor before feeding the network;
                    # the state entries are originally float64.
                    # note that torch.tensor(state).unsqueeze(0) is equivalent
                    # to torch.tensor([state])
                    state = torch.tensor([state], device=self.device, dtype=torch.float32)
                    # e.g. tensor([[-0.0798, -0.0079]], grad_fn=<AddmmBackward>)
                    q_value = self.policy_net(state)
                    # tensor.max(1) returns the per-row maximum and its index, e.g.
                    # torch.return_types.max(values=tensor([10.3587]), indices=tensor([0]))
                    # so tensor.max(1)[1] is the index of the maximum, i.e. the action
                    action = q_value.max(1)[1].item()
            else:
                action = random.randrange(self.n_actions)
            return action
        else:
            with torch.no_grad():  # no gradients needed
                state = torch.tensor([state], device='cpu', dtype=torch.float32)
                q_value = self.target_net(state)
                action = q_value.max(1)[1].item()
            return action

    def update(self):
        if len(self.memory) < self.batch_size:
            return
        # sample a batch of transitions from the replay memory
        state_batch, action_batch, reward_batch, next_state_batch, done_batch = \
            self.memory.sample(self.batch_size)
        # convert to tensors, e.g.
        # tensor([[-4.5543e-02, -2.3910e-01, 1.8344e-02, 2.3158e-01], ...,
        #         [-1.8615e-02, -2.3921e-01, -1.1791e-02, 2.3400e-01]])
        state_batch = torch.tensor(state_batch, device=self.device, dtype=torch.float)
        action_batch = torch.tensor(action_batch, device=self.device).unsqueeze(1)  # e.g. tensor([[1], ..., [0]])
        reward_batch = torch.tensor(reward_batch, device=self.device, dtype=torch.float)  # tensor([1., 1., ..., 1.])
        next_state_batch = torch.tensor(next_state_batch, device=self.device, dtype=torch.float)
        done_batch = torch.tensor(np.float32(done_batch), device=self.device)  # bool -> float, then to a tensor

        # compute Q(s_t, a) for the actions actually taken
        # torch.gather: for a = torch.Tensor([[1, 2], [3, 4]]),
        # a.gather(1, torch.LongTensor([[0], [1]])) = torch.Tensor([[1], [3]])
        q_values = self.policy_net(state_batch).gather(dim=1, index=action_batch)
        # compute V(s_{t+1}) for all next states, i.e. the maximal Q value
        # under target_net, e.g. tensor([0.0060, -0.0171, ...])
        next_state_values = self.target_net(next_state_batch).max(1)[0].detach()
        # compute the expected Q values; for terminal states done = 1,
        # so the expected Q value reduces to the reward
        expected_q_values = reward_batch + self.gamma * next_state_values * (1 - done_batch)
        # self.loss = F.smooth_l1_loss(q_values, expected_q_values.unsqueeze(1))  # Huber loss
        self.loss = nn.MSELoss()(q_values, expected_q_values.unsqueeze(1))  # mean squared error loss
        # optimize the model
        self.optimizer.zero_grad()  # clear the stale gradients from the last step
        # loss.backward() backpropagates the loss through all parameters that require gradients
        self.loss.backward()
        for param in self.policy_net.parameters():  # clip to prevent exploding gradients
            param.grad.data.clamp_(-1, 1)
        self.optimizer.step()  # update the model

    def save_model(self, path):
        torch.save(self.target_net.state_dict(), path)

    def load_model(self, path):
        self.target_net.load_state_dict(torch.load(path))
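# A minimal training-loop sketch for the DQN agent above, assuming a Gym
# CartPole environment and a ReplayBuffer with a push(state, action, reward,
# next_state, done) method matching the sample() call in update(); the episode
# count and target-sync period are illustrative, not original code.
import gym

env = gym.make('CartPole-v0')
agent = DQN(n_states=env.observation_space.shape[0], n_actions=env.action_space.n)
for episode in range(200):
    state = env.reset()
    done = False
    while not done:
        action = agent.choose_action(state)
        next_state, reward, done, _ = env.step(action)
        agent.memory.push(state, action, reward, next_state, done)
        agent.update()
        state = next_state
    if episode % 10 == 0:
        # periodically copy the policy network into the target network
        agent.target_net.load_state_dict(agent.policy_net.state_dict())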
    config.batch_size,
    shuffle=False,
    num_workers=2)
print(f"{datetime.now().ctime()} - Finish Loading Dataset")

print(f"{datetime.now().ctime()} - Start Creating Net, Criterion, Optimizer and Scheduler...")
if config.model == "mlp":
    net = MLP(config.cifar10_input_size, config.num_classes)
elif config.model == "convnet":
    net = ConvNet(config.input_channel, config.num_classes)
elif config.model == "onelayer":
    net = OneLayer(config.fashionmnist_input_size, config.num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(),
                      config.lr,
                      momentum=config.momentum,
                      weight_decay=config.weight_decay)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                 len(train_dataloader) * config.epochs,
                                                 eta_min=config.eta_min)
print(f"{datetime.now().ctime()} - Finish Creating Net, Criterion, Optimizer and Scheduler")

print(f"{datetime.now().ctime()} - Start Training...")
print(f"Training dataset: {len(train_dataset)}, iteration: {len(train_dataloader)}")
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# CLASSIFIER
if args.use_conv:
    if args.imprint:
        model = ResNet18_imprint(num_classes=args.n_tasks * 5)
        model.seen_classes = []
    else:
        model = ResNet18(args.n_classes, nf=20, input_size=args.input_size)
else:
    model = MLP(args)
if args.cuda:
    model = model.to(args.device)

opt = torch.optim.SGD(model.parameters(), lr=args.lr)
buffer = Buffer(args)

if run == 0:
    print("number of classifier parameters:",
          sum([np.prod(p.size()) for p in model.parameters()]))
    print("buffer parameters: ", np.prod(buffer.bx.size()))

# ----------
# Task Loop
for task, tr_loader in enumerate(train_loader):
    sample_amt = 0
    model = model.train()
def run():
    print(f'Running from {os.getcwd()}')
    train_config, val_config = get_split_configs()
    print(f'Running with\n\ttrain_config: {train_config}\n\tval_config: {val_config}')
    train = AugMNISTDataset(transforms=['color'], config=train_config)
    val = AugMNISTDataset(transforms=['color'], config=val_config)
    train_dataloader = torch.utils.data.DataLoader(train, shuffle=True,
                                                   batch_size=args.batch_size, num_workers=8)
    val_dataloader = torch.utils.data.DataLoader(val, shuffle=True,
                                                 batch_size=1000, num_workers=0)

    mlp_width = 512
    if args.use_l0:
        e_model = L0MLP(args.n_hidden, args.input_dim, mlp_width, 1).to(args.device)
        d_model = L0MLP(args.n_hidden, args.input_dim, mlp_width, 1).to(args.device)
    else:
        e_model = MLP(args.n_hidden, args.input_dim, mlp_width, 1).to(args.device)
        d_model = MLP(args.n_hidden, args.input_dim, mlp_width, 1).to(args.device)
    # summary(e_model, (13,))
    # summary(d_model, (13,))

    if args.optimizer == 'sgd':
        e_opt = torch.optim.SGD(e_model.parameters(), momentum=0.9, lr=args.lr)
        d_opt = torch.optim.SGD(d_model.parameters(), momentum=0.9, lr=args.lr)
    elif args.optimizer == 'adam':
        e_opt = torch.optim.Adam(e_model.parameters(), lr=args.lr)
        d_opt = torch.optim.Adam(d_model.parameters(), lr=args.lr)

    step = 0
    task = generate_task()
    decay_epochs = [60, 90, 120, 150]
    e_sched = torch.optim.lr_scheduler.MultiStepLR(e_opt, milestones=decay_epochs, gamma=0.1)
    d_sched = torch.optim.lr_scheduler.MultiStepLR(d_opt, milestones=decay_epochs, gamma=0.1)

    for epoch in range(args.epochs):
        for idx, samples in enumerate(train_dataloader):
            features = get_features(samples).to(args.device)
            entangled_features = get_features(samples, entangle=True).to(args.device)
            labels = get_labels(samples, task).to(args.device)

            if args.use_l0:
                e_out, l0_e = e_model(entangled_features)
                d_out, l0_d = d_model(features)
            else:
                e_out = e_model(entangled_features)
                d_out = d_model(features)

            e_pred = e_out > 0
            e_acc = (e_pred == labels).float().mean()
            d_pred = d_out > 0
            d_acc = (d_pred == labels).float().mean()

            e_bce = F.binary_cross_entropy_with_logits(e_out, labels)
            e_loss = e_bce
            d_bce = F.binary_cross_entropy_with_logits(d_out, labels)
            d_loss = d_bce

            # L0
            if args.use_l0:
                l0_coef = 1e-1
                d_loss += l0_coef * l0_d / len(samples)
                e_loss += l0_coef * l0_e / len(samples)

            # L1
            if epoch <= args.rampup_begin:
                l1_coef = args.warmup_l1
            else:
                l1_coef = args.warmup_l1 + args.l1 / (args.warmup_l1 + args.l1) * min(
                    args.l1,
                    args.l1 * (float(epoch) - args.rampup_begin) / (args.rampup_end - args.rampup_begin))
            d_loss += l1_coef * l1(d_model)
            e_loss += l1_coef * l1(e_model)

            e_loss.backward()
            e_grad = torch.nn.utils.clip_grad_norm_(e_model.parameters(), 100)
            e_opt.step()
            e_opt.zero_grad()

            d_loss.backward()
            d_grad = torch.nn.utils.clip_grad_norm_(d_model.parameters(), 100)
            d_opt.step()
            d_opt.zero_grad()

            if step % 250 == 0:
                stats = {}
                stats['step'] = step
                stats['train_acc/e'], stats['train_acc/d'] = e_acc, d_acc
                stats['train_loss/e'], stats['train_loss/d'] = e_loss, d_loss
                stats['train_bce/e'], stats['train_bce/d'] = e_bce, d_bce
                if args.warmup_l1 + args.l1 > 0:
                    stats['l1_coef'] = l1_coef
                d_nonzero, d_params = nonzero_params(d_model)
                e_nonzero, e_params = nonzero_params(e_model)
                stats['d_nonzero'], stats['e_nonzero'] = d_nonzero, e_nonzero

                with torch.no_grad():
                    val_samples = next(iter(val_dataloader))
                    val_features = get_features(val_samples).to(args.device)
                    val_entangled_features = get_features(val_samples, entangle=True).to(args.device)
                    val_labels = get_labels(val_samples, task)
                    if args.use_l0:
                        e_out = copy_and_zero(e_model)(val_entangled_features)[0].cpu()
                        d_out = copy_and_zero(d_model)(val_features)[0].cpu()
                    else:
                        e_out = copy_and_zero(e_model)(val_entangled_features).cpu()
                        d_out = copy_and_zero(d_model)(val_features).cpu()
                    stats['val_auc/e'] = metrics.roc_auc_score(val_labels, e_out)
                    stats['val_auc/d'] = metrics.roc_auc_score(val_labels, d_out)
                    stats['lr/e'], stats['lr/d'] = e_sched.get_lr()[0], d_sched.get_lr()[0]
                    e_pred = e_out > 0
                    e_acc = (e_pred == val_labels).float().mean()
                    d_pred = d_out > 0
                    d_acc = (d_pred == val_labels).float().mean()
                    stats['val_acc/e'], stats['val_acc/d'] = e_acc, d_acc

                    # Fetch k wrong predictions
                    # k = 10
                    # e_wrong_mask = [e_pred != val_labels]
                    # d_wrong_mask = [d_pred != val_labels]
                    # wrong_preds_e, ftrs_e = e_out[e_wrong_mask][:k], val_entangled_features[:k]
                    # wrong_preds_d, ftrs_d = d_out[d_wrong_mask][:k], val_features[:k]

                to_save = {
                    'd_model': d_model.state_dict(),
                    'e_model': e_model.state_dict(),
                    'd_opt': d_opt.state_dict(),
                    'e_opt': e_opt.state_dict()
                }
                torch.save(to_save, 'checkpoint.pt')
                if args.log_wandb:
                    wandb.log(stats)
                else:
                    print_stats(stats)
            step += 1
        e_sched.step()
        d_sched.step()
model.cuda()

logger = NeptuneLogger(api_token=os.getenv('NEPTUNE_API_TOKEN'),
                       project_name="vladimir.isakov/sandbox",
                       experiment_name='Run',
                       upload_source_files='./train.py',
                       # tags='v1',
                       params={'batch_size': args.batch_size,
                               'epochs': args.epochs,
                               'lr': args.lr,
                               'step_size': args.step_size,
                               'gamma': args.gamma,
                               'weight_decay': args.weight_decay,
                               'model': repr(model)})

optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
step_scheduler = StepLR(optimizer, step_size=args.step_size, gamma=args.gamma)
scheduler = LRScheduler(step_scheduler)
criterion = nn.CrossEntropyLoss()

def update(engine, batch):
    inputs, targets = batch
    val_dataset,
    batch_size=batch_size
)
print('Data Loaded!')

'''Step 2: Model Initialization'''
# Model
model = MLP(input_dim, output_dim)
model.to(device)
# model.load_state_dict(torch.load('14_model8.pth.tar'))

# Loss Function
criterion = nn.CrossEntropyLoss()

# Optimizer
optimizer = Adam(model.parameters(), lr=lr)

'''Step 3: Train the Model'''
print('Training begins: ')
global_acc = 0
for epoch in range(num_epoch):
    epoch = epoch + 1
    print(f'Epoch {epoch} starts:')
    train_start = time.time()
    train_loss, train_acc = Train(
        train_dataloader,
        model,
        criterion,
        optimizer
    )
class Agent():
    def __init__(self, test=False):
        # device
        if torch.cuda.is_available():
            self.device = torch.device('cuda')
        else:
            self.device = torch.device('cpu')
        self.model = MLP(state_dim=4, action_num=2, hidden_dim=256).to(self.device)
        if test:
            self.load('./pg_best.cpt')
        # discounted reward
        self.gamma = 0.99
        # optimizer
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=3e-3)
        # saved rewards and actions
        self.memory = Memory()
        self.tensorboard = TensorboardLogger('./')

    def save(self, save_path):
        print('save model to', save_path)
        torch.save(self.model.state_dict(), save_path)

    def load(self, load_path):
        print('load model from', load_path)
        self.model.load_state_dict(torch.load(load_path))

    def act(self, x, test=False):
        if not test:
            # boring type casting
            x = ((torch.from_numpy(x)).unsqueeze(0)).float().to(self.device)
            # stochastic sample
            action_prob = self.model(x)
            dist = torch.distributions.Categorical(action_prob)
            action = dist.sample()
            # memory log_prob
            self.memory.logprobs.append(dist.log_prob(action))
            return action.item()
        else:
            self.model.eval()
            x = ((torch.from_numpy(x)).unsqueeze(0)).float().to(self.device)
            with torch.no_grad():
                action_prob = self.model(x)
            # a = np.argmax(action_prob.cpu().numpy())
            dist = torch.distributions.Categorical(action_prob)
            action = dist.sample()
            return action.item()

    def collect_data(self, state, action, reward):
        self.memory.actions.append(action)
        self.memory.rewards.append(torch.tensor(reward))
        self.memory.states.append(state)

    def clear_data(self):
        self.memory.clear_memory()

    def update(self):
        R = 0
        advantage_function = []
        for t in reversed(range(0, len(self.memory.rewards))):
            R = R * self.gamma + self.memory.rewards[t]
            advantage_function.insert(0, R)
        # turn rewards into a pytorch tensor and standardize
        advantage_function = torch.Tensor(advantage_function).to(self.device)
        advantage_function = (advantage_function - advantage_function.mean()) / \
            (advantage_function.std() + np.finfo(np.float32).eps)
        policy_loss = []
        for log_prob, reward in zip(self.memory.logprobs, advantage_function):
            policy_loss.append(-log_prob * reward)
        # Update network weights
        self.optimizer.zero_grad()
        loss = torch.cat(policy_loss).sum()
        loss.backward()
        self.optimizer.step()
        # boring log
        self.tensorboard.scalar_summary("loss", loss.item())
        self.tensorboard.update()
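# A hedged episode loop for the REINFORCE Agent above on CartPole (its MLP is
# built for state_dim=4, action_num=2); the gym import and the loop itself are
# illustrative additions, not original code.
import gym

env = gym.make('CartPole-v0')
agent = Agent()
for episode in range(100):
    state = env.reset()
    done = False
    while not done:
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        agent.collect_data(state, action, reward)
        state = next_state
    agent.update()
    agent.clear_data()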
loaders = [train_loader, valid_loader, test_loader, trainA_loader,
           trainB_loader, validA_loader, validB_loader]
names = ['train_loader', 'valid_loader', 'test_loader', 'trainA_loader',
         'trainB_loader', 'validA_loader', 'validB_loader']
for loader, name in zip(loaders, names):
    train_iter = iter(loader)
    for _ in range(2):
        _, target = next(train_iter)
        print(f'{name}', ': Classes {}, counts: {}'.format(
            *np.unique(target.numpy(), return_counts=True)))

#############################
######### Base Line #########
#############################
model = MLP()
model = model.to(device)
for name, param in model.named_parameters():
    if param.device.type != 'cuda':
        print('param {}, not on GPU'.format(name))

optimizer = optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)

wandb.init(project='Seq Boost2',
           config=config,
           name="Baseline p={} mu={} eta={}".format(P, M, E))

model, train_loss, valid_loss = train(model, train_loader, valid_loader,
                                      batch_size=BATCH_SIZE, wandb_log=True,
                                      consolidate=False,
                                      patience=EARLY_STOPPING,
                                      n_epochs=config['epoch'])
evaluate(model, test_loader, batch_size=BATCH_SIZE)
def main():
    np.random.seed(args.seed)
    cur_acc = 0
    max_acc = 0
    num_param = 20
    cur_param = np.zeros(args.n_epoch)
    max_pt = np.zeros(args.n_epoch)
    for iii in range(args.n_iter):
        for jjj in range(args.n_samples):
            cur_a = np.random.randn(10)
            cur_w = np.random.randn(10)
            cur_b = np.random.randn(10)
            x = np.arange(args.n_epoch) / args.n_epoch
            cur_rt = np.dot(np.outer(x, cur_w) + cur_b, cur_a)
            cur_rt = 1 / (1 + np.exp(-cur_rt))
            cur_param = cur_rt.copy()
            cur_acc = black_box_function(cur_param)
            if max_acc < cur_acc:
                max_acc = cur_acc
                max_pt = cur_param.copy()
    '''
    rate_schedule = np.ones(args.n_epoch) * forget_rate
    rate_schedule[:10] = np.arange(10, dtype=float) / 10 * forget_rate
    # rate_schedule[10:] = np.arange(args.n_epoch - 10, dtype=float) / (args.n_epoch - 10) * forget_rate + forget_rate
    rate_schedule = np.zeros(args.n_epoch)
    print(rate_schedule)
    '''
    rate_schedule = max_pt.copy()
    print('Final Schedule:', rate_schedule)

    mean_pure_ratio1 = 0
    mean_pure_ratio2 = 0

    print('building model...')
    cnn1 = MLP(n_outputs=num_classes)
    cnn1.cuda()
    print(cnn1.parameters)
    optimizer1 = torch.optim.Adam(cnn1.parameters(), lr=learning_rate)

    cnn2 = MLP(n_outputs=num_classes)
    cnn2.cuda()
    print(cnn2.parameters)
    optimizer2 = torch.optim.Adam(cnn2.parameters(), lr=learning_rate)

    epoch = 0
    train_acc1 = 0
    train_acc2 = 0
    # evaluate models with random weights
    test_acc1, test_acc2 = evaluate(test_loader, cnn1, cnn2)
    print('Epoch [%d/%d] Test Accuracy on the %s test images: Model1 %.4f %% Model2 %.4f %% Pure Ratio1 %.4f %% Pure Ratio2 %.4f %%'
          % (epoch + 1, args.n_epoch, len(test_dataset), test_acc1, test_acc2,
             mean_pure_ratio1, mean_pure_ratio2))
    # save results
    with open(txtfile, "a") as myfile:
        myfile.write(str(int(epoch)) + ' ' + str(train_acc1) + ' ' + str(train_acc2)
                     + ' ' + str(test_acc1) + " " + str(test_acc2)
                     + ' ' + str(mean_pure_ratio1) + ' ' + str(mean_pure_ratio2)
                     + ' ' + str(rate_schedule[epoch]) + "\n")

    # training
    for epoch in range(1, args.n_epoch):
        # train models
        cnn1.train()
        adjust_learning_rate(optimizer1, epoch)
        cnn2.train()
        adjust_learning_rate(optimizer2, epoch)
        train_acc1, train_acc2, pure_ratio_1_list, pure_ratio_2_list = train(
            train_loader, epoch, cnn1, optimizer1, cnn2, optimizer2, rate_schedule)
        # evaluate models
        test_acc1, test_acc2 = evaluate(test_loader, cnn1, cnn2)
        # save results
        mean_pure_ratio1 = sum(pure_ratio_1_list) / len(pure_ratio_1_list)
        mean_pure_ratio2 = sum(pure_ratio_2_list) / len(pure_ratio_2_list)
        print('Epoch [%d/%d] Test Accuracy on the %s test images: Model1 %.4f %% Model2 %.4f %%, Pure Ratio 1 %.4f %%, Pure Ratio 2 %.4f %%'
              % (epoch + 1, args.n_epoch, len(test_dataset), test_acc1, test_acc2,
                 mean_pure_ratio1, mean_pure_ratio2))
        with open(txtfile, "a") as myfile:
            myfile.write(str(int(epoch)) + ' ' + str(train_acc1) + ' ' + str(train_acc2)
                         + ' ' + str(test_acc1) + " " + str(test_acc2)
                         + ' ' + str(mean_pure_ratio1) + ' ' + str(mean_pure_ratio2)
                         + ' ' + str(rate_schedule[epoch]) + "\n")
if __name__ == '__main__':
    train_filename = "dataset/adult.train.npz"
    test_filename = "dataset/adult.test.npz"
    epochs = 50
    batch_size = 32
    lr = 1e-3
    eval_every = 1

    train_dataloader = make_dataloader(train_filename, batch_size=batch_size,
                                       shuffle=True, drop_last=True)
    test_dataloader = make_dataloader(test_filename, batch_size=batch_size,
                                      shuffle=False, drop_last=False)

    mlp = MLP()
    loss = torch.nn.BCEWithLogitsLoss()
    optimizer = optim.SGD(mlp.parameters(), lr)

    for epoch in range(epochs):
        train_loss = train(train_dataloader, mlp, loss, optimizer)
        print(f"epoch: {epoch}, train loss: {train_loss}")
        if epoch % eval_every == 0:
            validate_loss, p, r, auc = validate(test_dataloader, mlp, loss)
            print(f"epoch: {epoch}, validate loss: {validate_loss}, "
                  f"precision: {p}, recall: {r}, auc: {auc}")