def load_model(dir):
    """Restore every checkpoint found in a directory.

    :param dir: directory containing one saved state-dict file per model
    :return: list of restored models, each set to training mode and moved to the GPU
    """
    loaded = []
    for name in os.listdir(dir):
        checkpoint = torch.load(os.path.join(dir, name))
        model = Net()
        model.load_state_dict(checkpoint)
        model.train(True)
        model.cuda()
        loaded.append(model)
    return loaded
def main():
    """Build the network, optionally restore a checkpoint, and launch training."""
    cuda_enabled = torch.cuda.is_available() and args.cuda
    device = torch.device('cuda' if cuda_enabled else 'cpu')

    print('Loading model ...')
    model = Net(device)
    # Restore weights first so a checkpoint can be resumed on either device.
    if args.load is not None:
        print('Loading checkpoint ...')
        model.load_state_dict(torch.load(args.load))
    if cuda_enabled:
        model.cuda()

    print('Loading data ...')
    loader = get_data_loader(args.data_root, args.stage, args.batch_size)
    print('Preparation done')

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 betas=(0.9, 0.999))
    criterion = nn.CrossEntropyLoss()
    if args.stage == 'train':
        train(model, loader, optimizer, criterion)
def super_resolve(model: Net, input_image: Any, cuda: bool) -> PIL.Image.Image:
    """Upscale an image with the network, super-resolving only the luma channel.

    The image is converted to YCbCr; the network upsamples the Y channel,
    the Cb/Cr channels are bicubically resized to the network's output size,
    and the three channels are merged back and converted to RGB.

    :param model: trained super-resolution network operating on a 1-channel input
    :param input_image: anything accepted by ``open_image`` (path / file / image)
    :param cuda: if True, run inference on the GPU
    :return: the super-resolved image as an RGB PIL image
    """
    img = open_image(input_image).convert('YCbCr')
    y, cb, cr = img.split()
    img_to_tensor = ToTensor()
    # FIX: renamed local from `input` to avoid shadowing the builtin.
    y_tensor = img_to_tensor(y).view(1, -1, y.size[1], y.size[0])
    if cuda:
        model = model.cuda()
        y_tensor = y_tensor.cuda()
    out = model(y_tensor)
    out = out.cpu()
    out_img_y = out[0].detach().numpy()
    # Network output is in [0, 1]; rescale to 8-bit pixel values.
    out_img_y *= 255.0
    out_img_y = out_img_y.clip(0, 255)
    out_img_y = PIL.Image.fromarray(np.uint8(out_img_y[0]), mode='L')
    # Chroma channels are simply resized to match the upsampled luma.
    out_img_cb = cb.resize(out_img_y.size, PIL.Image.BICUBIC)
    out_img_cr = cr.resize(out_img_y.size, PIL.Image.BICUBIC)
    out_img = PIL.Image.merge(
        'YCbCr', [out_img_y, out_img_cb, out_img_cr]).convert('RGB')
    return out_img
class a2c_agent:
    """Synchronous advantage actor-critic (A2C) agent with optional
    self-imitation learning (SIL).

    Collects ``nsteps``-long rollouts from ``num_processes`` vectorised
    environments, computes discounted returns, and updates the shared
    actor-critic network.
    """

    def __init__(self, envs, args):
        self.envs = envs
        self.args = args
        # define the network
        self.net = Net(self.envs.action_space.n)
        if self.args.cuda:
            self.net.cuda()
        # define the optimizer
        self.optimizer = torch.optim.RMSprop(self.net.parameters(),
                                             lr=self.args.lr,
                                             eps=self.args.eps,
                                             alpha=self.args.alpha)
        if not os.path.exists(self.args.save_dir):
            os.mkdir(self.args.save_dir)
        # per-environment checkpoint folder
        self.model_path = self.args.save_dir + self.args.env_name + '/'
        if not os.path.exists(self.model_path):
            os.mkdir(self.model_path)
        # shape of one flattened rollout batch: (num_processes * nsteps, *obs_shape)
        self.batch_ob_shape = (self.args.num_processes * self.args.nsteps, ) + \
            self.envs.observation_space.shape
        self.obs = np.zeros(
            (self.args.num_processes, ) + self.envs.observation_space.shape,
            dtype=self.envs.observation_space.dtype.name)
        self.obs[:] = self.envs.reset()
        self.dones = [False for _ in range(self.args.num_processes)]

    def learn(self):
        """Run the main A2C training loop."""
        if not self.args.no_sil:
            sil_model = sil_module(self.net, self.args, self.optimizer)
        num_updates = self.args.total_frames // (self.args.num_processes * self.args.nsteps)
        # running per-process episode rewards used only for logging
        episode_rewards = torch.zeros([self.args.num_processes, 1])
        final_rewards = torch.zeros([self.args.num_processes, 1])
        for update in range(num_updates):
            mb_obs, mb_rewards, mb_actions, mb_dones = [], [], [], []
            for step in range(self.args.nsteps):
                with torch.no_grad():
                    input_tensor = self._get_tensors(self.obs)
                    _, pi = self.net(input_tensor)
                # sample actions from the policy
                actions = select_actions(pi)
                cpu_actions = actions.squeeze(1).cpu().numpy()
                # store the transition before stepping the envs
                mb_obs.append(np.copy(self.obs))
                mb_actions.append(cpu_actions)
                mb_dones.append(self.dones)
                obs, rewards, dones, _ = self.envs.step(cpu_actions)
                # keep unclipped rewards for logging/SIL; train on sign-clipped ones
                raw_rewards = copy.deepcopy(rewards)
                rewards = np.sign(rewards)
                self.dones = dones
                if not self.args.no_sil:
                    sil_model.step(input_tensor.detach().cpu().numpy(),
                                   cpu_actions, raw_rewards, dones)
                mb_rewards.append(rewards)
                # zero the observation of any env that just finished an episode
                for n, done in enumerate(dones):
                    if done:
                        self.obs[n] = self.obs[n] * 0
                self.obs = obs
                raw_rewards = torch.from_numpy(
                    np.expand_dims(np.stack(raw_rewards), 1)).float()
                episode_rewards += raw_rewards
                # masks: 0 where the episode ended, 1 otherwise
                masks = torch.FloatTensor([[0.0] if done_ else [1.0] for done_ in dones])
                final_rewards *= masks
                final_rewards += (1 - masks) * episode_rewards
                episode_rewards *= masks
            mb_dones.append(self.dones)
            # process the rollouts: (nsteps, nproc, ...) -> (nproc, nsteps, ...)
            mb_obs = np.asarray(mb_obs, dtype=np.uint8).swapaxes(1, 0).reshape(
                self.batch_ob_shape)
            mb_rewards = np.asarray(mb_rewards, dtype=np.float32).swapaxes(1, 0)
            mb_actions = np.asarray(mb_actions, dtype=np.int32).swapaxes(1, 0)
            # FIX: np.bool was removed in NumPy 1.24 -- the builtin bool is the
            # documented drop-in replacement.
            mb_dones = np.asarray(mb_dones, dtype=bool).swapaxes(1, 0)
            # drop the leading (pre-step) done flags; the unused mb_masks slice
            # from the original has been removed
            mb_dones = mb_dones[:, 1:]
            with torch.no_grad():
                input_tensor = self._get_tensors(self.obs)
                last_values, _ = self.net(input_tensor)
            # compute discounted returns, bootstrapping with the last value
            # when the rollout did not end on a terminal state
            for n, (rewards, dones, value) in enumerate(
                    zip(mb_rewards, mb_dones,
                        last_values.detach().cpu().numpy().squeeze())):
                rewards = rewards.tolist()
                dones = dones.tolist()
                if dones[-1] == 0:
                    rewards = discount_with_dones(rewards + [value], dones + [0],
                                                  self.args.gamma)[:-1]
                else:
                    rewards = discount_with_dones(rewards, dones, self.args.gamma)
                mb_rewards[n] = rewards
            mb_rewards = mb_rewards.flatten()
            mb_actions = mb_actions.flatten()
            # start to update network
            vl, al, ent = self._update_network(mb_obs, mb_rewards, mb_actions)
            # start to update the sil_module
            if not self.args.no_sil:
                mean_adv, num_samples = sil_model.train_sil_model()
            if update % self.args.log_interval == 0:
                frames = (update + 1) * (self.args.num_processes * self.args.nsteps)
                # FIX: the original format string was broken (literal line break
                # inside the literal and a missing separator before "Ent")
                if not self.args.no_sil:
                    print('[{}] Update: {}/{}, Frames: {}, Rewards: {:.2f}, VL: {:.3f}, PL: {:.3f}, '
                          'Ent: {:.2f}, Min: {}, Max:{}, BR:{}, E:{}, VS:{}, S:{}'.format(
                              datetime.now(), update, num_updates, frames,
                              final_rewards.mean(), vl, al, ent,
                              final_rewards.min(), final_rewards.max(),
                              sil_model.get_best_reward(), sil_model.num_episodes(),
                              num_samples, sil_model.num_steps()))
                else:
                    print('[{}] Update: {}/{}, Frames: {}, Rewards: {:.2f}, VL: {:.3f}, PL: {:.3f}, '
                          'Ent: {:.2f}, Min: {}, Max:{}'.format(
                              datetime.now(), update, num_updates, frames,
                              final_rewards.mean(), vl, al, ent,
                              final_rewards.min(), final_rewards.max()))
                torch.save(self.net.state_dict(), self.model_path + 'model.pt')

    def _update_network(self, obs, returns, actions):
        """Perform one gradient step on the combined actor-critic loss.

        :param obs: flattened batch of observations
        :param returns: discounted returns, one per observation
        :param actions: actions taken, one per observation
        :return: (value_loss, action_loss, entropy) as Python floats
        """
        input_tensor = self._get_tensors(obs)
        values, pi = self.net(input_tensor)
        returns = torch.tensor(returns, dtype=torch.float32).unsqueeze(1)
        actions = torch.tensor(actions, dtype=torch.int64).unsqueeze(1)
        if self.args.cuda:
            returns = returns.cuda()
            actions = actions.cuda()
        action_log_probs, dist_entropy = evaluate_actions(pi, actions)
        advantages = returns - values
        value_loss = advantages.pow(2).mean()
        # advantages are detached so the policy gradient does not flow into the critic
        action_loss = -(advantages.detach() * action_log_probs).mean()
        total_loss = action_loss + self.args.value_loss_coef * value_loss \
            - self.args.entropy_coef * dist_entropy
        self.optimizer.zero_grad()
        total_loss.backward()
        torch.nn.utils.clip_grad_norm_(self.net.parameters(), self.args.max_grad_norm)
        self.optimizer.step()
        return value_loss.item(), action_loss.item(), dist_entropy.item()

    def _get_tensors(self, obs):
        """Convert a batch of NHWC observations to an NCHW float32 tensor."""
        input_tensor = torch.tensor(np.transpose(obs, (0, 3, 1, 2)),
                                    dtype=torch.float32)
        if self.args.cuda:
            input_tensor = input_tensor.cuda()
        return input_tensor
class dueling_agent:
    """DQN-style agent for a single environment with a target network,
    replay buffer, and linearly annealed epsilon-greedy exploration."""

    def __init__(self, env, args):
        # keep references to the environment and the hyper-parameters
        self.env = env
        self.args = args
        # online network and its periodically-synced target copy
        self.net = Net(self.env.action_space.n)
        self.target_net = Net(self.env.action_space.n)
        # make sure the target net starts with the same weights as the network
        self.target_net.load_state_dict(self.net.state_dict())
        if self.args.cuda:
            self.net.cuda()
            self.target_net.cuda()
        # optimizer updates only the online network
        self.optimizer = torch.optim.Adam(self.net.parameters(), lr=self.args.lr)
        # experience replay buffer
        self.buffer = replay_memory(self.args.buffer_size)
        # epsilon is annealed linearly over a fraction of total_timesteps
        self.exploration_schedule = linear_schedule(int(self.args.total_timesteps * self.args.exploration_fraction), \
                self.args.final_ratio, self.args.init_ratio)
        # create the folder to save the models
        if not os.path.exists(self.args.save_dir):
            os.mkdir(self.args.save_dir)
        # per-environment checkpoint folder
        self.model_path = os.path.join(self.args.save_dir, self.args.env_name)
        if not os.path.exists(self.model_path):
            os.mkdir(self.model_path)

    def learn(self):
        """Run the interaction / training loop for total_timesteps steps."""
        episode_reward = [0.0]  # one running total per episode; last entry is current
        obs = np.array(self.env.reset())
        td_loss = 0
        for timestep in range(self.args.total_timesteps):
            explore_eps = self.exploration_schedule.get_value(timestep)
            with torch.no_grad():
                obs_tensor = self._get_tensors(obs)
                action_value = self.net(obs_tensor)
            # epsilon-greedy action selection
            action = select_actions(action_value, explore_eps)
            # execute the action in the environment
            obs_, reward, done, _ = self.env.step(action)
            obs_ = np.array(obs_)
            # store the transition in the replay buffer
            self.buffer.add(obs, action, reward, obs_, float(done))
            obs = obs_
            # accumulate the reward of the current episode
            episode_reward[-1] += reward
            if done:
                obs = np.array(self.env.reset())
                episode_reward.append(0.0)
            # train after the warm-up period, every train_freq steps
            if timestep > self.args.learning_starts and timestep % self.args.train_freq == 0:
                batch_samples = self.buffer.sample(self.args.batch_size)
                td_loss = self._update_network(batch_samples)
            # periodically sync the target network with the online network
            if timestep > self.args.learning_starts and timestep % self.args.target_network_update_freq == 0:
                self.target_net.load_state_dict(self.net.state_dict())
            # mean reward over the last 100 *completed* episodes
            if len(episode_reward[-101:-1]) == 0:
                mean_reward_per_100 = 0
            else:
                mean_reward_per_100 = np.mean(episode_reward[-101:-1])
            num_episode = len(episode_reward) - 1
            if done and num_episode % self.args.display_interval == 0:
                print('[{}] Frames: {}, Episode: {}, Mean: {:.3f}, Loss: {:.3f}'.format(datetime.now(), timestep, num_episode, \
                        mean_reward_per_100, td_loss))
                torch.save(self.net.state_dict(), self.model_path + '/model.pt')

    def _update_network(self, samples):
        """One TD-learning step on a sampled minibatch; returns the loss value."""
        obses, actions, rewards, obses_next, dones = samples
        # convert the data to tensors
        obses = self._get_tensors(obses)
        actions = torch.tensor(actions, dtype=torch.int64).unsqueeze(-1)
        rewards = torch.tensor(rewards, dtype=torch.float32).unsqueeze(-1)
        obses_next = self._get_tensors(obses_next)
        # dones is inverted here: 1.0 for non-terminal, 0.0 for terminal
        dones = torch.tensor(1 - dones, dtype=torch.float32).unsqueeze(-1)
        # move to GPU if requested
        if self.args.cuda:
            actions = actions.cuda()
            rewards = rewards.cuda()
            dones = dones.cuda()
        # Double-DQN-style target: the online net picks the argmax action,
        # the target net evaluates it
        with torch.no_grad():
            q_value_temp = self.net(obses_next)
            action_max_idx = torch.argmax(q_value_temp, dim=1, keepdim=True)
            target_action_value = self.target_net(obses_next)
            target_action_max_value = target_action_value.gather(
                1, action_max_idx)
            target_action_max_value = target_action_max_value.detach()
        # TD target; the (1 - done) mask zeroes the bootstrap on terminal states
        expected_value = rewards + self.args.gamma * target_action_max_value * dones
        # Q-value of the action actually taken
        action_value = self.net(obses)
        real_value = action_value.gather(1, actions)
        loss = (expected_value - real_value).pow(2).mean()
        # gradient step
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss.item()

    def _get_tensors(self, obs):
        """Convert an observation (HWC) or batch (NHWC) to an NCHW float tensor."""
        if obs.ndim == 3:
            obs = np.transpose(obs, (2, 0, 1))
            obs = np.expand_dims(obs, 0)
        elif obs.ndim == 4:
            obs = np.transpose(obs, (0, 3, 1, 2))
        obs = torch.tensor(obs, dtype=torch.float32)
        if self.args.cuda:
            obs = obs.cuda()
        return obs
def main():
    """Socket server that receives a raw feature matrix from a client, runs
    the trained network on it, and sends the network output back.

    NOTE(review): the original collapsed source does not make the loop
    boundaries explicit; the statement order suggests the net is (re)built
    inside the receive loop, which is preserved here — consider hoisting the
    model construction above the loop after confirming against the caller.
    """
    warnings.filterwarnings("ignore")
    # ---- command line interface ----
    parser = argparse.ArgumentParser()
    parser.add_argument('--workers', type=int, help='number of data loading workers', default=6)
    parser.add_argument('--len', type=int, default=64, help='the height / width of the input to network')
    parser.add_argument('--cuda', action='store_true', help='enables cuda', default=True)
    parser.add_argument('--manualSeed', type=int, help='manual seed')
    parser.add_argument('--net', help="path to net (to continue training)", default='./net_epoch_196.pth')
    parser.add_argument('--nc', default=20, help="number of channels", type=int)
    opt = parser.parse_args()
    # dataset normalisation statistics (two consecutive pickled objects);
    # loaded but not applied here, as in the original
    with open("FocusMuseDataset_mean_std.pkl", "rb") as f:
        mean = pickle.load(f)
        std = pickle.load(f)
    # ---- seeding ----
    if opt.manualSeed is None:
        opt.manualSeed = random.randint(1, 10000)
    random.seed(opt.manualSeed)
    torch.manual_seed(opt.manualSeed)
    if opt.cuda:
        torch.cuda.manual_seed_all(opt.manualSeed)
        cudnn.benchmark = True
    if torch.cuda.is_available() and not opt.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )
    device = torch.device("cuda" if opt.cuda else "cpu")
    # ---- server ----
    HOST = ''
    PORT = 65531
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.bind((HOST, PORT))
    print("binding")
    s.listen(5)
    conn, addr = s.accept()
    print('Connected by', addr)
    while True:
        data = conn.recv(16184)
        if not data:
            break
        # FIX: np.fromstring and np.float were removed from NumPy;
        # np.frombuffer + np.float64 are the documented replacements.
        test_x = np.frombuffer(data, dtype=np.float64).reshape(20, -1)
        test_x = torch.Tensor(test_x)
        print("test data: ", type(test_x), test_x.shape)
        net = Net(insize=opt.len,
                  output_size=128,
                  nc=opt.nc,
                  hidden_size=64,
                  n_layers=2)
        if opt.net != '':
            print("loading trained net...")
            net.load_state_dict(torch.load(opt.net))
        if opt.cuda:
            net.cuda()
            # FIX: .cuda() returns a new tensor; the original discarded the
            # result, leaving test_x on the CPU.
            test_x = test_x.cuda()
        test_x = Variable(test_x)
        outputs = net(test_x)
        # FIX: tensors have no .encode(); serialise to text before sending.
        conn.send(str(outputs[1]).encode())
    conn.close()
batch_size=batch_size, shuffle=True, num_workers=2) test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True, num_workers=2) net = Net() criterion = torch.nn.MSELoss() optimizer = optim.Adam(net.parameters(), lr=0.001) if torch.cuda.is_available(): dtype = torch.cuda.FloatTensor net.cuda() print('Training on GPU ...') else: dtype = torch.FloatTensor print('Training on CPU ...') def train_net(n_epochs): net.train() for epoch in range(n_epochs): # loop over the dataset multiple times train_loss = 0.0 test_loss = 0.0 for batch_i, data in enumerate(train_loader): images = data['image'] key_pts = data['keypoints']
batch_size=args.batch_size, shuffle=True, **kwargs) test_loader = torch.utils.data.DataLoader(datasets.MNIST( 'datas', train=False, transform=transforms.Compose( [transforms.ToTensor(), transforms.Normalize((0.1307, ), (0.3081, ))])), batch_size=args.batch_size, shuffle=True, **kwargs) model = Net() if args.cuda: model.cuda() # 将所有的模型参数移动到GPU上 optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum) def train(epoch): model.train() # 把module设成training模式,对Dropout和BatchNorm有影响 for batch_idx, (data, target) in enumerate(train_loader): if args.cuda: data, target = data.cuda(), target.cuda() # Variable类对Tensor对象进行封装,会保存该张量对应的梯度,以及对生成该张量的函数grad_fn的一个引用。如果该张量是用户创建的,grad_fn是None,称这样的Variable为叶子Variable。 data, target = Variable(data), Variable(target) optimizer.zero_grad() output = model(data) loss = F.nll_loss(output, target) # 负log似然损失 loss.backward()
class NNet:
    """ Wrapper to manage neural net: construction, data loading, training,
    validation, testing, persistence, and prediction. """

    def __init__(self, args):
        self.args = args
        self.num_channels = NUM_CHANNELS
        # choose the backbone by netType (1 -> Net, 2 -> Net2); any other
        # value leaves self.net undefined, as in the original
        if args.netType == 1:
            self.net = Net(self.num_channels, args)
        elif args.netType == 2:
            self.net = Net2(self.num_channels, args)
        if args.cuda:
            self.net = self.net.cuda()
        self.load_dataset_from_folder()
        self.writer = SummaryWriter()
        # timestamp used to uniquely name saved checkpoints
        self.unique_tok = str(time.time())
        self.init_weights()

    def init_weights(self):
        """ Initialize by Xavier weights """
        # applies the module-level init_weights function to every sub-module
        self.net.apply(init_weights)

    def load_dataset_from_folder(self):
        """ Load complete dataset and split it into train / val / test loaders. """
        all_data_path = self.args.all_data_path
        validation_split_size = self.args.validation_split_size
        batch_size = self.args.batch_size
        num_workers = self.args.num_workers
        shuffle = self.args.shuffle
        all_data = ImageFolder(root=all_data_path, transform=TRANSFORM)
        classes = all_data.classes
        self.classes = classes
        # val and test each get validation_split_size of the data;
        # the remainder is the training set
        validation_size = int(validation_split_size * len(all_data))
        test_size = int(validation_split_size * len(all_data))
        train_size = len(all_data) - 2 * validation_size
        train_dataset, val_dataset, test_dataset = random_split(
            all_data, [train_size, validation_size, test_size])
        training_data_loader = DataLoader(train_dataset,
                                          batch_size=batch_size,
                                          num_workers=num_workers,
                                          shuffle=shuffle)
        validation_dataset_loader = DataLoader(val_dataset,
                                               batch_size=batch_size,
                                               num_workers=num_workers,
                                               shuffle=shuffle)
        test_dataset_loader = DataLoader(test_dataset,
                                         batch_size=batch_size,
                                         num_workers=num_workers,
                                         shuffle=shuffle)
        self.train_loader = training_data_loader
        self.val_loader = validation_dataset_loader
        self.test_loader = test_dataset_loader

    def train(self):
        """ Train Neural Net for args.epoch epochs, logging loss and
        weighted F1 to TensorBoard and saving a checkpoint each epoch. """
        if self.args.optim == 'RMSprop':
            optimizer = optim.RMSprop(self.net.parameters(),
                                      lr=self.args.lr,
                                      momentum=self.args.momentum,
                                      weight_decay=self.args.l2_regularization)
        elif self.args.optim == 'SGD':
            optimizer = optim.SGD(self.net.parameters(),
                                  lr=self.args.lr,
                                  momentum=self.args.momentum)
        elif self.args.optim == 'Adam':
            optimizer = optim.Adam(self.net.parameters(), lr=self.args.lr)
        criterion = nn.CrossEntropyLoss()
        # scheduler = optim.lr_scheduler.StepLR(
        #     optimizer, step_size=self.args.scheduler_step_size, gamma=self.args.scheduler_gamma)
        self.net.train()
        for epoch in range(self.args.epoch):
            start_time = time.time()
            running_loss_t = 0.0
            num_batches = 0
            y_true = []
            y_pred = []
            # print('Epoch: {} , LR: {}'.format(epoch+1, scheduler.get_lr()))
            for data in tqdm(self.train_loader):
                inputs, labels = data
                labels_cp = labels.clone()
                # imshow(torchvision.utils.make_grid(inputs[:,:3,:,:]))
                # skip degenerate batches of a single sample
                if len(inputs) < 2:
                    continue
                if self.args.cuda:
                    inputs = inputs.cuda()
                    labels = labels.cuda()
                outputs = self.net(inputs)
                loss = criterion(outputs, labels)
                _, predicted = torch.max(outputs, 1)
                predicted = predicted.cpu()
                for i, pred in enumerate(predicted):
                    y_pred.append(pred)
                    y_true.append(labels_cp[i])
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                running_loss_t += loss.item()
                num_batches += 1
            end_time = time.time()
            train_f1 = f1_score(y_true, y_pred, average='weighted')
            # scheduler.step()
            self.save(epoch + 1)
            self.writer.add_scalar('Loss/train', running_loss_t / num_batches,
                                   epoch + 1)
            self.writer.add_scalar('F1/train', train_f1, epoch + 1)
            loss_v, val_f1 = self.get_validation_loss(criterion)
            self.writer.add_scalar('Loss/val', loss_v, epoch + 1)
            self.writer.add_scalar('F1/val', val_f1, epoch + 1)
            print(
                "Epoch {} Time {:.2f}s Train-Loss {:.3f} Val-Loss {:.3f} Train-F1 {:.3f} Val-F1 {:.3f}"
                .format(epoch + 1, end_time - start_time,
                        running_loss_t / num_batches, loss_v, train_f1,
                        val_f1))

    def get_validation_loss(self, criterion):
        """ Check validation loss; returns (mean loss, weighted F1). """
        running_loss = 0.0
        num_batches = 0
        self.net.eval()
        y_true = []
        y_pred = []
        with torch.no_grad():
            for data in tqdm(self.val_loader):
                images, labels = data
                labels_cp = labels.clone()
                if self.args.cuda:
                    images = images.cuda()
                    labels = labels.cuda()
                outputs = self.net(images)
                _, predicted = torch.max(outputs, 1)
                predicted = predicted.cpu()
                for i, pred in enumerate(predicted):
                    y_pred.append(pred)
                    y_true.append(labels_cp[i])
                loss = criterion(outputs, labels)
                running_loss += loss.item()
                num_batches += 1
        # restore training mode for the caller's training loop
        self.net.train()
        val_f1 = f1_score(y_true, y_pred, average='weighted')
        return running_loss / num_batches, val_f1

    def get_test_accuracy(self):
        """ Check overall accuracy of model on the held-out test set. """
        y_true = []
        y_pred = []
        class_correct = list(0. for i in range(4))
        class_total = list(0. for i in range(4))
        # FIX: switch to eval mode so dropout/batch-norm behave
        # deterministically at test time (get_validation_loss already did this)
        self.net.eval()
        with torch.no_grad():
            for data in tqdm(self.test_loader):
                images, labels = data
                labels_cp = labels.clone()
                if self.args.cuda:
                    images = images.cuda()
                    labels = labels.cuda()
                outputs = self.net(images)
                _, predicted = torch.max(outputs, 1)
                predicted = predicted.cpu()
                for i, pred in enumerate(predicted):
                    y_pred.append(pred)
                    y_true.append(labels_cp[i])
                c = (predicted == labels_cp).squeeze()
                for i in range(min(self.args.batch_size, len(labels_cp))):
                    label = labels_cp[i]
                    class_correct[label] += c[i].item()
                    class_total[label] += 1
        print("Test F1: ", f1_score(y_true, y_pred, average='weighted'))

    def save(self, epochs, folder_path="../models/"):
        """ Save Model (weights + class names); returns the file name. """
        dict_save = {'params': self.net.state_dict(), 'classes': self.classes}
        name = folder_path + self.unique_tok + '_' + str(epochs) + '.model'
        torch.save(dict_save, name)
        print('Model saved at {}'.format(name))
        return name

    def load(self, path):
        """ Load a saved model; returns the class names stored with it. """
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        dict_load = torch.load(path, map_location=torch.device(device))
        self.net.load_state_dict(dict_load['params'])
        return dict_load['classes']

    def predict(self, inp):
        """ Predict using net; returns the class name of the top logit. """
        if self.args.cuda:
            inp = inp.cuda()
        self.net.eval()
        with torch.no_grad():
            vals = self.net(inp)
        print(vals)
        _, predicted = torch.max(vals, 1)
        predicted = predicted.cpu()
        result_class = self.classes[predicted]
        return result_class
def main():
    """Greedy layer-wise CIFAR-10 training: each block of the main CNN has its
    own SGD optimizer and is updated on a detached representation."""
    global args, best_prec1
    args = parser.parse_args()
    # standard CIFAR-10 augmentation / normalisation
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])
    trainset_class = CIFAR10(root='.',
                             train=True,
                             download=True,
                             transform=transform_train)
    train_loader = torch.utils.data.DataLoader(trainset_class,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=4)
    testset = CIFAR10(root='.',
                      train=False,
                      download=True,
                      transform=transform_test)
    val_loader = torch.utils.data.DataLoader(testset,
                                             batch_size=1000,
                                             shuffle=False,
                                             num_workers=2)
    model = Net()
    model = model.cuda()
    # number of trainable CNN blocks
    ncnn = len(model.main_cnn.blocks)
    n_cnn = len(model.main_cnn.blocks)
    with open(name_log_txt, "a") as text_file:
        print(model, file=text_file)
    # one optimizer per block: the block itself plus its auxiliary head
    layer_optim = [None] * ncnn
    layer_lr = [0.1] * ncnn
    for n in range(ncnn):
        to_train = itertools.chain(model.main_cnn.blocks[n].parameters(),
                                   model.auxillary_nets[n].parameters())
        layer_optim[n] = optim.SGD(to_train,
                                   lr=layer_lr[n],
                                   momentum=0.9,
                                   weight_decay=5e-4)
    criterion = nn.CrossEntropyLoss().cuda()
    for epoch in range(1, args.epochs + 1):
        # make sure batch-norm layers are in training mode
        model.train()
        # per-layer meters for this epoch
        batch_time = [AverageMeter() for _ in range(n_cnn)]
        batch_time_total = AverageMeter()
        data_time = AverageMeter()
        losses = [AverageMeter() for _ in range(n_cnn)]
        top1 = [AverageMeter() for _ in range(n_cnn)]
        # apply the lr schedule to every per-layer optimizer
        for n in range(ncnn):
            layer_lr[n] = lr_scheduler(0.1, epoch - 1)
            for param_group in layer_optim[n].param_groups:
                param_group['lr'] = layer_lr[n]
        end = time.time()
        for i, (inputs, targets) in enumerate(train_loader):
            # measure data loading time
            data_time.update(time.time() - end)
            targets = targets.cuda(non_blocking=True)
            inputs = inputs.cuda(non_blocking=True)
            inputs = torch.autograd.Variable(inputs)
            targets = torch.autograd.Variable(targets)
            # each block trains on the detached output of the previous one,
            # so gradients never cross block boundaries
            representation = inputs
            end_all = time.time()
            for n in range(ncnn):
                end = time.time()
                layer_optim[n].zero_grad()
                outputs, representation = model(representation, n=n)
                loss = criterion(outputs, targets)
                loss.backward()
                layer_optim[n].step()
                representation = representation.detach()
                # measure elapsed time, accuracy, and loss for this layer
                batch_time[n].update(time.time() - end)
                prec1 = accuracy(outputs.data, targets)
                # FIX: loss.data[0] raises on PyTorch >= 0.5; .item() is the
                # documented replacement for extracting a scalar loss
                losses[n].update(loss.item(), float(inputs.size(0)))
                top1[n].update(float(prec1[0]), float(inputs.size(0)))
        # evaluate every layer on the validation set and log the results
        for n in range(ncnn):
            top1test = validate(val_loader, model, criterion, epoch, n)
            with open(name_log_txt, "a") as text_file:
                print(
                    "n: {}, epoch {}, loss: {:.5f}, train top1:{} test top1:{} "
                    .format(n + 1, epoch, losses[n].avg, top1[n].avg,
                            top1test),
                    file=text_file)
acc_loss += loss.item() c += 1 if use_tqdm: batch_results = dict() batch_results['epoch'] = epoch batch_results['loss'] = loss.item() pbar.set_postfix(batch_results) return acc_loss / c # Create the network. model = None model = Net(output_dim=2, net_size=args.net_size) model.cuda() # If network weights are available, load them. if args.resume: print('Resuming from checkpoint: {}'.format(args.resume)) model.load_state_dict(torch.load(args.resume)) # Otherwise, train the network. else: print('Training the model.') optimizer = optim.SGD(model.params_regular, lr=args.learning_rate, momentum=args.momentum) for epoch in range(1, args.epoch + 1): train_ce(args, model, train_loader, optimizer, epoch, use_tqdm=True)
def main():
    """Parse options, assemble the concatenated training set, build the chosen
    model, optionally resume/pretrain, and run the training loop."""
    global opt, model
    opt = parser.parse_args()
    print(opt)

    use_cuda = opt.cuda
    if use_cuda:
        print("=> use gpu id: '{}'".format(opt.gpus))
        os.environ["CUDA_VISIBLE_DEVICES"] = opt.gpus
        if not torch.cuda.is_available():
            raise Exception(
                "No GPU found or Wrong gpu id, please run without --cuda")

    # seed everything for reproducibility
    opt.seed = random.randint(1, 10000)
    print("Random Seed: ", opt.seed)
    torch.manual_seed(opt.seed)
    if use_cuda:
        torch.cuda.manual_seed(opt.seed)
    cudnn.benchmark = True

    print("===> Loading datasets")
    # every directory matching the dataset prefix becomes one training dataset
    matching_sets = [
        DVDTrainingDataset(name) for name in glob.glob(opt.dataset + "*")
    ]
    train_set = ConcatDataset(matching_sets)
    training_data_loader = DataLoader(dataset=train_set,
                                      num_workers=opt.threads,
                                      batch_size=opt.batchSize,
                                      shuffle=True)

    print("===> Building model")
    # dispatch on the model name; anything unknown falls back to Net
    model_factories = {
        "spatio": spatioModel,
        "vdsr": Net,
        "dvd": DVDModel,
    }
    model = model_factories.get(opt.model, Net)()
    criterion = nn.MSELoss(reduction='mean')

    print("===> Setting GPU")
    if use_cuda:
        model = model.cuda()
        criterion = criterion.cuda()

    # optionally resume training state (epoch counter + weights)
    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            opt.start_epoch = checkpoint["epoch"] + 1
            model.load_state_dict(checkpoint["model"].state_dict())
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    # optionally copy weights only (epoch counter untouched)
    if opt.pretrained:
        if os.path.isfile(opt.pretrained):
            print("=> loading model '{}'".format(opt.pretrained))
            weights = torch.load(opt.pretrained)
            model.load_state_dict(weights['model'].state_dict())
        else:
            print("=> no model found at '{}'".format(opt.pretrained))

    print("===> Setting Optimizer")
    optimizer = optim.SGD(model.parameters(),
                          lr=opt.lr,
                          momentum=opt.momentum,
                          weight_decay=opt.weight_decay)

    print("===> Training")
    for epoch in range(opt.start_epoch, opt.nEpochs + 1):
        train(training_data_loader, optimizer, model, criterion, epoch)
        save_checkpoint(model, epoch)
def main():
    """Parse options, build the LapSRN-style model with a Charbonnier loss,
    optionally resume/pretrain, and run the training loop."""
    global opt, model
    opt = parser.parse_args()
    # FIX: was the Python-2 statement `print opt`, a SyntaxError under
    # Python 3 (the rest of this function already uses print())
    print(opt)
    cuda = opt.cuda
    if cuda and not torch.cuda.is_available():
        raise Exception("No GPU found, please run without --cuda")

    # seed everything for reproducibility
    opt.seed = random.randint(1, 10000)
    print("Random Seed: ", opt.seed)
    torch.manual_seed(opt.seed)
    if cuda:
        torch.cuda.manual_seed(opt.seed)
    cudnn.benchmark = True

    print("===> Loading datasets")
    train_set = get_training_set(opt.train_dir)
    #train_set = DatasetFromHdf5("data/lap_pry_x4_small.h5")
    training_data_loader = DataLoader(dataset=train_set,
                                      num_workers=opt.threads,
                                      batch_size=opt.batchSize,
                                      shuffle=True)

    print("===> Building model")
    model = Net(opt.scale_factor)
    criterion = L1_Charbonnier_loss()

    print("===> Setting GPU")
    if cuda:
        model = model.cuda()
        criterion = criterion.cuda()
    else:
        model = model.cpu()

    # optionally resume from a checkpoint (restores epoch counter + weights)
    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            opt.start_epoch = checkpoint["epoch"] + 1
            model.load_state_dict(checkpoint["model"].state_dict())
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    # optionally copy weights from a checkpoint (weights only)
    if opt.pretrained:
        if os.path.isfile(opt.pretrained):
            print("=> loading model '{}'".format(opt.pretrained))
            weights = torch.load(opt.pretrained)
            model.load_state_dict(weights['model'].state_dict())
        else:
            print("=> no model found at '{}'".format(opt.pretrained))

    print("===> Setting Optimizer")
    optimizer = optim.Adam(model.parameters(), lr=opt.lr)

    print("===> Training")
    for epoch in range(opt.start_epoch, opt.nEpochs + 1):
        train(training_data_loader, optimizer, model, criterion, epoch)
        save_checkpoint(model, epoch)
#optimizer = optim.SGD(net.parameters(), learning_rate, momentum=0.9) optimizer = optim.Adam(params=net.parameters(), lr=0.001) # ## Training and Initial Observation # # Now, you'll train on your batched training data from `train_loader` for a number of epochs. # # To quickly observe how your model is training and decide on whether or not you should modify it's structure or hyperparameters, you're encouraged to start off with just one or two epochs at first. As you train, note how your the model's loss behaves over time: does it decrease quickly at first and then slow down? Does it take a while to decrease in the first place? What happens if you change the batch size of your training data or modify your loss function? etc. # # Use these initial observations to make changes to your model and decide on the best architecture before you train for many epochs and create a final model. # In[ ]: got_cuda = torch.cuda.is_available() if got_cuda: net.cuda() #***well push that model to the GPU def train_net(n_epochs): import random # prepare the net for training net.train() for epoch in range(n_epochs): # loop over the dataset multiple times file_name = "model-" + str(random.randint( 1, 99999999)) + "epoch" + str(epoch) print("Writing: " + file_name) print("EPOCH: " + str(epoch))