def main():
    """Train the temporal (stacked optical-flow) stream model.

    Parses CLI args, builds the motion data loaders and model, runs
    train/validation epochs with an LR-on-plateau schedule, checkpoints the
    best model, and streams loss/accuracy curves to a local visdom server.
    Relies on module-level `parser`, `data_loader`, `set_model`, helpers.
    """
    global args
    args = parser.parse_args()
    save_path = os.path.join(args.save_root, "temporal_" + args.model)
    make_save_dir(save_path)

    # visdom windows for live loss / accuracy curves (seeded with a dummy point)
    vis = visdom.Visdom()
    loss_plot = vis.line(X=np.asarray([0]), Y=np.asarray([0]))
    acc_plot = vis.line(X=np.asarray([0]), Y=np.asarray([0]))

    loader = data_loader.MotionDataLoader(img_size=args.img_size,
                                          batch_size=args.batch_size,
                                          num_workers=8,
                                          in_channel=args.stack_size,
                                          path=args.data_root,
                                          txt_path=args.text_root,
                                          split_num=args.split_num)
    train_loader, test_loader, test_video = loader.run()

    model = set_model(args.model, args.stack_size, str2bool(args.pretrained))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # BUGFIX: the original called model.cuda(device=device) and
    # nn.CrossEntropyLoss().cuda(), which raise on CPU-only machines even
    # though `device` correctly falls back to "cpu"; .to(device) covers both.
    model = model.to(device)
    criterion = nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate,
                                momentum=0.9)
    scheduler = ReduceLROnPlateau(optimizer, 'min', patience=10, verbose=True)

    cur_best_acc = 0
    for epoch in range(1, args.epoch + 1):
        train_acc, train_loss, model = train_1epoch(model, train_loader,
                                                    optimizer, criterion,
                                                    epoch, args.epoch)
        # BUGFIX: fixed "Accuacy" typo in the progress message.
        print("Train Accuracy:", train_acc, "Train Loss:", train_loss)
        val_acc, val_loss, video_level_pred = val_1epoch(
            model, test_loader, criterion, epoch, args.epoch)
        print("Validation Accuracy:", val_acc, "Validation Loss:", val_loss)

        # lr scheduler keyed on validation loss
        scheduler.step(val_loss)

        is_best = val_acc > cur_best_acc
        if is_best:
            cur_best_acc = val_acc
            # persist video-level predictions of the best epoch so far
            # (redundant f.close() inside the with-block removed)
            with open(os.path.join(save_path,
                                   'temporal_video_preds.pickle'), 'wb') as f:
                pickle.dump(video_level_pred, f)

        vis.line(X=np.asarray([epoch]), Y=np.asarray([train_loss]),
                 win=loss_plot, update="append", name='Train Loss')
        vis.line(X=np.asarray([epoch]), Y=np.asarray([train_acc]),
                 win=acc_plot, update="append", name="Train Accuracy")
        vis.line(X=np.asarray([epoch]), Y=np.asarray([val_loss]),
                 win=loss_plot, update="append", name='Validation Loss')
        vis.line(X=np.asarray([epoch]), Y=np.asarray([val_acc]),
                 win=acc_plot, update="append", name="Validation Accuracy")
        save_best_model(is_best, model, save_path, epoch)
def main():
    """Train a weakly-supervised localizer (AlexNet variant) on VOC 2007.

    Builds the model selected by --arch, optionally resumes from a
    checkpoint, trains the classifier head with MultiLabelSoftMarginLoss,
    validates periodically, and checkpoints on the best m1*m2 score.
    """
    # NOTE(review): assumes best_prec1 is initialized at module level —
    # otherwise the first `score > best_prec1` below raises NameError when
    # no checkpoint is resumed. Verify against the rest of the file.
    global args, best_prec1
    args = parser.parse_args()
    args.distributed = args.world_size > 1

    # create model
    print("=> creating model '{}'".format(args.arch))
    if args.arch == 'localizer_alexnet':
        model = localizer_alexnet(pretrained=args.pretrained)
    elif args.arch == 'localizer_alexnet_robust':
        model = localizer_alexnet_robust(pretrained=args.pretrained)
    # (removed dead code: the no-op `args.lr = args.lr` and commented-out
    # cuda device selection / alternative optimizer lines)
    print(model)

    # data-parallelism over the convolutional features only
    model.features = torch.nn.DataParallel(model.features)
    model.cuda()

    # multi-label loss; only the classifier head is optimized
    criterion = nn.MultiLabelSoftMarginLoss().cuda()
    optimizer = torch.optim.SGD(model.classifier.parameters(), args.lr)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading
    trainval_imdb = get_imdb('voc_2007_trainval')
    test_imdb = get_imdb('voc_2007_test')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_dataset = IMDBDataset(
        trainval_imdb,
        transforms.Compose([
            transforms.Resize((512, 512)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))
    train_sampler = None
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler)
    val_loader = torch.utils.data.DataLoader(
        IMDBDataset(
            test_imdb,
            transforms.Compose([
                transforms.Resize((384, 384)),
                transforms.ToTensor(),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    # loggers: file logger plus a per-architecture visdom server
    if args.arch == 'localizer_alexnet':
        data_log = logger.Logger('./logs/', name='freeloc')
        vis = visdom.Visdom(server='http://localhost', port='8097')
    else:
        data_log = logger.Logger('./logs_robust/', name='freeloc')
        vis = visdom.Visdom(server='http://localhost', port='8090')

    # hard-coded epoch budget per architecture
    if args.arch == 'localizer_alexnet':
        args.epochs = 30
    else:
        args.epochs = 45

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, data_log,
              vis, args.arch)
        # evaluate on validation set
        if epoch % args.eval_freq == 0 or epoch == args.epochs - 1:
            m1, m2 = validate(val_loader, model, criterion)
            score = m1 * m2
            # remember best prec@1 and save checkpoint
            is_best = score > best_prec1
            best_prec1 = max(score, best_prec1)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_prec1': best_prec1,
                    'optimizer': optimizer.state_dict(),
                }, is_best)
    print('end of training')
def binary_learning(train_loader, network, criterion, test_loader, optimizer,
                    start_epoch, lr_scheduler):
    """Train `network` to classify image pairs as same/different class.

    For each batch, builds a +/- pair-label matrix from the batch labels
    (via utils.get_labels_matrix_fast), selects the pairs to train on
    (get_indices_for_loss), and optimizes `criterion` on the pairwise
    outputs. Loss and a periodic average-F1 metric are plotted to visdom;
    a checkpoint is written every 10 epochs.

    NOTE(review): uses the pre-0.4 PyTorch API (Variable, loss.data[0]) —
    this code assumes an old torch version; do not modernize piecemeal.
    """
    vis = visdom.Visdom()
    r_loss = []           # running list of logged batch losses
    r_average_f1 = []     # running list of train-set average F1 scores
    iterations = []       # x-axis for the loss plot
    epochs = []           # x-axis for the F1 plot
    total_iteration = 0
    options = dict(legend=['loss'])
    loss_plot = vis.line(Y=np.zeros(1), X=np.zeros(1), opts=options)
    options = dict(legend=['average_f1'])
    average_f1_plot = vis.line(Y=np.zeros(1), X=np.zeros(1), opts=options)
    for epoch in range(start_epoch,
                       params.number_of_epochs_for_metric_learning):
        print('current_learning_rate =', optimizer.param_groups[0]['lr'], ' ',
              datetime.datetime.now())
        i = 0
        for data in train_loader:
            i = i + 1
            inputs, labels = data
            # print('inputs ', inputs)  # batch_size x 3 x 64 x 64
            # we need pairs of images in our batch
            # print('inputs, labels ', labels)  # and +1/-1 labels matrix
            labels_matrix = utils.get_labels_matrix_fast(labels,
                                                         labels).view(-1, 1)
            indices_for_loss = get_indices_for_loss(labels_matrix,
                                                    negative_pair_sign=0)
            # keep only the selected pairs
            labels_matrix = labels_matrix[indices_for_loss]
            labels_matrix = Variable(labels_matrix).cuda()
            # zero the parameter gradients
            optimizer.zero_grad()
            # forward + backward + optimize
            # here we should create input pair for the network from just inputs
            outputs = network(Variable(inputs).cuda())
            outputs = outputs[indices_for_loss.cuda(), :]
            # print('outputs ', outputs)
            # print('labels_matrix.long().view(-1, 1).squeeze() ',
            #       labels_matrix.long().view(-1, 1).squeeze())
            loss = criterion(outputs, labels_matrix.long().view(-1, 1).squeeze())
            loss.backward()
            optimizer.step()
            # print statistics (pre-0.4 scalar access)
            current_batch_loss = loss.data[0]
            if i % 10 == 0:  # log / plot every 10 mini-batches
                print('[epoch %d, iteration in the epoch %5d] loss: %.30f' %
                      (epoch + 1, i + 1, current_batch_loss))
                # print('PCA matrix ', network.spoc.PCA_matrix)
                r_loss.append(current_batch_loss)
                iterations.append(total_iteration + i)
                options = dict(legend=['loss'])
                loss_plot = vis.line(Y=np.array(r_loss),
                                     X=np.array(iterations),
                                     win=loss_plot, opts=options)
        # NOTE(review): current_batch_loss is NameError'd here if
        # train_loader yields no batches — assumed non-empty.
        lr_scheduler.step(epoch=epoch, metrics=current_batch_loss)
        if epoch % 10 == 0:
            epochs.append(epoch)
            # print the quality metric
            gc.collect()
            print('Evaluation on train internal', datetime.datetime.now())
            average_f1 = test.test_for_binary_classification_1_batch(
                train_loader, network)
            r_average_f1.append(average_f1)
            options = dict(legend=['average_f1'])
            average_f1_plot = vis.line(Y=np.array(r_average_f1),
                                       X=np.array(epochs),
                                       win=average_f1_plot, opts=options)
            print('Evaluation on test internal', datetime.datetime.now())
            # test-set F1 is printed by the call but not plotted/stored
            average_f1 = test.test_for_binary_classification_1_batch(
                test_loader, network)
            utils.save_checkpoint(
                network=network,
                optimizer=optimizer,
                filename=params.
                name_prefix_for_saved_model_for_binary_classification +
                '-%d' % (epoch),
                epoch=epoch)
        total_iteration = total_iteration + i
    print('Finished Training for binary classification')
# NOTE(review): the code below is the tail of a collate function whose header
# is not visible in this chunk — `batch`, `batch_size`, `inputs`,
# `input_percentages` and `max_length` are defined there. Do not assume their
# shapes beyond what is used here.
    target_sizes = torch.IntTensor(batch_size)
    targets = []
    input_percentages_list = []
    for x in range(batch_size):
        sample = batch[x]
        feature = sample[0]   # per-sample feature matrix; first dim is time
        label = sample[1]     # sequence of integer label ids
        seq_length = feature.size(0)
        # copy the (variable-length) feature into the padded batch tensor
        inputs[x][0].narrow(0, 0, seq_length).copy_(feature)
        input_percentages[x] = seq_length / float(max_length)
        input_percentages_list.append(seq_length / float(max_length))
        target_sizes[x] = len(label)
        targets.extend(label)  # targets are flattened across the batch
    targets = torch.IntTensor(targets)
    return inputs, targets, input_percentages, input_percentages_list, target_sizes


if __name__ == '__main__':
    # Quick visual check: render the spectrogram of one training sample in
    # visdom with its phone sequence as the plot title.
    dev_dataset = myDataset('../data_prepare/data',
                            data_set='train',
                            feature_type="spectrum",
                            out_type='phone',
                            n_feats=201,
                            mel=True)
    #dev_dataloader = myDataLoader(dev_dataset, batch_size=2, shuffle=True)
    import visdom
    viz = visdom.Visdom(env='fan')
    for i in range(1):
        show = dev_dataset[i][0].transpose(0, 1)
        text = dev_dataset[i][1]
        # map integer ids back to phone strings for the title
        for num in range(len(text)):
            text[num] = dev_dataset.int2phone[text[num]]
        text = ' '.join(text)
        opts = dict(title=text, xlabel='frame', ylabel='spectrum')
        viz.heatmap(show, opts = opts)
def train(config, logger, train_and_valid_data):
    """Train `Net` on numpy train/valid splits with early stopping.

    Args:
        config: project config object (batch sizes, lr, paths, patience, ...).
        logger: logging.Logger-like object for progress messages.
        train_and_valid_data: tuple (train_X, train_Y, valid_X, valid_Y)
            of numpy arrays.

    Saves the best model (lowest validation loss) to
    config.model_save_path + config.model_name, optionally streams the
    loss to visdom, and plots the loss curves with matplotlib at the end.
    """
    if config.do_train_visualized:
        import visdom
        vis = visdom.Visdom(env='model_pytorch')
    train_X, train_Y, valid_X, valid_Y = train_and_valid_data
    # convert to tensors first; DataLoader then yields trainable batches
    train_X, train_Y = torch.from_numpy(train_X).float(), torch.from_numpy(
        train_Y).long()
    train_loader = DataLoader(TensorDataset(train_X, train_Y),
                              batch_size=config.batch_size,
                              shuffle=config.shuffle_train_data)
    valid_X, valid_Y = torch.from_numpy(valid_X).float(), torch.from_numpy(
        valid_Y).long()
    valid_loader = DataLoader(TensorDataset(valid_X, valid_Y),
                              batch_size=config.batch_size)

    # train on GPU when requested and available, otherwise CPU
    device = torch.device(
        "cuda:0" if config.use_cuda and torch.cuda.is_available() else "cpu")
    model = Net(config).to(device)  # .to(device) moves the model to GPU memory
    if config.add_train:
        # incremental training: start from previously saved weights
        model.load_state_dict(
            torch.load(config.model_save_path + config.model_name))

    # optimizer and loss
    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
    scheduler = lr_scheduler.StepLR(optimizer,
                                    step_size=config.lr_step_size,
                                    gamma=0.1)
    criterion = cross_entropy2d

    valid_loss_min = float("inf")
    bad_epoch = 0
    global_step = 0
    train_losses = []
    valid_losses = []
    for epoch in range(config.epoch):
        logger.info("Epoch {}/{}".format(epoch, config.epoch))
        model.train()  # switch to training mode
        train_loss_array = []
        for i, _data in enumerate(train_loader):
            _train_X, _train_Y = _data[0].to(device), _data[1].to(device)
            optimizer.zero_grad()      # clear gradients before each step
            pred_Y = model(_train_X)   # forward pass
            loss = criterion(pred_Y, _train_Y)
            loss.backward()            # backpropagate
            optimizer.step()           # update parameters
            train_loss_array.append(loss.item())
            global_step += 1
            if config.do_train_visualized and global_step % 100 == 0:
                # stream training loss every 100 steps
                vis.line(X=np.array([global_step]),
                         Y=np.array([loss.item()]),
                         win='Train_Loss',
                         update='append' if global_step > 0 else None,
                         name='Train',
                         opts=dict(showlegend=True))
        scheduler.step()

        model.eval()  # switch to eval mode for validation
        valid_loss_array = []
        for _valid_X, _valid_Y in valid_loader:
            _valid_X, _valid_Y = _valid_X.to(device), _valid_Y.to(device)
            pred_Y = model(_valid_X)
            loss = criterion(pred_Y, _valid_Y)  # forward only, no backprop
            valid_loss_array.append(loss.item())
        train_loss_cur = np.mean(train_loss_array)
        valid_loss_cur = np.mean(valid_loss_array)
        train_losses.append(train_loss_cur)
        valid_losses.append(valid_loss_cur)
        logger.info("The train loss is {:.6f}. ".format(train_loss_cur) +
                    "The valid loss is {:.6f}. ".format(valid_loss_cur) +
                    'lr is {:.6f}.'.format(scheduler.get_lr()[0]))
        if config.do_train_visualized:
            # the very first train_loss_cur can be large enough to squash
            # the visdom plot's y-axis
            vis.line(X=np.array([epoch]),
                     Y=np.array([train_loss_cur]),
                     win='Epoch_Loss',
                     update='append' if epoch > 0 else None,
                     name='Train',
                     opts=dict(showlegend=True))
            vis.line(X=np.array([epoch]),
                     Y=np.array([valid_loss_cur]),
                     win='Epoch_Loss',
                     update='append' if epoch > 0 else None,
                     name='Eval',
                     opts=dict(showlegend=True))
        # Early stopping: quit after config.patience epochs without
        # validation improvement to prevent overfitting.
        if valid_loss_cur < valid_loss_min:
            valid_loss_min = valid_loss_cur
            bad_epoch = 0
            torch.save(model.state_dict(),
                       config.model_save_path + config.model_name)
        else:
            bad_epoch += 1
            if bad_epoch >= config.patience:
                logger.info(
                    " The training stops early in epoch {}".format(epoch))
                break

    # BUGFIX: the x-axis must match the number of epochs actually run.
    # The original used range(1, config.epoch + 1), which made plt.plot
    # raise a length-mismatch ValueError whenever early stopping fired.
    x_axis = list(range(1, len(train_losses) + 1))
    plt.plot(x_axis, train_losses, 'r', label='train loss')
    plt.plot(x_axis, valid_losses, 'b', label='validation loss')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend()
    plt.title(config.model_name)
    plt.show()
# Remaining CLI options (parser is created earlier in the file).
# NOTE(review): --fix_decoder defaults to the *string* 'False', which is
# truthy — confirm downstream code parses it with str2bool or similar.
parser.add_argument('--fix_decoder',
                    default='False',
                    help='if set to True, on the pointNet encoder is trained')
parser.add_argument('--k1', type=float, default=0.2)
parser.add_argument('--lr', type=float, default=0.001)
parser.add_argument('--lrDecay', type=float, default=0.1)
# NOTE(review): lrStep / start_eval_epoch are epoch counts declared as float
parser.add_argument('--lrStep', type=float, default=300)
parser.add_argument('--start_eval_epoch', type=float, default=100)
parser.add_argument('--category', type=str, default='all/chair')
parser.add_argument('--rotate', action='store_true')
parser.add_argument('--white_bg', action='store_true')
opt = parser.parse_args()
print(opt)

# Launch visdom for visualization
vis = visdom.Visdom(port=8000, env=opt.env)

# Checkpoint directory named by launch timestamp + experiment env
now = datetime.datetime.now()
save_path = now.isoformat() + opt.env  # now.isoformat()
if opt.rotate:
    dir_name = os.path.join('./checkpoints/%s_rotate/%s' %
                            (opt.category, save_path))
else:
    dir_name = os.path.join('./checkpoints/%s/%s' % (opt.category, save_path))
if not os.path.exists(dir_name):
    os.makedirs(dir_name)
logname = os.path.join(dir_name, 'log.txt')

# ANSI helper: wrap a string in blue terminal color codes
blue = lambda x: '\033[94m' + x + '\033[0m'

# Seed RNG with a random-but-logged seed so runs can be reproduced
opt.manualSeed = random.randint(1, 10000)  # fix seed
print("Random Seed: ", opt.manualSeed)
random.seed(opt.manualSeed)
def load_state_dict(self, d):
    """Restore visualizer state from a dict produced by the matching
    state_dict()/save method: re-create the visdom connection, the text
    log and the per-plot point index. Returns self for chaining.
    """
    # BUGFIX: the original read `self.d.get('vis_kw')` — `self.d` does not
    # exist (the state dict is the parameter `d`), and unpacking with
    # `**None` raises TypeError whenever the key is absent.
    self.vis = visdom.Visdom(env=d.get('env', self.vis.env),
                             **(d.get('vis_kw') or {}))
    self.log_text = d.get('log_text', '')
    self.index = d.get('index', dict())
    return self
args = parser print(args) EXPERIMENT_CODE = 'as_76' if not os.path.exists(f'checkpoint/{EXPERIMENT_CODE}/'): print( f'New EXPERIMENT_CODE:{EXPERIMENT_CODE}, creating saving directories ...', end='') os.mkdir(f'checkpoint/{EXPERIMENT_CODE}/') os.mkdir(f'sample/{EXPERIMENT_CODE}/') print('Done') else: print('EXPERIMENT_CODE already exits.') viz = visdom.Visdom(server='10.10.10.100', port=33241, env=args.env) DESCRIPTION = """ SPADE;Z=pose;Seg=app; """\ f'file: tz_main_v18_c6_spade.py;\n '\ f'Hostname: {socket.gethostname()}; ' \ f'Experiment_Code: {EXPERIMENT_CODE};\n' os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu device = 'cuda' torch.backends.cudnn.benchmark = True transform = transforms.Compose([ transforms.Resize(args.size),
def train():
    """DAgger training loop for a text-based ALFWorld agent.

    Loads config, builds the training environment (plus optional in/out-of-
    distribution eval environments), then repeatedly: rolls out episodes
    mixing expert / random / agent actions, pushes trajectories into the
    DAgger replay memory, periodically updates the model, evaluates,
    checkpoints on best scores, plots running averages to visdom, and
    appends a JSON stats line per report. Saves a final checkpoint on exit.
    """
    time_1 = datetime.datetime.now()
    step_time = []  # per-env-step wall times (microseconds / batch item)
    config = generic.load_config()
    agent = TextDAggerAgent(config)
    alfred_env = getattr(alfworld.agents.environment,
                         config["env"]["type"])(config, train_eval="train")
    env = alfred_env.init_env(batch_size=agent.batch_size)

    id_eval_env, num_id_eval_game = None, 0
    ood_eval_env, num_ood_eval_game = None, 0
    if agent.run_eval:
        # in distribution
        if config['dataset']['eval_id_data_path'] is not None:
            alfred_env = getattr(alfworld.agents.environment,
                                 config["general"]["evaluate"]["env"]["type"])(
                                     config,
                                     train_eval="eval_in_distribution")
            id_eval_env = alfred_env.init_env(batch_size=agent.eval_batch_size)
            num_id_eval_game = alfred_env.num_games
        # out of distribution
        if config['dataset']['eval_ood_data_path'] is not None:
            alfred_env = getattr(alfworld.agents.environment,
                                 config["general"]["evaluate"]["env"]["type"])(
                                     config,
                                     train_eval="eval_out_of_distribution")
            ood_eval_env = alfred_env.init_env(
                batch_size=agent.eval_batch_size)
            num_ood_eval_game = alfred_env.num_games

    output_dir = config["general"]["save_path"]
    data_dir = config["general"]["save_path"]
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # visdom
    if config["general"]["visdom"]:
        import visdom
        viz = visdom.Visdom()
        reward_win, step_win = None, None
        loss_win = None
        viz_game_points, viz_game_step, viz_loss = [], [], []
        viz_student_points, viz_student_step = [], []
        viz_id_eval_game_points, viz_id_eval_step = [], []
        viz_ood_eval_game_points, viz_ood_eval_step = [], []

    step_in_total = 0
    episode_no = 0
    # running averages over the last 500 episodes
    running_avg_game_points = HistoryScoreCache(capacity=500)
    running_avg_student_points = HistoryScoreCache(capacity=500)
    running_avg_game_steps = HistoryScoreCache(capacity=500)
    running_avg_student_steps = HistoryScoreCache(capacity=500)
    running_avg_dagger_loss = HistoryScoreCache(capacity=500)

    json_file_name = agent.experiment_tag.replace(" ", "_")
    best_performance_so_far, best_ood_performance_so_far = 0.0, 0.0

    # load model from checkpoint
    if agent.load_pretrained:
        print("Checking {} for {}".format(data_dir, agent.load_from_tag))
        if os.path.exists(data_dir + "/" + agent.load_from_tag + ".pt"):
            agent.load_pretrained_model(data_dir + "/" + agent.load_from_tag +
                                        ".pt")
            agent.update_target_net()

    while (True):
        if episode_no > agent.max_episode:
            break
        # seed numpy and the env per episode for reproducible rollouts
        np.random.seed(episode_no)
        env.seed(episode_no)
        obs, infos = env.reset()
        game_names = infos["extra.gamefile"]
        batch_size = len(obs)

        agent.train()
        agent.init(batch_size)
        previous_dynamics = None
        execute_actions = []
        prev_step_dones, prev_rewards = [], []
        for _ in range(batch_size):
            execute_actions.append("restart")
            prev_step_dones.append(0.0)
            prev_rewards.append(0.0)

        observation_strings = list(obs)
        task_desc_strings, observation_strings = agent.get_task_and_obs(
            observation_strings)
        task_desc_strings = agent.preprocess_task(task_desc_strings)
        observation_strings = agent.preprocess_observation(observation_strings)
        first_sight_strings = copy.deepcopy(observation_strings)
        agent.observation_pool.push_first_sight(first_sight_strings)
        if agent.action_space == "exhaustive":
            action_candidate_list = [
                extract_admissible_commands(intro, obs) for intro, obs in zip(
                    first_sight_strings, observation_strings)
            ]
        else:
            action_candidate_list = list(infos["admissible_commands"])
        action_candidate_list = agent.preprocess_action_candidates(
            action_candidate_list)
        observation_strings = [
            item + " [SEP] " + a
            for item, a in zip(observation_strings, execute_actions)
        ]  # appending the chosen action at previous step into the observation

        # it requires to store sequences of transitions into memory with order,
        # so we use a cache to keep what agents returns, and push them into memory
        # altogether in the end of game.
        transition_cache = []
        still_running_mask = []
        sequence_game_points = []
        print_actions = []
        # `report` is True on episodes where we log/eval (student-only actions)
        report = agent.report_frequency > 0 and (
            episode_no % agent.report_frequency <=
            (episode_no - batch_size) % agent.report_frequency)

        for step_no in range(agent.max_nb_steps_per_episode):
            # push obs into observation pool
            agent.observation_pool.push_batch(observation_strings)
            # get most recent k observations
            most_recent_observation_strings = agent.observation_pool.get()
            # predict actions
            if agent.action_space == "generation":
                agent_actions, current_dynamics = agent.command_generation_greedy_generation(
                    most_recent_observation_strings, task_desc_strings,
                    previous_dynamics)
            elif agent.action_space in ["admissible", "exhaustive"]:
                agent_actions, _, current_dynamics = agent.admissible_commands_greedy_generation(
                    most_recent_observation_strings, task_desc_strings,
                    action_candidate_list, previous_dynamics)
            else:
                raise NotImplementedError()
            random_actions, _ = agent.admissible_commands_random_generation(
                action_candidate_list)

            # expert actions from the planner, falling back to "look"
            expert_actions, expert_indices = [], []
            for b in range(batch_size):
                if "expert_plan" in infos and len(infos["expert_plan"][b]) > 0:
                    next_action = infos["expert_plan"][b][0]
                    expert_actions.append(next_action)
                    expert_indices.append(
                        action_candidate_list[b].index(next_action) if agent.
                        action_space in ["admissible", "exhaustive"] else -1)
                else:
                    expert_actions.append("look")
                    expert_indices.append(
                        action_candidate_list[b].index("look") if agent.
                        action_space in ["admissible", "exhaustive"] else -1)

            # DAgger mixing: expert w.p. fraction_assist, random w.p.
            # fraction_random, else the agent's own action; report episodes
            # always use the agent (student) actions.
            from_which = np.random.uniform(low=0.0,
                                           high=1.0,
                                           size=(batch_size, ))
            execute_actions = []
            for b in range(batch_size):
                if not report and from_which[b] <= agent.fraction_assist:
                    execute_actions.append(expert_actions[b])
                elif not report and from_which[
                        b] <= agent.fraction_assist + agent.fraction_random:
                    execute_actions.append(random_actions[b])
                else:
                    execute_actions.append(agent_actions[b])

            replay_info = [
                most_recent_observation_strings, task_desc_strings,
                action_candidate_list, expert_actions, expert_indices
            ]
            transition_cache.append(replay_info)

            env_step_start_time = datetime.datetime.now()
            obs, _, dones, infos = env.step(execute_actions)
            env_step_stop_time = datetime.datetime.now()
            step_time.append(
                (env_step_stop_time - env_step_start_time).microseconds /
                (float(batch_size)))

            scores = [float(item) for item in infos["won"]]
            dones = [float(item) for item in dones]

            observation_strings = list(obs)
            observation_strings = agent.preprocess_observation(
                observation_strings)
            if agent.action_space == "exhaustive":
                action_candidate_list = [
                    extract_admissible_commands(intro, obs)
                    for intro, obs in zip(first_sight_strings,
                                          observation_strings)
                ]
            else:
                action_candidate_list = list(infos["admissible_commands"])
            action_candidate_list = agent.preprocess_action_candidates(
                action_candidate_list)
            observation_strings = [
                item + " [SEP] " + a
                for item, a in zip(observation_strings, execute_actions)
            ]  # appending the chosen action at previous step into the observation
            previous_dynamics = current_dynamics

            # periodic model update from the DAgger replay memory
            if step_in_total % agent.dagger_update_per_k_game_steps == 0:
                dagger_loss = agent.update_dagger()
                if dagger_loss is not None:
                    running_avg_dagger_loss.push(dagger_loss)

            if step_no == agent.max_nb_steps_per_episode - 1:
                # terminate the game because DQN requires one extra step
                dones = [1.0 for _ in dones]
            step_in_total += 1
            still_running = [1.0 - float(item)
                             for item in prev_step_dones]  # list of float
            prev_step_dones = dones
            step_rewards = [
                float(curr) - float(prev)
                for curr, prev in zip(scores, prev_rewards)
            ]  # list of float
            prev_rewards = scores
            sequence_game_points.append(step_rewards)
            still_running_mask.append(still_running)
            print_actions.append(
                execute_actions[0] if still_running[0] else "--")

            # if all ended, break
            if np.sum(still_running) == 0:
                break

        still_running_mask_np = np.array(still_running_mask)
        game_points_np = np.array(
            sequence_game_points) * still_running_mask_np  # step x batch

        # push experience into replay buffer (dagger)
        if not report:
            for b in range(batch_size):
                trajectory = []
                for i in range(len(transition_cache)):
                    observation_strings, task_strings, action_candidate_list, expert_actions, expert_indices = transition_cache[
                        i]
                    trajectory.append([
                        observation_strings[b], task_strings[b],
                        action_candidate_list[b], expert_actions[b],
                        expert_indices[b]
                    ])
                    if still_running_mask_np[i][b] == 0.0:
                        break
                agent.dagger_memory.push(trajectory)

        # report episodes track student stats, others track mixed-policy stats
        for b in range(batch_size):
            if report:
                running_avg_student_points.push(np.sum(game_points_np, 0)[b])
                running_avg_student_steps.push(
                    np.sum(still_running_mask_np, 0)[b])
            else:
                running_avg_game_points.push(np.sum(game_points_np, 0)[b])
                running_avg_game_steps.push(
                    np.sum(still_running_mask_np, 0)[b])

        # finish game
        agent.finish_of_episode(episode_no, batch_size)
        episode_no += batch_size

        if not report:
            continue
        time_2 = datetime.datetime.now()
        time_spent_seconds = (time_2 - time_1).seconds
        eps_per_sec = float(episode_no) / time_spent_seconds
        avg_step_time = np.mean(np.array(step_time))
        print(
            "Model: {:s} | Episode: {:3d} | {:s} | time spent: {:s} | eps/sec : {:2.3f} | avg step time: {:2.10f} | loss: {:2.3f} | game points: {:2.3f} | used steps: {:2.3f} | student points: {:2.3f} | student steps: {:2.3f} | fraction assist: {:2.3f} | fraction random: {:2.3f}"
            .format(agent.experiment_tag, episode_no, game_names[0],
                    str(time_2 - time_1).rsplit(".")[0], eps_per_sec,
                    avg_step_time, running_avg_dagger_loss.get_avg(),
                    running_avg_game_points.get_avg(),
                    running_avg_game_steps.get_avg(),
                    running_avg_student_points.get_avg(),
                    running_avg_student_steps.get_avg(), agent.fraction_assist,
                    agent.fraction_random))
        # print(game_id + ": " + " | ".join(print_actions))
        print(" | ".join(print_actions))

        # evaluate
        id_eval_game_points, id_eval_game_step = 0.0, 0.0
        ood_eval_game_points, ood_eval_game_step = 0.0, 0.0
        if agent.run_eval:
            if id_eval_env is not None:
                id_eval_res = evaluate_dagger(id_eval_env, agent,
                                              num_id_eval_game)
                id_eval_game_points, id_eval_game_step = id_eval_res[
                    'average_points'], id_eval_res['average_steps']
            if ood_eval_env is not None:
                ood_eval_res = evaluate_dagger(ood_eval_env, agent,
                                               num_ood_eval_game)
                ood_eval_game_points, ood_eval_game_step = ood_eval_res[
                    'average_points'], ood_eval_res['average_steps']
            # checkpoint on best in-distribution / out-of-distribution scores
            if id_eval_game_points >= best_performance_so_far:
                best_performance_so_far = id_eval_game_points
                agent.save_model_to_path(output_dir + "/" +
                                         agent.experiment_tag + "_id.pt")
            if ood_eval_game_points >= best_ood_performance_so_far:
                best_ood_performance_so_far = ood_eval_game_points
                agent.save_model_to_path(output_dir + "/" +
                                         agent.experiment_tag + "_ood.pt")
        else:
            # no eval envs: checkpoint on running student performance
            if running_avg_student_points.get_avg() >= best_performance_so_far:
                best_performance_so_far = running_avg_student_points.get_avg()
                agent.save_model_to_path(output_dir + "/" +
                                         agent.experiment_tag + ".pt")

        # plot using visdom: first call creates each window with the full
        # history; later calls append only the newest point
        if config["general"]["visdom"]:
            viz_game_points.append(running_avg_game_points.get_avg())
            viz_game_step.append(running_avg_game_steps.get_avg())
            viz_student_points.append(running_avg_student_points.get_avg())
            viz_student_step.append(running_avg_student_steps.get_avg())
            viz_loss.append(running_avg_dagger_loss.get_avg())
            viz_id_eval_game_points.append(id_eval_game_points)
            viz_id_eval_step.append(id_eval_game_step)
            viz_ood_eval_game_points.append(ood_eval_game_points)
            viz_ood_eval_step.append(ood_eval_game_step)
            viz_x = np.arange(len(viz_game_points)).tolist()

            if reward_win is None:
                reward_win = viz.line(X=viz_x,
                                      Y=viz_game_points,
                                      opts=dict(title=agent.experiment_tag +
                                                "_game_points"),
                                      name="game points")
                viz.line(X=viz_x,
                         Y=viz_student_points,
                         opts=dict(title=agent.experiment_tag +
                                   "_student_points"),
                         win=reward_win,
                         update='append',
                         name="student points")
                viz.line(X=viz_x,
                         Y=viz_id_eval_game_points,
                         opts=dict(title=agent.experiment_tag +
                                   "_id_eval_game_points"),
                         win=reward_win,
                         update='append',
                         name="id eval game points")
                viz.line(X=viz_x,
                         Y=viz_ood_eval_game_points,
                         opts=dict(title=agent.experiment_tag +
                                   "_ood_eval_game_points"),
                         win=reward_win,
                         update='append',
                         name="ood eval game points")
            else:
                viz.line(X=[len(viz_game_points) - 1],
                         Y=[viz_game_points[-1]],
                         opts=dict(title=agent.experiment_tag +
                                   "_game_points"),
                         win=reward_win,
                         update='append',
                         name="game points")
                viz.line(X=[len(viz_student_points) - 1],
                         Y=[viz_student_points[-1]],
                         opts=dict(title=agent.experiment_tag +
                                   "_student_points"),
                         win=reward_win,
                         update='append',
                         name="student points")
                viz.line(X=[len(viz_id_eval_game_points) - 1],
                         Y=[viz_id_eval_game_points[-1]],
                         opts=dict(title=agent.experiment_tag +
                                   "_id_eval_game_points"),
                         win=reward_win,
                         update='append',
                         name="id eval game points")
                viz.line(X=[len(viz_ood_eval_game_points) - 1],
                         Y=[viz_ood_eval_game_points[-1]],
                         opts=dict(title=agent.experiment_tag +
                                   "_ood_eval_game_points"),
                         win=reward_win,
                         update='append',
                         name="ood eval game points")

            if step_win is None:
                step_win = viz.line(X=viz_x,
                                    Y=viz_game_step,
                                    opts=dict(title=agent.experiment_tag +
                                              "_game_step"),
                                    name="game step")
                viz.line(X=viz_x,
                         Y=viz_student_step,
                         opts=dict(title=agent.experiment_tag +
                                   "_student_step"),
                         win=step_win,
                         update='append',
                         name="student step")
                viz.line(X=viz_x,
                         Y=viz_id_eval_step,
                         opts=dict(title=agent.experiment_tag +
                                   "_id_eval_step"),
                         win=step_win,
                         update='append',
                         name="id eval step")
                viz.line(X=viz_x,
                         Y=viz_ood_eval_step,
                         opts=dict(title=agent.experiment_tag +
                                   "_ood_eval_step"),
                         win=step_win,
                         update='append',
                         name="ood eval step")
            else:
                viz.line(X=[len(viz_game_step) - 1],
                         Y=[viz_game_step[-1]],
                         opts=dict(title=agent.experiment_tag + "_game_step"),
                         win=step_win,
                         update='append',
                         name="game step")
                viz.line(X=[len(viz_student_step) - 1],
                         Y=[viz_student_step[-1]],
                         opts=dict(title=agent.experiment_tag +
                                   "_student_step"),
                         win=step_win,
                         update='append',
                         name="student step")
                viz.line(X=[len(viz_id_eval_step) - 1],
                         Y=[viz_id_eval_step[-1]],
                         opts=dict(title=agent.experiment_tag +
                                   "_id_eval_step"),
                         win=step_win,
                         update='append',
                         name="id eval step")
                viz.line(X=[len(viz_ood_eval_step) - 1],
                         Y=[viz_ood_eval_step[-1]],
                         opts=dict(title=agent.experiment_tag +
                                   "_ood_eval_step"),
                         win=step_win,
                         update='append',
                         name="ood eval step")

            if loss_win is None:
                loss_win = viz.line(X=viz_x,
                                    Y=viz_loss,
                                    opts=dict(title=agent.experiment_tag +
                                              "_loss"),
                                    name="loss")
            else:
                viz.line(X=[len(viz_loss) - 1],
                         Y=[viz_loss[-1]],
                         opts=dict(title=agent.experiment_tag + "_loss"),
                         win=loss_win,
                         update='append',
                         name="loss")

        # write accuracies down into file
        _s = json.dumps({
            "time spent": str(time_2 - time_1).rsplit(".")[0],
            "time spent seconds": time_spent_seconds,
            "episodes": episode_no,
            "episodes per second": eps_per_sec,
            "loss": str(running_avg_dagger_loss.get_avg()),
            "train game points": str(running_avg_game_points.get_avg()),
            "train game steps": str(running_avg_game_steps.get_avg()),
            "train student points": str(running_avg_student_points.get_avg()),
            "train student steps": str(running_avg_student_steps.get_avg()),
            "id eval game points": str(id_eval_game_points),
            "id eval steps": str(id_eval_game_step),
            "ood eval game points": str(ood_eval_game_points),
            "ood eval steps": str(ood_eval_game_step)
        })
        with open(output_dir + "/" + json_file_name + '.json',
                  'a+') as outfile:
            outfile.write(_s + '\n')
            outfile.flush()

    agent.save_model_to_path(output_dir + "/" + agent.experiment_tag +
                             "_final.pt")
# NOTE(review): Python 2 script (print statements) — do not run under Python 3.
# Visualizes ground-truth boxes (green) and the first 10 selective-search
# proposals (red) of one VOC 2007 trainval image in visdom.
imdb = get_imdb('voc_2007_trainval')
print imdb._classes
print imdb._class_to_ind
# NOTE(review): index 2017 is used here but roidb is indexed with 2018
# below — looks like an off-by-one; confirm which image is intended.
pathto2018 = imdb.image_path_at(2017)
print pathto2018
filename = pathto2018.split('/')[-1]
index = filename.split('.')[0]  # image id without extension
annotation = imdb._load_pascal_annotation(index)
print annotation
gt_boxes = annotation['boxes']
gt_roidb = imdb.gt_roidb()
roidb = imdb._load_selective_search_roidb(gt_roidb)
roi2018 = roidb[2018]
##print roidb
vis = visdom.Visdom(port='8097')
myimg = cv2.imread(pathto2018)
# ground-truth boxes in green
for box in gt_boxes:
    bbox = tuple(box)
    cv2.rectangle(myimg, bbox[0:2], bbox[2:4], (0, 204, 0), 2)
# first 10 region proposals in red
for box in roi2018['boxes'][:10]:
    bbox = tuple(box)
    cv2.rectangle(myimg, bbox[0:2], bbox[2:4], (0, 0, 204), 2)
#print myimg
#cv2.imshow('origin',myimg)
# BGR (OpenCV) -> RGB, then HWC -> CHW as visdom expects
myimg = myimg[:, :, -1::-1]
new_img = np.transpose(myimg, (2, 0, 1))
print new_img.shape
vis.image(new_img)
from utils import *
import os
import sys
import numpy as np
import torch
import torch.optim as optim
import visdom

if __name__ == '__main__':
    # SVR (single-view reconstruction) training setup for AtlasNet.
    # NOTE(review): the training loop itself is outside this visible chunk.
    opt = TrainOptions().parse()
    vis = visdom.Visdom(port=8888)
    dataset = ShapeNet(SVR=True,
                       normal=False,
                       class_choice=opt.class_choice,
                       train=opt.use_train)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=opt.batch_size,
                                             shuffle=True,
                                             num_workers=int(opt.num_workers))
    len_dataset = len(dataset)
    print('training set: ', len_dataset)
    model = AtlasNetSingle(latent_size=1024,
                           use_pretrained_encoder=opt.pretrained_enc,
                           use_skipconnected_decoder=opt.skipconnected_dec)
# main imports import math import numpy as np import torch import torch.nn as nn import torch.nn.functional as F import torchvision # local version imports import visdom vis = visdom.Visdom(port=12345) vis.line(X=np.array([0]), Y=np.array([[np.nan, np.nan]]), win='loss') vis.line(X=np.array([0]), Y=np.array([[np.nan, np.nan]]), win='acc') device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') # helper function to make getting another batch of data easier def cycle(iterable): while True: for x in iterable: yield x class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot'] # class_names = ['Zero', 'One', 'Two', 'Three', 'Four', 'Five', 'Six', 'Seven', 'Eight', 'Nine'] train_loader = torch.utils.data.DataLoader( torchvision.datasets.FashionMNIST('data', train=True, download=True, transform=torchvision.transforms.Compose([ torchvision.transforms.Resize(32), torchvision.transforms.ToTensor(), torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) ])),
def reinit(self, env='default', **kwargs):
    """Re-create the wrapped visdom client for *env*.

    Any extra keyword arguments are forwarded to ``visdom.Visdom``.
    Returns ``self`` so calls can be chained.
    """
    self.vis = visdom.Visdom(env=env, **kwargs)
    return self
def Training(): print('Training') # Hyper parameter print('Initializing hyper parameter') vis = visdom.Visdom() loss_window = vis.line(X=torch.zeros((1, )).cpu(), Y=torch.zeros((1)).cpu(), opts=dict(xlabel='epoch', ylabel='Loss', title='Training Loss', legend=['Loss'])) ## Get dataset print("Get dataset") loader = Generator() ## Get agent and model print('Get agent') if p.model_path == "": lane_agent = agent.Agent() else: lane_agent = agent.Agent() lane_agent.load_weights(1912, "tensor(0.9420)") ## Loop for training print('Training loop') step = 0 sampling_list = None for epoch in range(p.n_epoch): lane_agent.training_mode() for inputs, target_lanes, target_h, test_image, data_list in loader.Generate( sampling_list): #training #util.visualize_points(inputs[0], target_lanes[0], target_h[0]) print("epoch : " + str(epoch)) print("step : " + str(step)) loss_p = lane_agent.train(inputs, target_lanes, target_h, epoch, lane_agent, data_list) torch.cuda.synchronize() loss_p = loss_p.cpu().data if step % 50 == 0: vis.line(X=torch.ones((1, 1)).cpu() * int(step / 50), Y=torch.Tensor([loss_p]).unsqueeze(0).cpu(), win=loss_window, update='append') if step % 100 == 0: lane_agent.save_model(int(step / 100), loss_p) testing(lane_agent, test_image, step, loss_p) step += 1 sampling_list = copy.deepcopy(lane_agent.get_data_list()) lane_agent.sample_reset() #evaluation if epoch >= 0 and epoch % 1 == 0: print("evaluation") lane_agent.evaluate_mode() th_list = [0.8] index = [3] lane_agent.save_model(int(step / 100), loss_p) for idx in index: print("generate result") test.evaluation(loader, lane_agent, index=idx, name="test_result_" + str(epoch) + "_" + str(idx) + ".json") if int(step) > 700000: break
def train(data, name, save_dset, save_model_dir, seg=True, ignore=False,
          cove_flag=False):
    """Train an NER model and track loss / F1 curves in visdom.

    Args:
        data: configuration + dataset holder (ids, optim choice, lr, ...).
        name: run name, used for visdom windows and checkpoint filename.
        save_dset: path where the data settings are serialized.
        save_model_dir: directory for model checkpoints and eval files.
        seg: if True model selection uses F1, otherwise token accuracy.
        ignore: forwarded to ``evaluate``.
        cove_flag: forwarded to the NER model constructor.
    """
    print('---Training model---')
    data.show_data_summary()
    save_data_name = save_dset
    save_data_setting(data, save_data_name)
    model = NER(data, cove_flag)
    if data.gpu:
        model = model.cuda()
    # Pick the optimizer named in the config; SGD is the only one that takes
    # the configured lr/momentum (the others use library defaults).
    if data.optim.lower() == 'adam':
        optimizer = optim.Adam(model.parameters())
    elif data.optim.lower() == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters())
    elif data.optim.lower() == 'adadelta':
        optimizer = optim.Adadelta(model.parameters())
    elif data.optim.lower() == 'adagrad':
        optimizer = optim.Adagrad(model.parameters())
    elif data.optim.lower() == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=data.lr,
                              momentum=data.momentum)
    else:
        optimizer = None
        print('Error optimizer selection, please check config.optim.')
        exit(1)
    best_test = -1
    epoch = data.iteration
    vis = visdom.Visdom()
    losses = []
    all_F = [[0., 0.]]   # per-epoch [train F1, test F1], seeded with zeros
    dict_F = {}          # per-label F1 history
    label_F = []         # labels already seen in dict_F
    for idx in range(epoch):
        epoch_start = time.time()
        tmp_start = epoch_start
        print('Epoch: %s/%s' % (idx, epoch))
        # Only SGD gets an explicit lr decay schedule.
        if data.optim.lower() == 'sgd':
            optimizer = lr_decay(optimizer, idx, data.lr_decay, data.lr)
        instance_count = 0
        sample_loss = 0
        total_loss = 0
        right_token = 0
        whole_token = 0
        random.shuffle(data.train_ids)
        model.train()
        model.zero_grad()
        batch_size = data.batch_size
        train_num = len(data.train_ids)
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.train_ids[start:end]
            if not instance:
                continue
            batch_word, batch_wordlen, batch_wordrecover, batch_label, mask = batchify_with_label(
                instance, data.gpu)
            instance_count += 1
            loss, tag_seq = model.neg_log_likelihood_loss(
                batch_word, batch_wordlen, batch_label, mask)
            right, whole = predict_check(tag_seq, batch_label, mask)
            right_token += right
            whole_token += whole
            # NOTE: loss.data[0] is pre-0.4 PyTorch style (.item() nowadays).
            sample_loss += loss.data[0]
            total_loss += loss.data[0]
            # Log/plot every 500 instances (``end`` is an instance count).
            if end % 500 == 0:
                tmp_time = time.time()
                tmp_cost = tmp_time - tmp_start
                tmp_start = tmp_time
                print(
                    '\tInstance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f'
                    % (end, tmp_cost, sample_loss, right_token, whole_token,
                       (right_token + 0.0) / whole_token))
                sys.stdout.flush()
                losses.append(sample_loss / 500.0)
                Lwin = 'Loss of ' + name
                vis.line(np.array(losses),
                         X=np.array([i for i in range(len(losses))]),
                         win=Lwin,
                         opts={
                             'title': Lwin,
                             'legend': ['loss']
                         })
                sample_loss = 0
            loss.backward()
            if data.clip:
                torch.nn.utils.clip_grad_norm(model.parameters(), 10.0)
            optimizer.step()
            model.zero_grad()
        # Final partial-interval report for the epoch.
        tmp_time = time.time()
        tmp_cost = tmp_time - tmp_start
        print('\tInstance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f'
              % (end, tmp_cost, sample_loss, right_token, whole_token,
                 (right_token + 0.0) / whole_token))
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print(
            'Epoch: %s training finished. Time: %.2fs, speed: %.2ft/s, total_loss: %s'
            % (idx, epoch_cost, train_num / epoch_cost, total_loss))
        speed, acc, p, r, f_train, _, _, dict_train = evaluate(data, model,
                                                               'train',
                                                               ignore=ignore)
        # speed, acc, p, r, f_dev, dict_dev = evaluate(data, model, 'dev', ignore=ignore)
        speed, acc, p, r, f_test, _, _, dict_test = evaluate(data, model,
                                                             'test',
                                                             ignore=ignore)
        test_finish = time.time()
        test_cost = test_finish - epoch_finish
        if seg:
            current_score = f_test
            print(
                'Test: time: %.2fs, speed: %.2ft/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f'
                % (test_cost, speed, acc, p, r, f_test))
        else:
            current_score = acc
            print('Test: time: %.2fs, speed: %.2ft/s; acc: %.4f'
                  % (test_cost, speed, acc))
        # Checkpoint on improvement and dump an eval summary file.
        if current_score > best_test:
            if seg:
                print('Exceed previous best f score: ', best_test)
            else:
                print('Exceed previous best acc score: ', best_test)
            model_name = save_model_dir + '/' + name
            torch.save(model.state_dict(), model_name)
            best_test = current_score
            with open(
                    save_model_dir + '/' + name + '_eval_' + str(idx) + '.txt',
                    'w') as f:
                if seg:
                    # NOTE(review): two consecutive writes, the first reporting
                    # best_test in the 'f:' slot — looks like leftover code;
                    # confirm which line is intended.
                    f.write('acc: %.4f, p: %.4f, r: %.4f, f: %.4f' %
                            (acc, p, r, best_test))
                    f.write('acc: %.4f, p: %.4f' % (acc, p))
                else:
                    f.write('acc: %.4f' % acc)
        if seg:
            print('Current best f score: ', best_test)
        else:
            print('Current best acc score: ', best_test)
        all_F.append([f_train * 100.0, f_test * 100.0])
        Fwin = 'F1-score of ' + name + ' {train, test}'
        vis.line(np.array(all_F),
                 X=np.array([i for i in range(len(all_F))]),
                 win=Fwin,
                 opts={
                     'title': Fwin,
                     'legend': ['train', 'test']
                 })
        # Per-label F1 curves, one visdom window per label.
        if dict_train:
            for key, value in dict_train.items():
                if key not in label_F:
                    dict_F[key] = [[0., 0.]]
                    label_F.append(key)
                dict_F[key].append(
                    [dict_train[key] * 100.0, dict_test[key] * 100.0])
                Fwin = 'F1-score of ' + name + '_' + key + ' {train, test}'
                vis.line(np.array(dict_F[key]),
                         X=np.array([i for i in range(len(dict_F[key]))]),
                         win=Fwin,
                         opts={
                             'title': Fwin,
                             'legend': ['train', 'test']
                         })
        gc.collect()
def main():
    """Train a VAE on recorded game frames and plot progress to visdom.

    Loads pickled (observations, rewards, actions) episodes from disk,
    shuffles and splits them 90/10 into train/test, then optimizes an
    encoder/prior/decoder stack with Adam, periodically evaluating on the
    test split and saving the sub-models.

    Fixes vs. previous revision:
      * ``actions`` / ``test_actions`` were sliced from ``all_rewards``
        (copy-paste bug) — now sliced from ``all_actions``.
      * ``data_length`` was set from the TEST split, so training batches
        only sampled indices within the test-set size — now uses the
        train split length.
      * pickle files are opened with ``with`` so handles are closed.
    """
    training_steps = 200000
    batch_size = 16  #16
    test_batch_size = 64
    learn_rate = 0.00005  #0.00003
    save_file_name = "test6"

    # Load every recorded episode: data = (observations, rewards, actions).
    training_data_folder = "data/GreenHillZone/"
    files = os.listdir(training_data_folder)
    obss = []
    rewards = []
    actions = []
    for f in files:
        with open(training_data_folder + f, 'rb') as infile:  # was leaked before
            data = pickle.load(infile)
        obss.append(data[0])
        rewards.append(data[1])
        actions.append(data[2])
    all_obs = np.concatenate(obss)
    all_rewards = np.concatenate(rewards)
    all_actions = np.concatenate(actions)

    # Shuffle all three arrays with the same permutation.
    all_data_length = len(all_rewards)
    shuffle = np.random.permutation(all_data_length)
    all_obss = all_obs[shuffle]
    all_rewards = all_rewards[shuffle]
    all_actions = all_actions[shuffle]

    all_obss = torch.Tensor(all_obss)  #.cuda()
    all_rewards = torch.Tensor(all_rewards)  #.cuda()
    all_actions = torch.Tensor(all_actions)  #.cuda()
    all_data_length = len(all_rewards)

    # 90/10 train/test split.
    test_obss = all_obss[int(0.9 * all_data_length):].cuda()
    test_rewards = all_rewards[int(0.9 * all_data_length):].cuda()
    # FIX: was sliced from all_rewards.
    test_actions = all_actions[int(0.9 * all_data_length):].cuda()
    test_data_length = len(test_rewards)
    obss = all_obss[:int(0.9 * all_data_length)].cuda()
    rewards = all_rewards[:int(0.9 * all_data_length)].cuda()
    # FIX: was sliced from all_rewards.
    actions = all_actions[:int(0.9 * all_data_length)].cuda()
    # FIX: was len(test_rewards), which restricted training sampling to the
    # (much smaller) test-split index range.
    data_length = len(rewards)
    print(all_obss.shape)

    # Model: conv encoder -> "prior" MLP producing (sigma, mu) -> sample v
    # -> MLP -> conv decoder back to image space.
    v_size = 256
    img_encoder = MBP_ED.ImageEncoder(num_resblocks=6, res_depth=64,
                                      bottleneck_depth=32,
                                      output_dim=v_size * 2,
                                      strides=(2, 1, 2, 1, 2, 1),
                                      input_shape=(64, 64),
                                      res_input_shape=(64, 64),
                                      linear_in=4096)
    img_decoder = MBP_ED.ImageDecoder(num_resblocks=6, res_depth=64,
                                      bottleneck_depth=32,
                                      input_dim=v_size * 2,
                                      strides=(1, 2, 1, 2, 1, 2),
                                      output_shape=(64, 64),
                                      res_output_shape=(64, 64),
                                      linear_out=4096)
    img_encoder = img_encoder.cuda()
    img_decoder = img_decoder.cuda()
    prior = MBP_MLP.DistributionalMLP(512, 512, [512]).cuda()
    v_out = MBP_MLP.DistributionalMLP(256, 512, [512]).cuda()

    params = list(img_encoder.parameters()) + \
        list(img_decoder.parameters()) + \
        list(prior.parameters()) + \
        list(v_out.parameters())
    optimizer = optim.Adam(params, lr=learn_rate, weight_decay=3 * 10**-6)

    vis = visdom.Visdom()
    graph_step = 250
    batch_losses = torch.ones(training_steps)
    test_losses = []
    batch_loss_chart = vis.line(torch.Tensor([0]))
    test_chart = vis.line(torch.Tensor([0]))

    for i in range(training_steps):
        optimizer.zero_grad()
        # Random minibatch of frames from the train split.
        ind = torch.LongTensor(
            np.random.choice((data_length - 1), batch_size,
                             replace=False)).cuda()
        img_batch = obss[ind]
        rew_batch = rewards[ind]
        act_batch = actions[ind]

        obs_ten = img_batch.permute(0, 3, 1, 2)  # NHWC -> NCHW
        img_in = Variable(obs_ten, requires_grad=True)
        img_e = img_encoder(img_in)
        e_dist = prior(img_e)
        mu = e_dist[:, v_size:]
        # most VAEs I have seen use exp here to ensure sigma is > 0 but doing
        # so seems to cause the model to diverge and abs seems to work well
        sigma = torch.abs(e_dist[:, :v_size]) + 0.0001
        # Reparameterization trick.
        v = mu + sigma * Variable(torch.randn(mu.shape)).cuda()
        vo = v_out(v)
        img_de = img_decoder(vo)

        kl_loss = -0.5 * torch.sum(
            1 + torch.log(sigma.pow(2)) - mu.pow(2) - sigma.pow(2))
        dist_loss = torch.sum((img_in.detach() / 255 - img_de / 255)**2)
        loss = dist_loss + kl_loss
        loss.backward()
        optimizer.step()
        batch_losses[i] = loss.data[0]

        # Periodic diagnostics + evaluation on the test split.
        if i % graph_step == 0:
            print("mu", mu[-1])
            print("sigma", sigma[-1])
            print("v", v[-1])
            print("in", img_in[-1])
            print("out", img_de[-1])
            print(i, loss.data)
            ind = torch.LongTensor(
                np.random.choice((test_data_length - 1), test_batch_size,
                                 replace=False)).cuda()
            img_batch = test_obss[ind]
            rew_batch = test_rewards[ind]
            act_batch = test_actions[ind]
            img_batch = img_batch.permute(0, 3, 1, 2)
            img_in_test = Variable(img_batch, requires_grad=False)
            img_e = img_encoder(img_in_test)
            e_dist = prior(img_e)
            mu = e_dist[:, v_size:]
            sigma = torch.abs(e_dist[:, :v_size]) + 0.0001
            v = mu + sigma * Variable(torch.randn(mu.shape)).cuda()
            vo = v_out(v)
            img_de_test = img_decoder(vo)
            kl_loss = -0.5 * torch.sum(
                1 + torch.log(sigma.pow(2)) - mu.pow(2) - sigma.pow(2))
            dist_loss = torch.sum(
                (img_in_test.detach() / 255 - img_de_test / 255)**2)
            loss = dist_loss + kl_loss
            # Scale so the larger test batch is comparable to a train batch.
            test_losses.append(loss.data[0] * (batch_size / test_batch_size))
            if i > 0:
                vis.updateTrace(X=torch.arange(i - graph_step, i),
                                Y=batch_losses[i - graph_step:i],
                                append=True, win=batch_loss_chart,
                                name="Batch Losses")
                vis.updateTrace(X=torch.arange(i, i + graph_step,
                                               step=graph_step),
                                Y=torch.Tensor([test_losses[-1]]),
                                append=True, win=test_chart,
                                name="test Losses")

        # Periodic image previews + model snapshots.
        if i % 2500 == 0 and i > 0:
            vis.images([img_de.data[0], img_in.data[0]],
                       opts=dict(caption='train ep.' + str(i)))
            vis.images([img_de_test.data[0], img_in_test.data[0]],
                       opts=dict(caption='test ep. ' + str(i)))
            torch.save(img_encoder, save_file_name + "_img_encoder.pth")
            torch.save(img_decoder, save_file_name + "_img_decoder.pth")
            torch.save(prior, save_file_name + "_prior_mlp.pth")
            torch.save(v_out, save_file_name + "_v_out_mlp.pth")
def test_optimizer(data):
    """Train five identical NER models, one per optimizer, and compare.

    Plots the loss and train/test F1 curves of SGD, Adam, RMSprop,
    Adadelta and Adagrad side by side in shared visdom windows.
    """
    print('---Test Optimizers---')
    # One independent model per optimizer so the comparisons don't interact.
    model_SGD = NER(data)
    model_Adam = NER(data)
    model_RMSprop = NER(data)
    model_Adadelta = NER(data)
    model_Adagrad = NER(data)
    if data.gpu:
        model_SGD = model_SGD.cuda()
        model_Adam = model_Adam.cuda()
        model_RMSprop = model_RMSprop.cuda()
        model_Adadelta = model_Adadelta.cuda()
        model_Adagrad = model_Adagrad.cuda()
    optimizer_SGD = optim.SGD(model_SGD.parameters(), lr=data.lr,
                              momentum=data.momentum)
    optimizer_Adam = optim.Adam(model_Adam.parameters())
    optimizer_RMSprop = optim.RMSprop(model_RMSprop.parameters())
    optimizer_Adadelta = optim.Adadelta(model_Adadelta.parameters())
    optimizer_Adagrad = optim.Adagrad(model_Adagrad.parameters())
    epoch = data.iteration
    vis = visdom.Visdom()
    losses = []
    train_F = [[0., 0., 0., 0., 0.]]
    # dev_F = [[0., 0., 0., 0., 0.]]
    test_F = [[0., 0., 0., 0., 0.]]
    for idx in range(epoch):
        epoch_start = time.time()
        print('Epoch: %s/%s' % (idx, epoch))
        # Only SGD uses an explicit decay schedule.
        optimizer_SGD = lr_decay(optimizer_SGD, idx, data.lr_decay, data.lr)
        instance_count = 0
        sample_loss_SGD = 0
        sample_loss_Adam = 0
        sample_loss_RMSprop = 0
        sample_loss_Adadelta = 0
        sample_loss_Adagrad = 0
        random.shuffle(data.train_ids)
        model_SGD.train()
        model_Adam.train()
        model_RMSprop.train()
        model_Adadelta.train()
        model_Adagrad.train()
        model_SGD.zero_grad()
        model_Adam.zero_grad()
        model_RMSprop.zero_grad()
        model_Adadelta.zero_grad()
        model_Adagrad.zero_grad()
        batch_size = data.batch_size
        train_num = len(data.train_ids)
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.train_ids[start:end]
            if not instance:
                continue
            batch_word, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
                instance, data.gpu)
            instance_count += 1
            # Same batch through all five models.
            loss_SGD, tag_seq_SGD = model_SGD.neg_log_likelihood_loss(
                batch_word, batch_wordlen, batch_char, batch_charlen,
                batch_charrecover, batch_label, mask)
            loss_Adam, tag_seq_Adam = model_Adam.neg_log_likelihood_loss(
                batch_word, batch_wordlen, batch_char, batch_charlen,
                batch_charrecover, batch_label, mask)
            loss_RMSprop, tag_seq_RMSprop = model_RMSprop.neg_log_likelihood_loss(
                batch_word, batch_wordlen, batch_char, batch_charlen,
                batch_charrecover, batch_label, mask)
            loss_Adadelta, tag_seq_Adadelta = model_Adadelta.neg_log_likelihood_loss(
                batch_word, batch_wordlen, batch_char, batch_charlen,
                batch_charrecover, batch_label, mask)
            loss_Adagrad, tag_seq_Adagrad = model_Adagrad.neg_log_likelihood_loss(
                batch_word, batch_wordlen, batch_char, batch_charlen,
                batch_charrecover, batch_label, mask)
            sample_loss_SGD += loss_SGD.data[0]
            sample_loss_Adam += loss_Adam.data[0]
            sample_loss_RMSprop += loss_RMSprop.data[0]
            sample_loss_Adadelta += loss_Adadelta.data[0]
            sample_loss_Adagrad += loss_Adagrad.data[0]
            # Plot every 500 instances.
            # NOTE(review): the divisor is 50.0 although the window is 500
            # instances — confirm which normalization is intended.
            if end % 500 == 0:
                sys.stdout.flush()
                losses.append([
                    sample_loss_SGD / 50.0, sample_loss_Adam / 50.0,
                    sample_loss_RMSprop / 50.0, sample_loss_Adadelta / 50.0,
                    sample_loss_Adagrad / 50.0
                ])
                Lwin = 'Loss of Optimizers'
                vis.line(np.array(losses),
                         X=np.array([i for i in range(len(losses))]),
                         win=Lwin,
                         opts={
                             'title': Lwin,
                             'legend':
                             ['SGD', 'Adam', 'RMSprop', 'Adadelta', 'Adagrad']
                         })
                sample_loss_SGD = 0
                sample_loss_Adam = 0
                sample_loss_RMSprop = 0
                sample_loss_Adadelta = 0
                sample_loss_Adagrad = 0
            loss_SGD.backward()
            loss_Adam.backward()
            loss_RMSprop.backward()
            loss_Adadelta.backward()
            loss_Adagrad.backward()
            # if data.clip:
            #     torch.nn.utils.clip_grad_norm(model.parameters(), 10.0)
            optimizer_SGD.step()
            optimizer_Adam.step()
            optimizer_RMSprop.step()
            optimizer_Adadelta.step()
            optimizer_Adagrad.step()
            model_SGD.zero_grad()
            model_Adam.zero_grad()
            model_RMSprop.zero_grad()
            model_Adadelta.zero_grad()
            model_Adagrad.zero_grad()
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print('Epoch: %s training finished. Time: %.2fs, speed: %.2ft/s'
              % (idx, epoch_cost, train_num / epoch_cost))
        # Train-set F1 for each optimizer's model.
        speed, acc, p, r, f_train_SGD, _, _, _ = evaluate(
            data, model_SGD, 'train')
        speed, acc, p, r, f_train_Adam, _, _, _ = evaluate(
            data, model_Adam, 'train')
        speed, acc, p, r, f_train_RMSprop, _, _, _ = evaluate(
            data, model_RMSprop, 'train')
        speed, acc, p, r, f_train_Adadelta, _, _, _ = evaluate(
            data, model_Adadelta, 'train')
        speed, acc, p, r, f_train_Adagrad, _, _, _ = evaluate(
            data, model_Adagrad, 'train')
        train_F.append([
            f_train_SGD * 100, f_train_Adam * 100, f_train_RMSprop * 100,
            f_train_Adadelta * 100, f_train_Adagrad * 100
        ])
        train_Fwin = 'F1-score of Optimizers{train}'
        vis.line(np.array(train_F),
                 X=np.array([i for i in range(len(train_F))]),
                 win=train_Fwin,
                 opts={
                     'title': train_Fwin,
                     'legend': ['SGD', 'Adam', 'RMSprop', 'Adadelta', 'Adagrad']
                 })
        # speed, acc, p, r, f_dev_SGD, _ = evaluate(data, model_SGD, 'dev')
        # speed, acc, p, r, f_dev_Adam, _ = evaluate(data, model_Adam, 'dev')
        # speed, acc, p, r, f_dev_RMSprop, _ = evaluate(data, model_RMSprop, 'dev')
        # speed, acc, p, r, f_dev_Adadelta, _ = evaluate(data, model_Adadelta, 'dev')
        # speed, acc, p, r, f_dev_Adagrad, _ = evaluate(data, model_Adagrad, 'dev')
        #
        # dev_F.append([f_dev_SGD * 100, f_dev_Adam * 100, f_dev_RMSprop * 100, f_dev_Adadelta * 100,
        #               f_dev_Adagrad * 100])
        # dev_Fwin = 'F1-score of Optimizers{dev}'
        # vis.line(np.array(dev_F), X=np.array([i for i in range(len(dev_F))]),
        #          win=dev_Fwin, opts={'title': dev_Fwin, 'legend': ['SGD', 'Adam', 'RMSprop', 'Adadelta', 'Adagrad']})
        # Test-set F1 for each optimizer's model.
        speed, acc, p, r, f_test_SGD, _, _, _ = evaluate(
            data, model_SGD, 'test')
        speed, acc, p, r, f_test_Adam, _, _, _ = evaluate(
            data, model_Adam, 'test')
        speed, acc, p, r, f_test_RMSprop, _, _, _ = evaluate(
            data, model_RMSprop, 'test')
        speed, acc, p, r, f_test_Adadelta, _, _, _ = evaluate(
            data, model_Adadelta, 'test')
        speed, acc, p, r, f_test_Adagrad, _, _, _ = evaluate(
            data, model_Adagrad, 'test')
        test_F.append([
            f_test_SGD * 100, f_test_Adam * 100, f_test_RMSprop * 100,
            f_test_Adadelta * 100, f_test_Adagrad * 100
        ])
        test_Fwin = 'F1-score of Optimizers{test}'
        vis.line(np.array(test_F),
                 X=np.array([i for i in range(len(test_F))]),
                 win=test_Fwin,
                 opts={
                     'title': test_Fwin,
                     'legend': ['SGD', 'Adam', 'RMSprop', 'Adadelta', 'Adagrad']
                 })
        gc.collect()
def __init__(self, args):
    """Set up a beta-VAE solver from parsed CLI args.

    Builds the network (BetaVAE_H or BetaVAE_B), the Adam optimizer,
    optional visdom visualization state, checkpoint/output directories,
    and the data loader.
    """
    self.use_cuda = args.cuda and torch.cuda.is_available()
    self.max_iter = args.max_iter
    self.global_iter = 0
    # Objective hyper-parameters (beta-VAE / capacity-constrained variants).
    self.z_dim = args.z_dim
    self.beta = args.beta
    self.gamma = args.gamma
    self.C_max = args.C_max
    self.C_stop_iter = args.C_stop_iter
    self.objective = args.objective
    self.model = args.model
    self.lr = args.lr
    self.beta1 = args.beta1
    self.beta2 = args.beta2
    # Dataset determines channel count and decoder output distribution.
    if args.dataset.lower() == 'dsprites':
        self.nc = 1
        self.decoder_dist = 'bernoulli'
    elif args.dataset.lower() == '3dchairs':
        self.nc = 3
        self.decoder_dist = 'gaussian'
    elif args.dataset.lower() == 'celeba':
        self.nc = 3
        self.decoder_dist = 'gaussian'
    else:
        raise NotImplementedError
    if args.model == 'H':
        net = BetaVAE_H
    elif args.model == 'B':
        net = BetaVAE_B
    else:
        raise NotImplementedError('only support model H or B')
    self.net = cuda(net(self.z_dim, self.nc), self.use_cuda)
    self.optim = optim.Adam(self.net.parameters(), lr=self.lr,
                            betas=(self.beta1, self.beta2))
    # visdom windows are created lazily; handles start out empty.
    self.viz_name = args.viz_name
    self.viz_port = args.viz_port
    self.viz_on = args.viz_on
    self.win_recon = None
    self.win_kld = None
    self.win_mu = None
    self.win_var = None
    if self.viz_on:
        self.viz = visdom.Visdom(port=self.viz_port)
    self.ckpt_dir = os.path.join(args.ckpt_dir, args.viz_name)
    if not os.path.exists(self.ckpt_dir):
        os.makedirs(self.ckpt_dir, exist_ok=True)
    self.ckpt_name = args.ckpt_name
    # Optionally resume from a named checkpoint.
    if self.ckpt_name is not None:
        self.load_checkpoint(self.ckpt_name)
    self.save_output = args.save_output
    self.output_dir = os.path.join(args.output_dir, args.viz_name)
    if not os.path.exists(self.output_dir):
        os.makedirs(self.output_dir, exist_ok=True)
    self.gather_step = args.gather_step
    self.display_step = args.display_step
    self.save_step = args.save_step
    self.dset_dir = args.dset_dir
    self.dataset = args.dataset
    self.batch_size = args.batch_size
    self.data_loader = return_data(args)
    self.gather = DataGather()
def reinit(self, env='default', **kwargs):
    """Re-create the wrapped visdom client, switching to *env*.

    Extra keyword arguments are forwarded to ``visdom.Visdom``.
    Returns ``self`` so calls can be chained.
    """
    self.vis = visdom.Visdom(env=env, **kwargs)
    return self
def train(args):
    """Train a semantic-segmentation model chosen by ``args.structure``.

    Supports resuming from a whole-model file (``args.resume_model``) or a
    state-dict file (``args.resume_model_state_dict``); the starting epoch
    is parsed out of the checkpoint filename (text between the last '_'
    and the last '.'). Loss curves and sample predictions are pushed to
    visdom when ``args.vis`` is set.
    """
    init_time = str(int(time.time()))
    if args.vis:
        vis = visdom.Visdom()
    # if args.dataset_path == '':
    #     HOME_PATH = os.path.expanduser('~')
    #     local_path = os.path.join(HOME_PATH, 'Data/CamVid')
    # else:
    local_path = os.path.expanduser(args.dataset_path)
    if args.dataset == 'CamVid':
        dst = camvidLoader(local_path, is_transform=True,
                           is_augment=args.data_augment)
    elif args.dataset == 'CityScapes':
        dst = cityscapesLoader(local_path, is_transform=True)
    else:
        pass
    # dst.n_classes = args.n_classes  # force the configured class count
    trainloader = torch.utils.data.DataLoader(dst,
                                              batch_size=args.batch_size,
                                              shuffle=True)
    start_epoch = 0
    if args.resume_model != '':
        # Resume from a serialized whole model; epoch parsed from filename.
        model = torch.load(args.resume_model)
        start_epoch_id1 = args.resume_model.rfind('_')
        start_epoch_id2 = args.resume_model.rfind('.')
        start_epoch = int(args.resume_model[start_epoch_id1 + 1:start_epoch_id2])
    else:
        # Model factory keyed on the architecture name.
        if args.structure == 'fcn32s':
            model = fcn(module_type='32s', n_classes=dst.n_classes,
                        pretrained=args.init_vgg16)
        elif args.structure == 'fcn16s':
            model = fcn(module_type='16s', n_classes=dst.n_classes,
                        pretrained=args.init_vgg16)
        elif args.structure == 'fcn8s':
            model = fcn(module_type='8s', n_classes=dst.n_classes,
                        pretrained=args.init_vgg16)
        elif args.structure == 'fcn_resnet18_32s':
            model = fcn_resnet18(module_type='32s', n_classes=dst.n_classes,
                                 pretrained=args.init_vgg16)
        elif args.structure == 'fcn_resnet18_16s':
            model = fcn_resnet18(module_type='16s', n_classes=dst.n_classes,
                                 pretrained=args.init_vgg16)
        elif args.structure == 'fcn_resnet18_8s':
            model = fcn_resnet18(module_type='8s', n_classes=dst.n_classes,
                                 pretrained=args.init_vgg16)
        elif args.structure == 'fcn_resnet34_32s':
            model = fcn_resnet34(module_type='32s', n_classes=dst.n_classes,
                                 pretrained=args.init_vgg16)
        elif args.structure == 'fcn_resnet34_16s':
            model = fcn_resnet34(module_type='16s', n_classes=dst.n_classes,
                                 pretrained=args.init_vgg16)
        elif args.structure == 'fcn_resnet34_8s':
            model = fcn_resnet34(module_type='8s', n_classes=dst.n_classes,
                                 pretrained=args.init_vgg16)
        elif args.structure == 'ResNetDUC':
            model = ResNetDUC(n_classes=dst.n_classes,
                              pretrained=args.init_vgg16)
        elif args.structure == 'ResNetDUCHDC':
            model = ResNetDUCHDC(n_classes=dst.n_classes,
                                 pretrained=args.init_vgg16)
        elif args.structure == 'segnet':
            model = segnet(n_classes=dst.n_classes, pretrained=args.init_vgg16)
        elif args.structure == 'segnet_vgg19':
            model = segnet_vgg19(n_classes=dst.n_classes,
                                 pretrained=args.init_vgg16)
        elif args.structure == 'segnet_unet':
            model = segnet_unet(n_classes=dst.n_classes,
                                pretrained=args.init_vgg16)
        elif args.structure == 'segnet_alignres':
            model = segnet_alignres(n_classes=dst.n_classes,
                                    pretrained=args.init_vgg16)
        elif args.structure == 'sqnet':
            model = sqnet(n_classes=dst.n_classes, pretrained=args.init_vgg16)
        elif args.structure == 'segnet_squeeze':
            model = segnet_squeeze(n_classes=dst.n_classes,
                                   pretrained=args.init_vgg16)
        elif args.structure == 'ENet':
            model = ENet(n_classes=dst.n_classes)
        elif args.structure == 'ENetV2':
            model = ENetV2(n_classes=dst.n_classes)
        elif args.structure == 'drn_d_22':
            model = DRNSeg(model_name='drn_d_22', n_classes=dst.n_classes,
                           pretrained=args.init_vgg16)
        elif args.structure == 'drn_a_50':
            model = DRNSeg(model_name='drn_a_50', n_classes=dst.n_classes,
                           pretrained=args.init_vgg16)
        elif args.structure == 'drn_a_18':
            model = DRNSeg(model_name='drn_a_18', n_classes=dst.n_classes,
                           pretrained=args.init_vgg16)
        elif args.structure == 'drn_e_22':
            model = DRNSeg(model_name='drn_e_22', n_classes=dst.n_classes,
                           pretrained=args.init_vgg16)
        elif args.structure == 'pspnet':
            model = pspnet(n_classes=dst.n_classes)
        elif args.structure == 'erfnet':
            model = erfnet(n_classes=dst.n_classes)
        elif args.structure == 'fcdensenet103':
            model = fcdensenet103(n_classes=dst.n_classes)
        elif args.structure == 'fcdensenet56':
            model = fcdensenet56(n_classes=dst.n_classes)
    if args.resume_model_state_dict != '':
        try:
            # fcn32s/fcn16s/fcn8s differ slightly in parameters, so loading
            # one's weights into another raises KeyError during retraining;
            # the exception is caught here for now. (translated)
            start_epoch_id1 = args.resume_model_state_dict.rfind('_')
            start_epoch_id2 = args.resume_model_state_dict.rfind('.')
            start_epoch = int(
                args.resume_model_state_dict[start_epoch_id1 + 1:start_epoch_id2])
            pretrained_dict = torch.load(args.resume_model_state_dict)
            # model_dict = model.state_dict()
            # for k, v in pretrained_dict.items():
            #     print(k)
            # pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
            # model_dict.update(pretrained_dict)
            model.load_state_dict(pretrained_dict)
        except KeyError:
            print('missing key')
    if args.cuda:
        model.cuda()
    print('start_epoch:', start_epoch)
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                       model.parameters()),
                                lr=args.lr, momentum=0.99, weight_decay=5e-4)
    # optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, betas=(0.9, 0.999), eps=1e-08, weight_decay=1e-4)
    for epoch in range(start_epoch + 1, 20000, 1):
        loss_epoch = 0
        loss_avg_epoch = 0
        data_count = 0
        # if args.vis:
        #     vis.text('epoch:{}'.format(epoch), win='epoch')
        for i, (imgs, labels) in enumerate(trainloader):
            print(i)
            data_count = i
            # print(labels.shape)
            # print(imgs.shape)
            imgs = Variable(imgs)
            labels = Variable(labels)
            if args.cuda:
                imgs = imgs.cuda()
                labels = labels.cuda()
            outputs = model(imgs)
            # Every 50 batches, push GT vs. predicted color maps to visdom.
            if args.vis and i % 50 == 0:
                pred_labels = outputs.cpu().data.max(1)[1].numpy()
                # print(pred_labels.shape)
                label_color = dst.decode_segmap(
                    labels.cpu().data.numpy()[0]).transpose(2, 0, 1)
                # print(label_color.shape)
                pred_label_color = dst.decode_segmap(pred_labels[0]).transpose(
                    2, 0, 1)
                # print(pred_label_color.shape)
                win = 'label_color'
                vis.image(label_color, win=win)
                win = 'pred_label_color'
                vis.image(pred_label_color, win=win)
                # if epoch < 100:
                #     if not os.path.exists('/tmp/'+init_time):
                #         os.mkdir('/tmp/'+init_time)
                #     time_str = str(int(time.time()))
                #     cv2.imwrite('/tmp/'+init_time+'/'+time_str+'_label.png', label_color.transpose(1, 2, 0))
                #     cv2.imwrite('/tmp/'+init_time+'/'+time_str+'_pred_label.png', pred_label_color.transpose(1, 2, 0))
            # print(outputs.size())
            # print(labels.size())
            # Gradients accumulate across backward() calls unless zeroed. (translated)
            optimizer.zero_grad()
            loss = cross_entropy2d(outputs, labels)
            loss_np = loss.cpu().data.numpy()
            loss_epoch += loss_np
            print('loss:', loss_np)
            loss.backward()
            optimizer.step()
            # Per-iteration loss curve within this epoch. (translated)
            if args.vis:
                win = 'loss'
                loss_np_expand = np.expand_dims(loss_np, axis=0)
                win_res = vis.line(X=np.ones(1) * i, Y=loss_np_expand, win=win,
                                   update='append')
                # If appending failed (window missing), create the window.
                if win_res != win:
                    vis.line(X=np.ones(1) * i, Y=loss_np_expand, win=win)
        # Close/clear the per-epoch loss window. (translated)
        if args.vis:
            win = 'loss'
            vis.close(win)
        # Average loss across the epoch, plotted on a per-epoch chart. (translated)
        loss_avg_epoch = loss_epoch / (data_count * 1.0)
        # print(loss_avg_epoch)
        if args.vis:
            win = 'loss_epoch'
            loss_avg_epoch_expand = np.expand_dims(loss_avg_epoch, axis=0)
            win_res = vis.line(X=np.ones(1) * epoch, Y=loss_avg_epoch_expand,
                               win=win, update='append')
            if win_res != win:
                vis.line(X=np.ones(1) * epoch, Y=loss_avg_epoch_expand,
                         win=win)
        if args.save_model and epoch % args.save_epoch == 0:
            torch.save(
                model.state_dict(),
                '{}_camvid_class_{}_{}.pt'.format(args.structure,
                                                  dst.n_classes, epoch))
def main(args):
    """Train a pyro VAE on MNIST with SVI; optionally visualize via visdom.

    Returns the trained ``VAE`` instance.
    """
    # clear param store
    pyro.clear_param_store()

    # setup MNIST data loaders
    # train_loader, test_loader
    train_loader, test_loader = setup_data_loaders(MNIST,
                                                   use_cuda=args.cuda,
                                                   batch_size=256)

    # setup the VAE
    vae = VAE(use_cuda=args.cuda)

    # setup the optimizer
    adam_args = {"lr": args.learning_rate}
    optimizer = Adam(adam_args)

    # setup the inference algorithm (JIT-compiled ELBO when requested)
    elbo = JitTrace_ELBO() if args.jit else Trace_ELBO()
    svi = SVI(vae.model, vae.guide, optimizer, loss=elbo)

    # setup visdom for visualization
    if args.visdom_flag:
        vis = visdom.Visdom()

    train_elbo = []
    test_elbo = []
    # training loop
    for epoch in range(args.num_epochs):
        # initialize loss accumulator
        epoch_loss = 0.
        # do a training epoch over each mini-batch x returned
        # by the data loader
        for x, _ in train_loader:
            # if on GPU put mini-batch into CUDA memory
            if args.cuda:
                x = x.cuda()
            # do ELBO gradient and accumulate loss
            epoch_loss += svi.step(x)

        # report training diagnostics (loss normalized per example)
        normalizer_train = len(train_loader.dataset)
        total_epoch_loss_train = epoch_loss / normalizer_train
        train_elbo.append(total_epoch_loss_train)
        print("[epoch %03d] average training loss: %.4f" %
              (epoch, total_epoch_loss_train))

        if epoch % args.test_frequency == 0:
            # initialize loss accumulator
            test_loss = 0.
            # compute the loss over the entire test set
            for i, (x, _) in enumerate(test_loader):
                # if on GPU put mini-batch into CUDA memory
                if args.cuda:
                    x = x.cuda()
                # compute ELBO estimate and accumulate loss
                test_loss += svi.evaluate_loss(x)

                # pick three random test images from the first mini-batch and
                # visualize how well we're reconstructing them
                if i == 0:
                    if args.visdom_flag:
                        plot_vae_samples(vae, vis)
                        reco_indices = np.random.randint(0, x.shape[0], 3)
                        for index in reco_indices:
                            test_img = x[index, :]
                            reco_img = vae.reconstruct_img(test_img)
                            vis.image(test_img.reshape(
                                28, 28).detach().cpu().numpy(),
                                      opts={'caption': 'test image'})
                            vis.image(reco_img.reshape(
                                28, 28).detach().cpu().numpy(),
                                      opts={'caption': 'reconstructed image'})

            # report test diagnostics (loss normalized per example)
            normalizer_test = len(test_loader.dataset)
            total_epoch_loss_test = test_loss / normalizer_test
            test_elbo.append(total_epoch_loss_test)
            print("[epoch %03d] average test loss: %.4f" %
                  (epoch, total_epoch_loss_test))

        # one-shot t-SNE embedding of the latent space at a chosen epoch
        if epoch == args.tsne_iter:
            mnist_test_tsne(vae=vae, test_loader=test_loader)
            plot_llk(np.array(train_elbo), np.array(test_elbo))

    return vae
def demo01():
    """Minimal visdom smoke test.

    Posts a 'Hello, world!' text pane and an all-ones 3x10x10 (CHW) image
    to the default visdom server.
    """
    import numpy as np
    import visdom

    client = visdom.Visdom()
    client.text('Hello, world!')
    white_patch = np.ones((3, 10, 10))
    client.image(white_patch)
def train():
    """Fine-tune a FaceBox detector, logging train/val loss to visdom.

    Resumes from ``weight/faceboxes.pt`` and overwrites it after every
    epoch. Uses Adam with a MultiStepLR schedule (drops at epochs 198/248).
    """
    use_gpu = torch.cuda.is_available()
    file_root = os.path.dirname(os.path.abspath(__file__))
    learning_rate = 0.001
    num_epochs = 300
    batch_size = 32

    net = FaceBox()
    if use_gpu:
        net.cuda()
    print('load model...')
    # Resume from the previously saved checkpoint.
    net.load_state_dict(torch.load('weight/faceboxes.pt'))

    criterion = MultiBoxLoss()
    #optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9, weight_decay=0.0005)
    optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate,
                                 weight_decay=1e-4)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=[198, 248],
                                                     gamma=0.1)

    train_dataset = ListDataset(root=file_root,
                                list_file='data/train_rewrite.txt',
                                train=True,
                                transform=[transforms.ToTensor()])
    train_loader = DataLoader(train_dataset, batch_size=batch_size,
                              shuffle=True, num_workers=4)
    val_dataset = ListDataset(root=file_root,
                              list_file='data/val_rewrite.txt',
                              train=False,
                              transform=[transforms.ToTensor()])
    val_loader = DataLoader(val_dataset, batch_size=batch_size,
                            shuffle=False, num_workers=1)
    print('the dataset has %d images' % (len(train_dataset)))
    print('the batch_size is %d' % (batch_size))

    num_iter = 0
    vis = visdom.Visdom()
    win = vis.line(Y=np.array([0]), X=np.array([0]))
    net.train()
    for epoch in range(num_epochs):
        scheduler.step()
        print('\n\nStarting epoch %d / %d' % (epoch + 1, num_epochs))
        # NOTE(review): this prints the initial lr, not the scheduler-adjusted
        # one — consider reading optimizer.param_groups[0]['lr'] instead.
        print('Learning Rate for this epoch: {}'.format(learning_rate))
        total_loss = 0.

        net.train()
        for i, (images, loc_targets, conf_targets) in enumerate(train_loader):
            if use_gpu:
                images = images.cuda()
                loc_targets = loc_targets.cuda()
                conf_targets = conf_targets.cuda()
            loc_preds, conf_preds = net(images)
            loss = criterion(loc_preds, loc_targets, conf_preds, conf_targets)
            total_loss += loss.item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Log the running average loss every 10 iterations.
            if (i + 1) % 10 == 0:
                print(
                    'Epoch [{}/{}], Iter [{}/{}] Loss: {:.4f}, average_loss: {:.4f}'
                    .format(epoch + 1, num_epochs, i + 1, len(train_loader),
                            loss.item(), total_loss / (i + 1)))
                vis.line(Y=np.array([total_loss / (i + 1)]),
                         X=np.array([num_iter]),
                         win=win,
                         name='train',
                         update='append')
                num_iter += 1

        # Validation pass (no gradients).
        val_loss = 0.0
        net.eval()
        for idx, (images, loc_targets, conf_targets) in enumerate(val_loader):
            with torch.no_grad():
                if use_gpu:
                    images = images.cuda()
                    loc_targets = loc_targets.cuda()
                    conf_targets = conf_targets.cuda()
                loc_preds, conf_preds = net(images)
                loss = criterion(loc_preds, loc_targets, conf_preds,
                                 conf_targets)
                val_loss += loss.item()
        # Normalize by number of validation batches.
        val_loss /= len(val_dataset) / batch_size
        # NOTE(review): x = epoch*40+40 hard-codes ~40 train iterations per
        # epoch so the val point lines up with the train trace — confirm.
        vis.line(Y=np.array([val_loss]),
                 X=np.array([epoch * 40 + 40]),
                 win=win,
                 name='val',
                 update='append')
        print('loss of val is {}'.format(val_loss))

        # Overwrite the checkpoint every epoch.
        if not os.path.exists('weight/'):
            os.mkdir('weight')
        print('saving model ...')
        torch.save(net.state_dict(), 'weight/faceboxes.pt')
def main(args):
    """Run ERFNet inference on Cityscapes images and save colorized label maps.

    Loads weights from args.loadDir + args.loadWeights (tolerating missing
    keys), runs the model over the requested subset, and writes one color
    PNG per input under ./save_color/.
    """
    modelpath = args.loadDir + args.loadModel
    weightspath = args.loadDir + args.loadWeights

    print("Loading model: " + modelpath)
    print("Loading weights: " + weightspath)

    model = ERFNet(NUM_CLASSES)
    model = torch.nn.DataParallel(model)
    if (not args.cpu):
        model = model.cuda()

    def load_my_state_dict(model, state_dict):
        # Copy only parameters whose names exist in the target model, so
        # loading does not fail when the checkpoint has missing/extra keys.
        own_state = model.state_dict()
        for name, param in state_dict.items():
            if name not in own_state:
                continue
            own_state[name].copy_(param)
        return model

    model = load_my_state_dict(model, torch.load(weightspath))
    print("Model and weights LOADED successfully")

    model.eval()

    if (not os.path.exists(args.datadir)):
        print("Error: datadir could not be loaded")

    loader = DataLoader(cityscapes(args.datadir, input_transform_cityscapes,
                                   subset=args.subset),
                        num_workers=args.num_workers,
                        batch_size=args.batch_size,
                        shuffle=False)

    # For visualizer:
    # must launch in other window "python3.6 -m visdom.server -port 8097"
    # and access localhost:8097 to see it
    if (args.visualize):
        vis = visdom.Visdom()

    for step, (images, filename) in enumerate(loader):
        if (not args.cpu):
            images = images.cuda()

        # BUG FIX: Variable(..., volatile=True) is deprecated since
        # PyTorch 0.4 and no longer disables autograd; use torch.no_grad()
        # so inference neither builds a graph nor grows memory.
        with torch.no_grad():
            outputs = model(images)

        label = outputs[0].cpu().max(0)[1].data.byte()
        label_color = Colorize()(label.unsqueeze(0))

        filenameSave = "./save_color/" + filename[0].split("leftImg8bit/")[1]
        os.makedirs(os.path.dirname(filenameSave), exist_ok=True)
        label_save = ToPILImage()(label_color)
        label_save.save(filenameSave)

        print(step, filenameSave)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Pre-trained word vectors loaded from disk; the embedding table is frozen
# at these weights (from_pretrained defaults to freeze=True).
embed_mat = np.load('./mrc_data/vectors.npy')
embed = nn.Embedding.from_pretrained(torch.Tensor(embed_mat))

# Model / training hyper-parameters.
embed_dim = 200
hidden_dim = 150
lr = 0.001
batch_size = 64
weight_decay = 0.0001

vis = visdom.Visdom()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def draw(ep, acc, title):
    """Append the point (ep, acc) to the visdom line plot named *title*.

    The first call (ep == 0) creates the window instead of appending.
    """
    xs = torch.tensor([ep]).long()
    ys = torch.tensor([acc]).float()
    vis.line(X=xs,
             Y=ys,
             win=title,
             update='append' if ep != 0 else None,
             opts={
                 'title': title,
                 'xlabel': 'EPOCH',
                 'ylabel': 'ACCURACY'
             })
def main(args):
    """Train a dense video-captioning model and checkpoint the best weights.

    Builds dataset/model/optimizer from ``args``, runs ``args.max_epochs``
    epochs of train/validation, plots loss curves to visdom when enabled,
    saves the best model (by validation loss), the loss history, and a
    periodic per-epoch checkpoint under ``args.checkpoint_path``.
    """
    # Create the checkpoint directory; tolerate a pre-existing one.
    try:
        os.makedirs(args.checkpoint_path)
    except OSError as e:
        if e.errno == errno.EEXIST:
            print('Directory already exists.')
        else:
            raise

    print('loading dataset')
    train_loader, valid_loader, text_proc, train_sampler = get_dataset(args)

    print('building model')
    model = get_model(text_proc, args)

    # Optimize only parameters that require gradients
    # (credit: PyTorch Forum issue 679).
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    if args.optim == 'adam':
        optimizer = optim.Adam(trainable_params,
                               args.learning_rate,
                               betas=(args.alpha, args.beta),
                               eps=args.epsilon)
    elif args.optim == 'sgd':
        optimizer = optim.SGD(trainable_params,
                              args.learning_rate,
                              weight_decay=1e-5,
                              momentum=args.alpha,
                              nesterov=True)
    else:
        raise NotImplementedError

    # Decay the LR when validation loss plateaus.
    # verbose(bool): if True, prints a message to stdout on each LR update.
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                               factor=args.reduce_factor,
                                               patience=args.patience_epoch,
                                               verbose=True)

    # Number of parameter blocks in the network.
    print("# of param blocks: {}".format(str(len(list(model.parameters())))))

    best_loss = float('inf')

    if args.enable_visdom:
        import visdom
        vis = visdom.Visdom()
        vis_window = {'iter': None, 'loss': None}
    else:
        vis, vis_window = None, None

    all_eval_losses = []
    all_cls_losses = []
    all_reg_losses = []
    all_sent_losses = []
    all_mask_losses = []
    all_training_losses = []

    for train_epoch in range(args.max_epochs):
        t_epoch_start = time.time()
        print('Epoch: {}'.format(train_epoch))

        if args.distributed:
            train_sampler.set_epoch(train_epoch)

        epoch_loss = train(train_epoch, model, optimizer, train_loader,
                           vis, vis_window, args)
        all_training_losses.append(epoch_loss)

        (valid_loss, val_cls_loss, val_reg_loss,
         val_sent_loss, val_mask_loss) = valid(model, valid_loader)

        all_eval_losses.append(valid_loss)
        all_cls_losses.append(val_cls_loss)
        all_reg_losses.append(val_reg_loss)
        all_sent_losses.append(val_sent_loss)
        all_mask_losses.append(val_mask_loss)

        # Plot all loss curves (rank 0 only in distributed mode). The two
        # near-identical create/update branches of the original are merged.
        if args.enable_visdom and (not args.distributed
                                   or dist.get_rank() == 0):
            x = np.tile(np.arange(len(all_eval_losses)), (6, 1)).T
            y = np.column_stack((np.asarray(all_training_losses),
                                 np.asarray(all_eval_losses),
                                 np.asarray(all_cls_losses),
                                 np.asarray(all_reg_losses),
                                 np.asarray(all_sent_losses),
                                 np.asarray(all_mask_losses)))
            opts = dict(title='Loss',
                        xlabel='Validation Iter',
                        ylabel='Loss',
                        legend=[
                            'train', 'dev', 'dev_cls', 'dev_reg',
                            'dev_sentence', 'dev_mask'
                        ])
            if vis_window['loss'] is None:
                vis_window['loss'] = vis.line(X=x, Y=y, opts=opts)
            else:
                vis.line(X=x, Y=y, win=vis_window['loss'], opts=opts)

        # Save the current best model.
        if valid_loss < best_loss:
            best_loss = valid_loss
            if not args.distributed or dist.get_rank() == 0:
                torch.save(model.module.state_dict(),
                           os.path.join(args.checkpoint_path,
                                        'best_model.t7'))
                print('*' * 5)
                print('Better validation loss {:.4f} found, save model'.
                      format(valid_loss))

        # Persist the training/validation loss history.
        if not args.distributed or dist.get_rank() == 0:
            torch.save(
                {
                    'train_loss': all_training_losses,
                    'eval_loss': all_eval_losses,
                    'eval_cls_loss': all_cls_losses,
                    'eval_reg_loss': all_reg_losses,
                    'eval_sent_loss': all_sent_losses,
                    'eval_mask_loss': all_mask_losses,
                },
                os.path.join(args.checkpoint_path, 'model_losses.t7'))

        # Check whether validation loss warrants an LR decay.
        scheduler.step(valid_loss)

        # Periodic per-epoch checkpoint.
        # BUG FIX: the loop runs 0..max_epochs-1, so the original condition
        # `train_epoch == args.max_epochs` was never true and the final
        # epoch was not checkpointed unless it hit the modulo; compare
        # against max_epochs - 1 instead.
        if (train_epoch % args.save_checkpoint_every == 0
                or train_epoch == args.max_epochs - 1):
            if not args.distributed or dist.get_rank() == 0:
                torch.save(
                    model.module.state_dict(),
                    os.path.join(args.checkpoint_path,
                                 'model_epoch_{}.t7'.format(train_epoch)))

        print('-' * 80)
        print('Epoch {} summary'.format(train_epoch))
        print('Train loss: {:.4f}, val loss: {:.4f}, Time: {:.4f}s'.format(
            epoch_loss, valid_loss, time.time() - t_epoch_start))
        print('val_cls: {:.4f}, '
              'val_reg: {:.4f}, val_sentence: {:.4f}, '
              'val mask: {:.4f}'.format(val_cls_loss, val_reg_loss,
                                        val_sent_loss, val_mask_loss))
        print('-' * 80)
batch_size = args.batch_size #accum_batch_size = 32 #iter_size = accum_batch_size / batch_size max_iter = 120000 weight_decay = 0.0005 stepvalues = (80000, 100000, 120000) gamma = 0.1 momentum = 0.9 # data augmentation hyperparams gt_pixel_jitter = 0.01 """#########################################################""" if args.visdom: import visdom viz = visdom.Visdom() """"########## Data Loading & dimension matching ##########""" # load custom CT dataset datapath = '/home/tkdrlf9202/Datasets/liver_lesion/lesion_dataset_Ponly_1332.h5' train_sets = [('liver_lesion')] def load_lesion_dataset(data_path): """ loads custom liver dataset if preprocessed h5 data exists, load it if not, load and preprocess raw liver dataset :param data_path: :return: flattened CT and mask data """ # check if the preprocessed dataset exists
def validate_G(model, epoch, win, vis):
    """Render the generator's current samples as HTML in a visdom text window.

    Each batch element is wrapped in an <h5> tag; the literal token '<R>'
    is displayed as 'R'. The epoch number prefixes the whole text blob.
    """
    text_list = model.dataset.show(model.show_G())
    text = str(epoch) + ': '
    for b in range(model.batch_size):
        text += '<h5>'
        for word in text_list:
            if word[b] != '<R>':
                text += word[b] + ' '
            else:
                text += 'R' + ' '
        text += '</h5>'
    vis.text(text=text, win=win)


if __name__ == '__main__':
    # Resume adversarial training from a pre-trained generator checkpoint.
    vis = visdom.Visdom(port=2424, env='seqGANs')
    seqGANs = SEQGANs().cuda()
    start_epoch = 0
    seqGANs.load_state_dict(
        torch.load('../save_pretrained/pretrained_450.pkl'))
    # The triple-quoted block below is disabled pre-training code kept for
    # reference; it is a bare string expression, not executed logic.
    '''
    for j in range(500):
        time1 = time.time()
        total_loss = seqGANs.pretraining()
        validate_G(seqGANs, j, 'pre_samples', vis)
        vis.line(X=torch.tensor([j]),
                 Y=torch.unsqueeze(torch.tensor(total_loss), 0),
                 win='G_pre_loss',
                 opts=dict(legend=['G_pre_loss']),
                 update='append' if j > 0 else None)
        time2 = time.time()
        print('total_loss : ' + str(total_loss) + ' Times: ' + str(time2 - time1))
        if j%50==0:
            torch.save(seqGANs.state_dict(), '../save_pretrained/pretrained_'+str(j)+'.pkl')
def __init__(self, args):
    """Build the solver from parsed CLI ``args``: hyper-parameters, the
    AdaIN generator + appearance encoder, their shared Adam optimizer,
    visdom logging state, and checkpoint/output directories."""
    self.use_cuda = args.cuda and torch.cuda.is_available()
    self.max_iter = args.max_iter
    # self.global_iter = 0

    # Objective / regularization hyper-parameters (beta-VAE style knobs).
    self.z_dim = args.z_dim
    self.beta = args.beta
    self.gamma = args.gamma
    self.C_max = args.C_max
    self.C_stop_iter = args.C_stop_iter
    self.objective = args.objective
    self.model = args.model

    # Adam optimizer settings.
    self.lr = args.lr
    self.beta1 = args.beta1
    self.beta2 = args.beta2

    # model params
    self.c_dim = args.c_dim
    self.image_size = args.image_size
    self.g_conv_dim = args.g_conv_dim
    self.g_repeat_num = args.g_repeat_num
    self.d_conv_dim = args.d_conv_dim
    self.d_repeat_num = args.d_repeat_num
    self.norm_layer = get_norm_layer(norm_type=args.norm)
    # The latent code is split into a pose part and a remainder.
    self.z_pose_dim = args.z_pose_dim
    self.z_no_pose_dim = self.z_dim - self.z_pose_dim
    self.lambda_combine = args.lambda_combine
    self.lambda_recon = args.lambda_recon

    # Per-dataset channel count and decoder output distribution.
    # new
    if args.dataset.lower() == 'dsprites':
        self.nc = 1
        self.decoder_dist = 'bernoulli'
    elif args.dataset.lower() == '3dchairs':
        self.nc = 3
        self.decoder_dist = 'gaussian'
    elif args.dataset.lower() == 'celeba':
        self.nc = 3
        self.decoder_dist = 'gaussian'
    elif args.dataset.lower() == 'ilab_unsup':
        self.nc = 3
        self.decoder_dist = 'gaussian'
    elif args.dataset.lower() == 'ilab_sup':
        self.nc = 3
        self.decoder_dist = 'gaussian'
    else:
        raise NotImplementedError

    self.device = torch.device(
        'cuda' if torch.cuda.is_available() else 'cpu')

    # model
    #self.Autoencoder = Generator_fc(self.nc, self.g_conv_dim, self.g_repeat_num)
    # model adain
    # NOTE(review): hard-coded absolute Windows path to the config file —
    # this will break on any other machine; should come from args.
    self.config = get_config(
        'C:/Users/Charles/Desktop/research/code/CustomedNet/latest.yaml')
    self.gen_adain = AdaINGen(self.config['input_dim_a'],
                              self.config['gen'],
                              fp16=False)
    self.encoder_a = ft_net()
    self.gen_adain = self.gen_adain.cuda()
    self.encoder_a = self.encoder_a.cuda()
    #self.Autoencoder.to(self.device)
    #self.auto_optim = optim.Adam(self.Autoencoder.parameters(), lr=self.lr,
    #                             betas=(self.beta1, self.beta2))# two optim for each encoder
    # One optimizer jointly updates the generator and the encoder.
    self.auto_optim = optim.Adam(list(self.gen_adain.parameters()) +
                                 list(self.encoder_a.parameters()),
                                 lr=self.lr,
                                 betas=(self.beta1, self.beta2))
    ''' use D '''
    # self.netD = networks.define_D(self.nc, self.d_conv_dim, 'basic',
    #                               3, 'instance', True, 'normal', 0.02,
    #                               '0,1')

    # log / visualization state
    self.log_dir = './checkpoints/' + args.viz_name
    self.model_save_dir = args.model_save_dir
    self.viz_name = args.viz_name
    self.viz_port = args.viz_port
    self.viz_on = args.viz_on
    # Visdom window handles; created lazily on first plot.
    self.win_recon = None
    self.win_combine_sup = None
    self.win_combine_unsup = None
    # self.win_d_no_pose_losdata_loaders = None
    # self.win_d_pose_loss = None
    # self.win_equal_pose_loss = None
    # self.win_have_pose_loss = None
    # self.win_auto_loss_fake = None
    # self.win_loss_cor_coe = None
    # self.win_d_loss = None
    if self.viz_on:
        print('vizdom on')
        self.viz = visdom.Visdom(
            port=self.viz_port)  #,use_incoming_socket=False

    # Checkpointing.
    self.resume_iters = args.resume_iters
    self.ckpt_dir = os.path.join(args.ckpt_dir, args.viz_name)
    if not os.path.exists(self.ckpt_dir):
        os.makedirs(self.ckpt_dir, exist_ok=True)
    self.ckpt_name = args.ckpt_name
    # if self.ckpt_name is not None:
    #     self.load_checkpoint(self.ckpt_name)

    # Output images/results.
    self.save_output = args.save_output
    self.output_dir = os.path.join(args.output_dir, args.viz_name)
    if not os.path.exists(self.output_dir):
        os.makedirs(self.output_dir, exist_ok=True)

    # Step intervals for gathering stats, display, and saving.
    self.gather_step = args.gather_step
    self.display_step = args.display_step
    self.save_step = args.save_step

    # Data pipeline.
    self.dset_dir = args.dset_dir
    self.dataset = args.dataset
    self.batch_size = args.batch_size
    self.data_loader = return_data(args)
    self.gather = DataGather()
def reinit(self, env='default', **kwargs):
    """Recreate the visdom client so later plots target *env*.

    Extra keyword arguments are forwarded to ``visdom.Visdom``.
    Returns ``self`` to allow call chaining.
    """
    client = visdom.Visdom(env=env, use_incoming_socket=False, **kwargs)
    self.vis = client
    return self