def train_for_n(model_name,
                XT_nd, XT_dg, XTest_dg,
                generator, discriminator, GAN,
                nb_epoch=5000, plt_frq=25, BATCH_SIZE=32,
                losses=None):
    # assumes: os, time, numpy as np, tqdm, plus project helpers
    # (make_trainable, plot_loss, plot_gen, gen_figname)

    # Avoid the mutable-default-argument pitfall: a shared default dict
    # would accumulate losses across calls.
    if losses is None:
        losses = {"d": [], "g": []}

    start_string = model_name + '_' + time.strftime('%m_%d__%H_%M_%S', time.localtime())
    os.mkdir(os.path.join('output', start_string))

    for e in tqdm(range(nb_epoch)):
        # Sample real (non-disguised) and disguised batches, then generate images
        image_batch = XT_nd[np.random.randint(0, XT_nd.shape[0], size=BATCH_SIZE), :, :, :]
        disguised_batch = XT_dg[np.random.randint(0, XT_dg.shape[0], size=BATCH_SIZE), :, :, :]
        generated_images = generator.predict(disguised_batch)

        # Train the discriminator on real vs. generated images
        X = np.concatenate((image_batch, generated_images))
        y = np.zeros([2 * BATCH_SIZE, 2])
        y[0:BATCH_SIZE, 1] = 1   # real
        y[BATCH_SIZE:, 0] = 1    # generated
        make_trainable(discriminator, True)
        d_loss = discriminator.train_on_batch(X, y)
        losses["d"].append(d_loss)

        # Train the generator-discriminator stack: the generator learns to map
        # disguised inputs to outputs the discriminator labels as real
        non_generated = XT_dg[np.random.randint(0, XT_dg.shape[0], size=BATCH_SIZE), :, :, :]
        y2 = np.zeros([BATCH_SIZE, 2])
        y2[:, 1] = 1
        make_trainable(discriminator, False)
        g_loss = GAN.train_on_batch(non_generated, y2)
        losses["g"].append(g_loss)

        # Update plots
        if e % plt_frq == plt_frq - 1:
            plot_loss(losses, gen_figname(e, start_string, 'loss'))
            plot_gen(generator, XTest_dg, gen_figname(e, start_string, 'gen'))

    return losses
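# make_trainable is called above but not defined in this section. A minimal
# sketch of the usual Keras GAN-tutorial helper is given below; the repo's
# actual definition may differ. Note that in modern Keras the trainable flag
# is read at compile time, so a model that relies on runtime toggling must be
# compiled with the intended setting in effect.
def make_trainable(net, val):
    """Freeze or unfreeze every layer of a Keras model (hypothetical sketch)."""
    net.trainable = val
    for layer in net.layers:
        layer.trainable = val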
def main():
    # assumes: numpy as np, torch, torch.nn as nn, logging, sys,
    # Adam (torch.optim), SummaryWriter, plus project helpers

    # parse arguments
    args = parse_args()

    # fix seed
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    torch.backends.cudnn.deterministic = True

    # set logger
    log_format = '[%(asctime)s] %(message)s'
    logging.basicConfig(level=logging.INFO, format=log_format, stream=sys.stderr)

    # set size
    seq_size = args.seq_size
    init_size = args.init_size

    # set device as gpu
    device = torch.device('cuda', 0)

    # set writer
    exp_name = set_exp_name(args)
    writer = SummaryWriter(args.log_dir + exp_name)
    LOGGER.info('EXP NAME: ' + exp_name)

    # load dataset
    train_loader, test_loader = full_dataloader(seq_size, init_size, args.batch_size)
    LOGGER.info('Dataset loaded')

    # init models
    model = EnvModel(belief_size=args.belief_size,
                     state_size=args.state_size,
                     num_layers=args.num_layers,
                     max_seg_len=args.seg_len,
                     max_seg_num=args.seg_num).to(device)
    LOGGER.info('Model initialized')

    # init optimizer
    optimizer = Adam(params=model.parameters(), lr=args.learn_rate, amsgrad=True)

    # test data (next(iter(...)) replaces the Python 2 style .next())
    pre_test_full_data_list = next(iter(test_loader))
    pre_test_full_data_list = preprocess(pre_test_full_data_list.to(device), args.obs_bit)

    # for each iter
    b_idx = 0
    while b_idx <= args.max_iters:
        # for each batch
        for train_obs_list in train_loader:
            b_idx += 1

            # mask temperature annealing
            if args.beta_anneal:
                model.state_model.mask_beta = (args.max_beta - args.min_beta) * 0.999 ** (
                    b_idx / args.beta_anneal) + args.min_beta
            else:
                model.state_model.mask_beta = args.max_beta

            ##############
            # train time #
            ##############
            # get input data
            train_obs_list = preprocess(train_obs_list.to(device), args.obs_bit)

            # run model with train mode
            model.train()
            optimizer.zero_grad()
            results = model(train_obs_list, seq_size, init_size, args.obs_std)

            # get train loss and backward update
            train_total_loss = results['train_loss']
            train_total_loss.backward()
            if args.grad_clip > 0.0:
                nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
            optimizer.step()

            # log
            if b_idx % 10 == 0:
                log_str, log_data = log_train(results, writer, b_idx)
                LOGGER.info(log_str, *log_data)

            #############
            # test time #
            #############
            if b_idx % 100 == 0:
                # set data
                pre_test_init_data_list = pre_test_full_data_list[:, :init_size]
                post_test_init_data_list = postprocess(pre_test_init_data_list, args.obs_bit)
                pre_test_input_data_list = pre_test_full_data_list[:, init_size:(init_size + seq_size)]
                post_test_input_data_list = postprocess(pre_test_input_data_list, args.obs_bit)

                with torch.no_grad():
                    ##################
                    # test data elbo #
                    ##################
                    model.eval()
                    results = model(pre_test_full_data_list, seq_size, init_size, args.obs_std)
                    post_test_rec_data_list = postprocess(results['rec_data'], args.obs_bit)
                    output_img, output_mask = plot_rec(post_test_init_data_list,
                                                       post_test_input_data_list,
                                                       post_test_rec_data_list,
                                                       results['mask_data'],
                                                       results['p_mask'],
                                                       results['q_mask'])

                    # log
                    log_str, log_data = log_test(results, writer, b_idx)
                    LOGGER.info(log_str, *log_data)
                    writer.add_image('valid/rec_image',
                                     output_img.transpose([2, 0, 1]),
                                     global_step=b_idx)
                    writer.add_image('valid/mask_image',
                                     output_mask.transpose([2, 0, 1]),
                                     global_step=b_idx)

                    ###################
                    # full generation #
                    ###################
                    pre_test_gen_data_list, test_mask_data_list = model.full_generation(
                        pre_test_init_data_list, seq_size)
                    post_test_gen_data_list = postprocess(pre_test_gen_data_list, args.obs_bit)

                    # log
                    output_img = plot_gen(post_test_init_data_list,
                                          post_test_gen_data_list,
                                          test_mask_data_list)
                    writer.add_image('valid/full_gen_image',
                                     output_img.transpose([2, 0, 1]), b_idx)
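# preprocess/postprocess are used above with an obs_bit argument but are not
# defined in this section. A common choice for pixel observations is bit-depth
# reduction with dequantization noise, sketched below under that assumption;
# the repo's actual helpers may differ in scaling or noise handling.
import torch

def preprocess(obs, bits=5):
    """Quantize images in [0, 1] to `bits` bits and center to roughly [-0.5, 0.5]."""
    bins = 2 ** bits
    obs = torch.floor(obs * 255 / 2 ** (8 - bits))  # reduce 8-bit values to `bits` bits
    obs = obs / bins - 0.5                          # rescale and center
    obs = obs + torch.rand_like(obs) / bins         # dequantization noise
    return obs

def postprocess(obs, bits=5):
    """Map centered, quantized images back to [0, 1] for plotting."""
    bins = 2 ** bits
    obs = torch.clamp(torch.floor((obs + 0.5) * bins), 0, bins - 1)
    return obs / (bins - 1)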
def main():
    # assumes: numpy as np, torch, torch.optim as optim, SummaryWriter,
    # plus project helpers

    # parse arguments
    args = parse_args()

    # fix seed
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    torch.backends.cudnn.deterministic = True

    # set size
    seq_size = args.seq_size
    init_size = args.init_size

    # set device as gpu
    device = torch.device('cuda', 0)

    # set writer
    exp_name = set_exp_name(args)
    writer = SummaryWriter(args.log_dir + exp_name)

    # load dataset
    train_loader, test_loader = maze_dataloader(seq_size, init_size, args.batch_size)

    # init models
    hrssm_params = {
        'seq_size': args.seq_size,
        'init_size': args.init_size,
        'state_size': args.state_size,
        'belief_size': args.belief_size,
        'num_layers': args.num_layers,
        'max_seg_num': args.seg_num,
        'max_seg_len': args.seg_len
    }
    optimizer = optim.Adam
    optimizer_params = {'lr': args.learn_rate, 'amsgrad': True}
    model = HRSSM(optimizer=optimizer,
                  optimizer_params=optimizer_params,
                  clip_grad_norm=args.grad_clip,
                  hrssm_params=hrssm_params)

    # test data (next(iter(...)) replaces the Python 2 style .next())
    pre_test_full_data_list = next(iter(test_loader))
    pre_test_full_data_list = preprocess(pre_test_full_data_list.to(device))

    # for each iter
    b_idx = 0
    while b_idx <= args.max_iters:
        # for each batch
        for train_obs_list in train_loader:
            b_idx += 1

            # mask temperature annealing
            if args.beta_anneal:
                model.mask_beta = (args.max_beta - args.min_beta) * 0.999 ** (
                    b_idx / args.beta_anneal) + args.min_beta
            else:
                model.mask_beta = args.max_beta

            # get input data
            train_obs_list = preprocess(train_obs_list.to(device))

            # train step and return the loss
            loss = model.train(train_obs_list)

            # log
            if b_idx % 1000 == 0:
                writer.add_scalar('train/total_loss', loss, b_idx)

            # test time
            if b_idx % 1000 == 0:
                # set data
                pre_test_init_data_list = pre_test_full_data_list[:, :init_size]
                post_test_init_data_list = post_process_maze(pre_test_init_data_list)
                pre_test_input_data_list = pre_test_full_data_list[:, init_size:(init_size + seq_size)]
                post_test_input_data_list = post_process_maze(pre_test_input_data_list)

                with torch.no_grad():
                    # test data elbo
                    results = model.reconstruction(pre_test_full_data_list)
                    post_test_rec_data_list = post_process_maze(results['rec_data'])
                    output_img, output_mask = plot_rec(post_test_init_data_list,
                                                       post_test_input_data_list,
                                                       post_test_rec_data_list,
                                                       results['mask_data'],
                                                       results['p_mask'],
                                                       results['q_mask'])

                    # log
                    loss = model.test(pre_test_full_data_list)
                    writer.add_scalar('valid/total_loss', loss, b_idx)
                    writer.add_image('valid/rec_image',
                                     output_img.transpose([2, 0, 1]),
                                     global_step=b_idx)
                    writer.add_image('valid/mask_image',
                                     output_mask.transpose([2, 0, 1]),
                                     global_step=b_idx)

                    # full generation
                    pre_test_gen_data_list, test_mask_data_list = model.full_generation(
                        pre_test_init_data_list, seq_size)
                    post_test_gen_data_list = post_process_maze(pre_test_gen_data_list)

                    # log
                    output_img = plot_gen(post_test_init_data_list,
                                          post_test_gen_data_list,
                                          test_mask_data_list)
                    writer.add_image('valid/full_gen_image',
                                     output_img.transpose([2, 0, 1]), b_idx)

                    # jumpy imagination
                    pre_test_gen_data_list = model.jumpy_generation(
                        pre_test_init_data_list, seq_size)
                    post_test_gen_data_list = post_process_maze(pre_test_gen_data_list)

                    # log
                    output_img = plot_gen(post_test_init_data_list, post_test_gen_data_list)
                    writer.add_image('valid/jumpy_gen_image',
                                     output_img.transpose([2, 0, 1]), b_idx)
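# The mask-temperature schedule appears verbatim in each training loop above.
# Factored out here for clarity (same formula, no new behavior): beta_anneal
# acts as a decay time constant, so the temperature decays geometrically from
# max_beta toward min_beta as the batch index grows.
def annealed_beta(b_idx, max_beta, min_beta, beta_anneal):
    """Geometric decay from max_beta toward min_beta; constant if annealing is off."""
    if not beta_anneal:
        return max_beta
    return (max_beta - min_beta) * 0.999 ** (b_idx / beta_anneal) + min_beta

# usage inside the loop:
#   model.mask_beta = annealed_beta(b_idx, args.max_beta, args.min_beta, args.beta_anneal)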
def train(model_name,
          data_collection,
          nets,
          nb_epoch=5000, plt_frq=25, BATCH_SIZE=32,
          losses=None):
    # assumes: numpy as np, tqdm, plus project helpers (sprint, build_model_path,
    # setup_workspace, make_trainable, plot_training_stats, plot_gen, gen_figname)

    # Avoid the mutable-default-argument pitfall: a shared default dict
    # would accumulate losses across calls.
    if losses is None:
        losses = {"fw_d_l": [], "fw_d_a": [],
                  "bw_d_l": [], "bw_d_a": [],
                  "g_fw_id": [], "g_fw_recon": [],
                  "g_bw_id": [], "g_bw_recon": [],
                  "g_loss": []}

    sprint('Preparing output path', level=1)
    parent_path, start_string = build_model_path(model_name, 'output')
    setup_workspace(parent_path)

    XT_nd, XTest_nd, XT_dg, XTest_dg = data_collection
    gan, gen_fw, gen_bw, dis_fw, dis_bw = nets

    for e in tqdm(range(nb_epoch)):
        # Select batch
        nd_batch = XT_nd[np.random.randint(0, XT_nd.shape[0], size=BATCH_SIZE), :, :, :]
        dg_batch = XT_dg[np.random.randint(0, XT_dg.shape[0], size=BATCH_SIZE), :, :, :]

        # Generate images
        fw_generated = gen_fw.predict(dg_batch)
        bw_generated = gen_bw.predict(nd_batch)

        # Prepare discriminator training data: first half real, second half generated
        fw_X = np.concatenate((nd_batch, fw_generated))
        fw_y = np.zeros([2 * BATCH_SIZE, 2])
        fw_y[0:BATCH_SIZE, 1] = 1
        fw_y[BATCH_SIZE:, 0] = 1

        bw_X = np.concatenate((dg_batch, bw_generated))
        bw_y = np.zeros([2 * BATCH_SIZE, 2])
        bw_y[0:BATCH_SIZE, 1] = 1
        bw_y[BATCH_SIZE:, 0] = 1

        make_trainable(dis_fw, True)
        fw_d_loss = dis_fw.train_on_batch(fw_X, fw_y)
        losses["fw_d_l"].append(fw_d_loss[0])
        losses["fw_d_a"].append(fw_d_loss[1])

        make_trainable(dis_bw, True)
        bw_d_loss = dis_bw.train_on_batch(bw_X, bw_y)
        losses["bw_d_l"].append(bw_d_loss[0])
        losses["bw_d_a"].append(bw_d_loss[1])

        # Train the combined generators.
        # The CycleGAN model computes its losses as follows:
        #   gan = Model(inputs=[image_fw, image_bw],
        #               outputs=[dis_result_fw, dis_result_bw,
        #                        same_fw, same_bw,
        #                        recovered_fw, recovered_bw])
        #   gan.compile(loss=['binary_crossentropy', 'binary_crossentropy',
        #                     'mae', 'mae',
        #                     'mae', 'mae'],
        #               loss_weights=[1, 1,
        #                             identity_loss, identity_loss,
        #                             consistency_loss, consistency_loss],
        #               optimizer=optimizer)
        # The discriminators are frozen inside the combined model at compile
        # time, so no make_trainable(..., False) toggle is needed here.
        nd_batch_2 = XT_nd[np.random.randint(0, XT_nd.shape[0], size=BATCH_SIZE), :, :, :]
        dg_batch_2 = XT_dg[np.random.randint(0, XT_dg.shape[0], size=BATCH_SIZE), :, :, :]
        y2 = np.zeros([BATCH_SIZE, 2])
        y2[:, 1] = 1

        g_loss = gan.train_on_batch([dg_batch_2, nd_batch_2],
                                    [y2, y2,
                                     dg_batch_2, nd_batch_2,
                                     dg_batch_2, nd_batch_2])
        losses["g_loss"].append(g_loss[0])
        losses["g_fw_id"].append(g_loss[3])
        losses["g_bw_id"].append(g_loss[4])
        losses["g_fw_recon"].append(g_loss[5])
        losses["g_bw_recon"].append(g_loss[6])

        # Update plots
        if e % plt_frq == plt_frq - 1:
            plot_training_stats(
                losses,
                gen_figname(e, start_string, 'loss', parent_path=parent_path))
            plot_gen(
                gen_fw, gen_bw, XTest_dg,
                gen_figname(e, start_string, 'gen_fw', parent_path=parent_path))
            plot_gen(
                gen_bw, gen_fw, XTest_nd,
                gen_figname(e, start_string, 'gen_bw', parent_path=parent_path))

    return losses
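# Both GAN trainers above repeat the same random-batch indexing idiom. A
# hypothetical helper (not in the original code) that captures it, so e.g.
# nd_batch = sample_batch(XT_nd, BATCH_SIZE):
import numpy as np

def sample_batch(X, batch_size):
    """Return a random batch of rows from X, sampled with replacement."""
    idx = np.random.randint(0, X.shape[0], size=batch_size)
    return X[idx]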
def main():
    # assumes: numpy as np, torch, torch.nn as nn, logging, sys, toml,
    # Adam (torch.optim), SummaryWriter, plus project helpers

    # configs
    args = toml.load(open('config.toml'))['model']
    seed = args['seed']
    batch_size = args['batch_size']
    seq_size = args['seq_size']
    init_size = args['init_size']
    state_size = args['state_size']
    belief_size = args['belief_size']
    num_layers = args['num_layers']
    obs_std = args['obs_std']
    obs_bit = args['obs_bit']
    learn_rate = args['learn_rate']
    grad_clip = args['grad_clip']
    max_iters = args['max_iters']
    seg_num = args['seg_num']
    seg_len = args['seg_len']
    max_beta = args['max_beta']
    min_beta = args['min_beta']
    beta_anneal = args['beta_anneal']
    log_dir = args['log_dir']
    test_times = args['test_times']
    gpu_ids = args['gpu_ids']
    data_path = args['data_path']
    check_path = args['check_path']

    # fix seed
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

    # set logger
    log_format = '[%(asctime)s] %(message)s'
    logging.basicConfig(level=logging.INFO, format=log_format, stream=sys.stderr)

    # set writer
    exp_name = set_exp_name(args)
    writer = SummaryWriter(log_dir + exp_name)
    LOGGER.info('EXP NAME: ' + exp_name)

    # load dataset
    train_loader, test_loader, check_loader = full_dataloader(
        seq_size, init_size, batch_size, data_path, check_path)
    LOGGER.info('Dataset loaded')

    # init models
    model = EnvModel(belief_size=belief_size,
                     state_size=state_size,
                     num_layers=num_layers,
                     max_seg_len=seg_len,
                     max_seg_num=seg_num)
    if torch.cuda.is_available():
        device = torch.device(f'cuda:{gpu_ids[0]}')
        model.to(device)
        model = nn.DataParallel(model, device_ids=gpu_ids)
        model = model.module
    else:
        device = torch.device('cpu')
        model.to(device)
    LOGGER.info('Model initialized')

    # init optimizer
    optimizer = Adam(params=model.parameters(), lr=learn_rate, amsgrad=True)

    # test data (next(iter(...)) replaces the Python 2 style .next())
    pre_test_full_list = next(iter(test_loader))
    pre_test_full_data_list = pre_test_full_list['img']
    pre_test_full_point_list = pre_test_full_list['point']
    pre_test_full_data_list = preprocess(pre_test_full_data_list.to(device), obs_bit)

    # for each iter
    b_idx = 0
    while b_idx <= max_iters:
        # for each batch
        for train_list in train_loader:
            b_idx += 1

            # mask temperature annealing
            if beta_anneal:
                model.state_model.mask_beta = (max_beta - min_beta) * 0.999 ** (
                    b_idx / beta_anneal) + min_beta
            else:
                model.state_model.mask_beta = max_beta

            ##############
            # train time #
            ##############
            # get input data
            train_obs_list = train_list['img']
            train_points_list = train_list['point']
            train_obs_list = preprocess(train_obs_list.to(device), obs_bit)

            # run model with train mode
            model.train()
            optimizer.zero_grad()
            results = model(train_obs_list, train_points_list, seq_size, init_size, obs_std)

            # get train loss and backward update
            train_total_loss = results['train_loss']
            train_total_loss.backward()
            if grad_clip > 0.0:
                nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
            optimizer.step()

            # log
            if b_idx % 10 == 0:
                log_str, log_data = log_train(results, writer, b_idx)
                LOGGER.info(log_str, *log_data)

            #############
            # test time #
            #############
            if b_idx % test_times == 0:
                # set data
                pre_test_init_data_list = pre_test_full_data_list[:, :init_size]
                post_test_init_data_list = postprocess(pre_test_init_data_list, obs_bit)
                pre_test_input_data_list = pre_test_full_data_list[:, init_size:(init_size + seq_size)]
                post_test_input_data_list = postprocess(pre_test_input_data_list, obs_bit)

                with torch.no_grad():
                    ##################
                    # test data elbo #
                    ##################
                    model.eval()
                    results = model(pre_test_full_data_list, pre_test_full_point_list,
                                    seq_size, init_size, obs_std)
                    post_test_rec_data_list = postprocess(results['rec_data'], obs_bit)
                    output_img, output_mask = plot_rec(post_test_init_data_list,
                                                       post_test_input_data_list,
                                                       post_test_rec_data_list,
                                                       results['mask_data'],
                                                       results['p_mask'],
                                                       results['q_mask'])

                    # log
                    log_str, log_data = log_test(results, writer, b_idx)
                    LOGGER.info(log_str, *log_data)
                    writer.add_image('valid/rec_image',
                                     output_img.transpose([2, 0, 1]),
                                     global_step=b_idx)
                    writer.add_image('valid/mask_image',
                                     output_mask.transpose([2, 0, 1]),
                                     global_step=b_idx)

                    ###################
                    # full generation #
                    ###################
                    pre_test_gen_data_list, test_mask_data_list = model.full_generation(
                        pre_test_init_data_list, seq_size)
                    post_test_gen_data_list = postprocess(pre_test_gen_data_list, obs_bit)

                    # log
                    output_img = plot_gen(post_test_init_data_list,
                                          post_test_gen_data_list,
                                          test_mask_data_list)
                    writer.add_image('valid/full_gen_image',
                                     output_img.transpose([2, 0, 1]), b_idx)

    # final evaluation: boundary-detection metrics on the check set
    with torch.no_grad():
        model.eval()
        acc = []
        precision = []
        recall = []
        f_value = []
        for check in check_loader:
            check_obs = check['img']
            check_point = check['point']
            check_obs = preprocess(check_obs.to(device), obs_bit)
            results = model(check_obs, check_point, seq_size, init_size, obs_std)
            metrics = calc_metrixs(results['mask_data_true'], results['mask_data'])
            acc.append(metrics['accuracy'])
            precision.append(metrics['precision'])
            recall.append(metrics['recall'])
            f_value.append(metrics['f_value'])
        acc = np.concatenate(acc)
        precision = np.concatenate(precision)
        recall = np.concatenate(recall)
        f_value = np.concatenate(f_value)
        print('shape: ', acc.shape)
        print('accuracy: ', acc.mean())
        print('precision: ', precision.mean())
        print('recall: ', recall.mean())
        print('f_value: ', f_value.mean())
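# calc_metrixs is not defined in this section. A minimal numpy sketch of
# per-sequence boundary-mask metrics, consistent with how its outputs are
# consumed above (per-item arrays that are concatenated and then averaged);
# the name and shapes below are assumptions, not the repo's actual code.
import numpy as np

def calc_mask_metrics(mask_true, mask_pred):
    """Per-sequence accuracy/precision/recall/F1 for binary boundary masks.

    mask_true, mask_pred: arrays of shape (batch, seq_len) with 0/1 entries.
    Returns one value per sequence for each metric.
    """
    tp = np.sum((mask_pred == 1) & (mask_true == 1), axis=1)
    fp = np.sum((mask_pred == 1) & (mask_true == 0), axis=1)
    fn = np.sum((mask_pred == 0) & (mask_true == 1), axis=1)
    tn = np.sum((mask_pred == 0) & (mask_true == 0), axis=1)
    eps = 1e-8  # guard against division by zero for empty masks
    accuracy = (tp + tn) / (tp + tn + fp + fn + eps)
    precision = tp / (tp + fp + eps)
    recall = tp / (tp + fn + eps)
    f_value = 2 * precision * recall / (precision + recall + eps)
    return {'accuracy': accuracy, 'precision': precision,
            'recall': recall, 'f_value': f_value}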