def train_mdn(logger=None):
    out_dir, listdir, featslistdir = get_dirpaths(args)
    batchsize = args.batchsize
    hiddensize = args.hiddensize
    nmix = args.nmix
    nepochs = args.epochs_mdn

    data = colordata(
        os.path.join(out_dir, 'images'),
        listdir=listdir,
        featslistdir=featslistdir,
        split='train')
    nbatches = np.int_(np.floor(data.img_num / batchsize))
    data_loader = DataLoader(dataset=data, num_workers=args.nthreads,
                             batch_size=batchsize, shuffle=True, drop_last=True)

    model_vae = VAE()
    model_vae.cuda()
    model_vae.load_state_dict(torch.load('%s/models/model_vae.pth' % (out_dir)))
    model_vae.train(False)

    model_mdn = MDN()
    model_mdn.cuda()
    model_mdn.train(True)

    optimizer = optim.Adam(model_mdn.parameters(), lr=1e-3)

    itr_idx = 0
    for epochs_mdn in range(nepochs):
        train_loss = 0.
        for batch_idx, (batch, batch_recon_const, batch_weights, _, batch_feats) in \
                tqdm(enumerate(data_loader), total=nbatches):
            input_color = Variable(batch).cuda()
            input_greylevel = Variable(batch_recon_const).cuda()
            input_feats = Variable(batch_feats).cuda()
            z = Variable(torch.randn(batchsize, hiddensize))

            optimizer.zero_grad()
            mu, logvar, _ = model_vae(input_color, input_greylevel, z)
            mdn_gmm_params = model_mdn(input_feats)
            loss, loss_l2 = mdn_loss(mdn_gmm_params, mu,
                                     torch.sqrt(torch.exp(logvar)), batchsize)
            loss.backward()
            optimizer.step()
            train_loss = train_loss + loss.data[0]

            if logger:
                logger.update_plot(itr_idx, [loss.data[0], loss_l2.data[0]],
                                   plot_type='mdn')
            itr_idx += 1

        train_loss = (train_loss * 1.) / (nbatches)
        print('[DEBUG] Training MDN, epoch %d has loss %f' % (epochs_mdn, train_loss))
        torch.save(model_mdn.state_dict(), '%s/models/model_mdn.pth' % (out_dir))
def divcolor():
    out_dir, listdir, featslistdir = get_dirpaths(args)
    batchsize = args.batchsize
    hiddensize = args.hiddensize
    nmix = args.nmix

    data = colordata(
        os.path.join(out_dir, 'images'),
        listdir=listdir,
        featslistdir=featslistdir,
        split='test')
    nbatches = np.int_(np.floor(data.img_num / batchsize))
    data_loader = DataLoader(dataset=data, num_workers=args.nthreads,
                             batch_size=batchsize, shuffle=True, drop_last=True)

    model_vae = VAE()
    model_vae.cuda()
    model_vae.load_state_dict(torch.load('%s/models/model_vae.pth' % (out_dir)))
    model_vae.train(False)

    model_mdn = MDN()
    model_mdn.cuda()
    model_mdn.load_state_dict(torch.load('%s/models/model_mdn.pth' % (out_dir)))
    model_mdn.train(False)

    for batch_idx, (batch, batch_recon_const, batch_weights,
                    batch_recon_const_outres, batch_feats) in \
            tqdm(enumerate(data_loader), total=nbatches):
        input_feats = Variable(batch_feats).cuda()

        mdn_gmm_params = model_mdn(input_feats)
        gmm_mu, gmm_pi = get_gmm_coeffs(mdn_gmm_params)
        gmm_pi = gmm_pi.view(-1, 1)
        gmm_mu = gmm_mu.view(-1, hiddensize)

        for j in range(batchsize):
            batch_j = np.tile(batch[j, ...].numpy(), (batchsize, 1, 1, 1))
            batch_recon_const_j = np.tile(batch_recon_const[j, ...].numpy(),
                                          (batchsize, 1, 1, 1))
            batch_recon_const_outres_j = np.tile(
                batch_recon_const_outres[j, ...].numpy(), (batchsize, 1, 1, 1))

            input_color = Variable(torch.from_numpy(batch_j)).cuda()
            input_greylevel = Variable(torch.from_numpy(batch_recon_const_j)).cuda()

            curr_mu = gmm_mu[j * nmix:(j + 1) * nmix, :]
            orderid = np.argsort(
                gmm_pi[j * nmix:(j + 1) * nmix, 0].cpu().data.numpy().reshape(-1))

            z = curr_mu.repeat(np.int((batchsize * 1.) / nmix), 1)

            _, _, color_out = model_vae(input_color, input_greylevel, z,
                                        is_train=False)

            data.saveoutput_gt(color_out.cpu().data.numpy()[orderid, ...],
                               batch_j[orderid, ...],
                               'divcolor_%05d_%05d' % (batch_idx, j),
                               nmix,
                               net_recon_const=batch_recon_const_outres_j[orderid, ...])
def load_vaes(H, logprint):
    vae = VAE(H)
    if H.restore_path:
        logprint(f'Restoring vae from {H.restore_path}')
        restore_params(vae, H.restore_path, map_cpu=True,
                       local_rank=H.local_rank, mpi_size=H.mpi_size)

    ema_vae = VAE(H)
    if H.restore_ema_path:
        logprint(f'Restoring ema vae from {H.restore_ema_path}')
        restore_params(ema_vae, H.restore_ema_path, map_cpu=True,
                       local_rank=H.local_rank, mpi_size=H.mpi_size)
    else:
        ema_vae.load_state_dict(vae.state_dict())
    ema_vae.requires_grad_(False)

    vae = vae.cuda(H.local_rank)
    ema_vae = ema_vae.cuda(H.local_rank)

    vae = DistributedDataParallel(vae, device_ids=[H.local_rank],
                                  output_device=H.local_rank)

    if len(list(vae.named_parameters())) != len(list(vae.parameters())):
        raise ValueError('Some params are not named. Please name all params.')
    total_params = 0
    for name, p in vae.named_parameters():
        total_params += np.prod(p.shape)
    logprint(total_params=total_params, readable=f'{total_params:,}')
    return vae, ema_vae
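# The loader above returns both a live VAE and a frozen EMA copy of it. As a
# rough illustration only (not this codebase's actual implementation), the EMA
# weights would typically be refreshed after each optimizer step with an update
# like the sketch below; the function name and the `ema_rate` default are
# assumptions made for the example.
def update_ema(vae, ema_vae, ema_rate=0.999):
    """Blend live parameters into the EMA copy: ema = rate * ema + (1 - rate) * live."""
    with torch.no_grad():
        for p_live, p_ema in zip(vae.parameters(), ema_vae.parameters()):
            p_ema.mul_(ema_rate).add_(p_live, alpha=1 - ema_rate)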
class VAERNN(torch.nn.Module):
    def __init__(self):
        super(VAERNN, self).__init__()
        self.z_size = 32
        self.kl_tolerance = 0.5
        self.vae = VAE()
        self.rnn = RNN()
        self.vae.train()
        self.rnn.train()
        self.init_()
        self.is_cuda = False

    def load(self):
        self.vae.load_state_dict(
            torch.load(vae_model_path, map_location=lambda storage, loc: storage))
        self.rnn.load_state_dict(
            torch.load(rnn_model_path, map_location=lambda storage, loc: storage))

    def init_(self):
        self.h = self.rnn.init_()

    def forward(self, inputs):
        z = self.vae(inputs)
        return z

    def when_train(self, inputs, one, outputs):
        if self.is_cuda:
            self.vae.is_cuda = True
            self.vae.cuda()
            self.rnn.is_cuda = True
            self.rnn.cuda()
        # self.rnn.init_()
        z = self.vae(inputs)
        # self.next_kl_loss = self.vae.kl_loss
        # self.next_r_loss = self.vae.r_loss
        z = z.unsqueeze(0)
        z_a = torch.cat((z, one), dim=2)
        self.rnn(z_a)

        z_next = self.vae(outputs)
        self.next_kl_loss = self.vae.kl_loss
        self.next_r_loss = self.vae.r_loss
        z_next = z_next.unsqueeze(0)
        # z_next = z
        self.pred_loss = self.rnn.prediction_loss_f(z_next)
        self.mdn_loss = self.rnn.mdn_loss_f(z_next)
def load_vaes(H):
    vae = None
    # vae = VAE(H)
    # if H.restore_path:
    #     # logprint(f'Restoring vae from {H.restore_path}')
    #     print('Restoring vae from :', H.restore_path)
    #     restore_params(vae, H.restore_path, map_cpu=True, local_rank=None, mpi_size=None)

    ema_vae = VAE(H)
    if H.restore_ema_path:
        # logprint(f'Restoring ema vae from {H.restore_ema_path}')
        restore_params(ema_vae, H.restore_ema_path, map_cpu=True,
                       local_rank=None, mpi_size=None)
    elif vae:
        ema_vae.load_state_dict(vae.state_dict())
    ema_vae.requires_grad_(False)

    # vae = vae.cuda(H.local_rank)
    ema_vae = ema_vae.cuda(H.local_rank)

    # vae = DistributedDataParallel(vae, device_ids=[H.local_rank], output_device=H.local_rank)
    # if len(list(vae.named_parameters())) != len(list(vae.parameters())):
    #     raise ValueError('Some params are not named. Please name all params.')
    # total_params = 0
    # for name, p in vae.named_parameters():
    #     total_params += np.prod(p.shape)
    # print("Total Params : ", total_params)
    # logprint(total_params=total_params, readable=f'{total_params:,}')
    return vae, ema_vae
def objective(params):
    """
    Objective function to be minimized: loss with respect to our hyperparameters.
    """
    enc_kernel1 = int(params[0])
    enc_kernel2 = int(params[1])
    enc_kernel3 = int(params[2])
    dec_kernel1 = int(params[3])
    dec_kernel2 = int(params[4])
    dec_kernel3 = int(params[5])

    # Contact matrices are 21x21
    input_dim = 441

    encoder = Encoder(input_size=input_dim, latent_size=8,
                      kernel1=enc_kernel1, kernel2=enc_kernel2, kernel3=enc_kernel3)
    decoder = Decoder(latent_dim=8, output_size=input_dim,
                      kernel1=dec_kernel1, kernel2=dec_kernel2, kernel3=dec_kernel3)
    vae = VAE(encoder, decoder)
    criterion = nn.MSELoss()

    use_cuda = args.use_cuda
    if use_cuda:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
        vae = vae.cuda()
        criterion = criterion.cuda()

    optimizer = optim.Adam(vae.parameters(), lr=0.0001)

    epoch_loss = 0
    total_loss = 0
    for epoch in range(100):
        for i, data in enumerate(trainloader, 0):
            inputs = data['cont_matrix']
            inputs = inputs.resize_(args.batch_size, 1, 21, 21)
            inputs = inputs.float()
            if use_cuda:
                inputs = inputs.cuda()
            inputs = Variable(inputs)
            optimizer.zero_grad()
            dec = vae(inputs)
            ll = latent_loss(vae.z_mean, vae.z_sigma)
            loss = criterion(dec, inputs) + ll
            loss.backward()
            optimizer.step()
            epoch_loss = loss.data[0]
        print(epoch, epoch_loss)
        total_loss += epoch_loss

    return total_loss
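# `objective(params)` above takes a flat list of six kernel sizes and returns a
# scalar loss, which matches the calling convention of scikit-optimize. One
# plausible way to drive it is sketched below, assuming scikit-optimize is
# available; the search ranges and the number of calls are illustrative
# assumptions, not values taken from the original script.
from skopt import gp_minimize

search_space = [(3, 9)] * 6  # one integer range per encoder/decoder kernel size
result = gp_minimize(objective, search_space, n_calls=20, random_state=0)
print('best kernels:', result.x, 'best loss:', result.fun)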
def main():
    """
    Generate images from a saved model
    """
    train_data = UnlabeledContact(
        data='/home/ygx/data/fspeptide/fs_peptide.npy')
    print('Number of samples: {}'.format(len(train_data)))
    trainloader = DataLoader(train_data, batch_size=args.batch_size)

    # encoder = Encoder(input_size=args.input_size, latent_size=args.latent_size)
    # decoder = Decoder(latent_size=args.latent_size, output_size=args.input_size)
    # vae = VAE(encoder, decoder, use_cuda=args.use_cuda)
    vae = VAE()

    # Load saved model
    vae.load_state_dict(torch.load(args.model_path + args.model_name))

    if args.use_cuda:
        # encoder = encoder.cuda()
        # decoder = decoder.cuda()
        vae = vae.cuda()

    latent_arrys = []
    recon_arrys = []
    for batch_idx, data in enumerate(trainloader):
        inputs = data['cont_matrix']
        inputs = inputs.resize_(args.batch_size, 1, 21, 21)
        inputs = inputs.float()
        if args.use_cuda:
            inputs = inputs.cuda()
        inputs = Variable(inputs)

        # latent_array = encoder(inputs).data.cpu().numpy()
        # print('latent_array has shape {}'.format(latent_array.shape))
        # latent_arrys.append(latent_array)

        # reconstructed_array = vae(inputs).data.cpu().numpy()
        reconstructed_array, mu, _ = vae(inputs)
        reconstructed_array = reconstructed_array.data.cpu().numpy()
        latent_array = mu.data.cpu().numpy()
        recon_arrys.append(reconstructed_array)
        latent_arrys.append(latent_array)

        if batch_idx % 100 == 0:
            print('Saving progress: {:.3f}%'.format(batch_idx * 100. / len(trainloader)))

    print('\nNumber of images prepared: {}'.format(len(latent_arrys)))

    latent_stacked = np.stack(latent_arrys, axis=0)
    latent_filename = 'latent_imgs_fc'
    np.save(args.latent_save_path + latent_filename, latent_stacked)

    recon_stacked = np.stack(recon_arrys, axis=0)
    recon_filename = 'recon_imgs_fc'
    np.save(args.recon_save_path + recon_filename, recon_stacked)
def main():
    use_cuda = args.use_cuda
    train_data = UnlabeledContact(data=args.data_dir)
    print('Number of samples: {}'.format(len(train_data)))
    trainloader = DataLoader(train_data, batch_size=args.batch_size)

    # Contact matrices are 21x21
    input_size = 441

    encoder = Encoder(input_size=input_size, latent_size=3)
    decoder = Decoder(latent_size=3, output_size=input_size)
    vae = VAE(encoder, decoder, use_cuda=use_cuda)
    criterion = nn.MSELoss()

    if use_cuda:
        encoder = nn.DataParallel(encoder)
        decoder = nn.DataParallel(decoder)
        encoder = encoder.cuda().half()
        decoder = decoder.cuda().half()
        vae = nn.DataParallel(vae)
        vae = vae.cuda().half()
        criterion = criterion.cuda().half()

    optimizer = optim.SGD(vae.parameters(), lr=0.01)

    clock = AverageMeter(name='clock16', rank=0)
    epoch_loss = 0
    total_loss = 0
    end = time.time()
    for epoch in range(15):
        for batch_idx, data in enumerate(trainloader):
            inputs = data['cont_matrix']
            # inputs = inputs.resize_(args.batch_size, 1, 21, 21)
            inputs = inputs.float()
            if use_cuda:
                inputs = inputs.cuda().half()
            inputs = Variable(inputs)
            optimizer.zero_grad()
            dec = vae(inputs)
            ll = latent_loss(vae.z_mean, vae.z_sigma)
            loss = criterion(dec, inputs) + ll
            loss.backward()
            optimizer.step()
            epoch_loss += loss.data[0]

            clock.update(time.time() - end)
            end = time.time()

            if batch_idx % args.log_interval == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(trainloader.dataset),
                    100. * batch_idx / len(trainloader), loss.data[0]))

    clock.save(path='/home/ygx/libraries/mds/molecules/molecules/linear_vae')
class VAERNN(torch.nn.Module):
    def __init__(self):
        super(VAERNN, self).__init__()
        self.z_size = 32
        self.kl_tolerance = 0.5
        self.vae = VAE()
        self.rnn = RNN()
        self.vae.train()
        self.rnn.train()
        self.init_()
        self.is_cuda = False

    def load(self):
        self.vae.load_state_dict(
            torch.load(vae_model_path, map_location=lambda storage, loc: storage))
        self.rnn.load_state_dict(
            torch.load(rnn_model_path, map_location=lambda storage, loc: storage))

    def init_(self):
        self.h = self.rnn.init_()

    def forward(self, inputs):
        z = self.vae(inputs)
        return z

    def when_train(self, inputs, one, outputs):
        if self.is_cuda:
            self.vae.is_cuda = True
            self.vae.cuda()
            self.rnn.is_cuda = True
            self.rnn.cuda()
        # with torch.no_grad():
        z = self.vae(inputs)
        self.next_kl_loss = self.vae.kl_loss
        self.next_r_loss = self.vae.r_loss
class VAERNN(torch.nn.Module):
    def __init__(self):
        super(VAERNN, self).__init__()
        self.z_size = 32
        self.kl_tolerance = 0.5
        self.vae = VAE()
        self.rnn = RNN()
        self.vae.load_state_dict(
            torch.load(vae_model_path, map_location=lambda storage, loc: storage))
        self.rnn.load_state_dict(
            torch.load(rnn_model_path, map_location=lambda storage, loc: storage))
        self.vae.train()
        self.rnn.train()
        self.init_()
        self.is_cuda = False

    def init_(self):
        self.h = self.rnn.init_()

    def forward(self, inputs):
        z = self.vae(inputs)
        # z = z.unsqueeze(0)
        # z = self.rnn(z)
        print('z', z.shape)
        print('h', self.h.shape)
        return z, self.h

    def when_train(self, inputs, one, outputs):
        self.vae.is_cuda = True
        self.vae.cuda()
        self.rnn.is_cuda = True
        self.rnn.cuda()

        # Encode the current observation without tracking gradients, then feed
        # the (latent, action) pair through the RNN.
        with torch.no_grad():
            z = self.vae(inputs)
        z = z.unsqueeze(0)
        z_a = torch.cat((z, one), dim=2)
        self.rnn(z_a)

        # Encode the next observation and keep its VAE losses.
        z_next = self.vae(outputs)
        self.next_kl_loss = self.vae.kl_loss
        self.next_r_loss = self.vae.r_loss
        z_next = z_next.unsqueeze(0)

        # RNN losses against the true next latent.
        self.pred_loss = self.rnn.prediction_loss_f(z_next)
        self.mdn_loss = self.rnn.mdn_loss_f(z_next)

        # Reconstruction error of the RNN's predicted next latent.
        z_next_hat = self.rnn.z_prediction
        z_next_hat = z_next_hat.squeeze(0)
        self.pred_recon_loss = self.vae.reconstruction_error_f(z_next_hat, inputs)

    def make_prediction(self, action):
        one = one_hot(action)
        one = torch.from_numpy(one)
        one = one.unsqueeze(0)
        one = one.type(torch.float)
        z_a = torch.cat((z, one), dim=1)
        z_a = z_a.unsqueeze(0)
if __name__ == '__main__':
    # params for visualizations
    n_images = 10

    transformers = transforms.Compose([transforms.ToTensor()])
    img_path = CONFIG.shots.dir
    dataset = DataSet(img_path, transform=transformers)
    loader = torch.utils.data.DataLoader(dataset=dataset,
                                         batch_size=n_images,
                                         shuffle=True)

    model = VAE()
    model.cuda()

    resume_path = CONFIG.model.dir + '/model.checkpoint.tar'
    if os.path.isfile(resume_path):
        print("=> loading checkpoint '{}'".format(resume_path))
        checkpoint = torch.load(resume_path)
        model.load_state_dict(checkpoint['state_dict'])
        print("=> loaded checkpoint '{}' (epoch {})".format(
            resume_path, checkpoint['epoch']))

        reconstruction(data_loader=loader, model=model, n_images=n_images)

        loader.batch_size = 1
        # save_encoding(data_loader=loader, model=model)
    else:
        print("=> no checkpoint found at '{}'".format(resume_path))
def main():
    use_cuda = args.use_cuda
    train_data = UnlabeledContact(data=args.data_dir)
    print('Number of samples: {}'.format(len(train_data)))
    trainloader = DataLoader(train_data, batch_size=args.batch_size)

    # Contact matrices are 21x21
    input_size = 441

    encoder = Encoder(input_size=input_size, latent_size=3)
    decoder = Decoder(latent_size=3, output_size=input_size)
    vae = VAE(encoder, decoder, use_cuda=use_cuda)
    criterion = nn.MSELoss()

    if use_cuda:
        encoder = encoder.cuda().half()
        decoder = decoder.cuda().half()
        vae = vae.cuda().half()
        criterion = criterion.cuda().half()

    optimizer = optim.SGD(vae.parameters(), lr=0.01)

    epoch_loss = 0
    total_loss = 0
    for epoch in range(100):
        for batch_idx, data in enumerate(trainloader):
            inputs = data['cont_matrix']
            inputs = inputs.resize_(args.batch_size, 1, 21, 21)
            inputs = inputs.float()
            if use_cuda:
                inputs = inputs.cuda().half()
            inputs = Variable(inputs)
            optimizer.zero_grad()
            dec = vae(inputs)
            ll = latent_loss(vae.z_mean, vae.z_sigma)
            loss = criterion(dec, inputs) + ll
            loss.backward()
            optimizer.step()
            epoch_loss += loss.data[0]

            if batch_idx % args.log_interval == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(trainloader.dataset),
                    100. * batch_idx / len(trainloader), loss.data[0]))

        if epoch < 10:
            # Get latent encoding
            latent_array = encoder(inputs).data[0].cpu().float().numpy()
            filename = 'latent_epoch' + str(epoch)
            np.save('./latent_saves/' + filename, latent_array)
            # Get reconstructed image
            reconstructed_array = vae(inputs).data[0].cpu().float().numpy().reshape(21, 21)
            recon_filename = 'reconstructed_epoch' + str(epoch)
            np.save('./reconstruct_saves/' + recon_filename, reconstructed_array)

        if epoch % 10 == 0:
            torch.save(vae.state_dict(), args.save_path + 'epoch' + str(epoch))
            latent_array = encoder(inputs).data[0].cpu().float().numpy()
            filename = 'latent_epoch' + str(epoch)
            np.save('./latent_saves/' + filename, latent_array)
            reconstructed_array = vae(inputs).data[0].cpu().float().numpy().reshape(21, 21)
            recon_filename = 'reconstructed_epoch' + str(epoch)
            np.save('./reconstruct_saves/' + recon_filename, reconstructed_array)
def train(model_dict):

    def update_current_state(current_state, state, channels):
        # current_state: [processes, channels*stack, height, width]
        state = torch.from_numpy(state).float()  # (processes, channels, height, width)
        # first stack*channel-channel frames = last stack*channel-channel, so slide them forward
        current_state[:, :-channels] = current_state[:, channels:]
        current_state[:, -channels:] = state  # last frame is now the new one
        return current_state

    def update_rewards(reward, done, final_rewards, episode_rewards, current_state):
        # Reward, Done: [P], [P]
        # final_rewards, episode_rewards: [P,1], [P,1]
        # current_state: [P,C*S,H,W]
        reward = torch.from_numpy(np.expand_dims(np.stack(reward), 1)).float()  # [P,1]
        episode_rewards += reward  # keeps track of current episode cumulative reward
        masks = torch.FloatTensor([[0.0] if done_ else [1.0] for done_ in done])  # [P,1]
        final_rewards *= masks  # erase the ones that are done
        final_rewards += (1 - masks) * episode_rewards  # set it to the cumulative episode reward
        episode_rewards *= masks  # erase the done ones
        masks = masks.type(dtype)  # cuda
        if current_state.dim() == 4:  # if state is a frame/image
            current_state *= masks.unsqueeze(2).unsqueeze(2)  # [P,1,1,1]
        else:
            current_state *= masks  # restart the done ones, by setting the state to zero
        return reward, masks, final_rewards, episode_rewards, current_state

    num_frames = model_dict['num_frames']
    cuda = model_dict['cuda']
    which_gpu = model_dict['which_gpu']
    num_steps = model_dict['num_steps']
    num_processes = model_dict['num_processes']
    seed = model_dict['seed']
    env_name = model_dict['env']
    save_dir = model_dict['save_to']
    num_stack = model_dict['num_stack']
    algo = model_dict['algo']
    save_interval = model_dict['save_interval']
    log_interval = model_dict['log_interval']
    save_params = model_dict['save_params']
    vid_ = model_dict['vid_']
    gif_ = model_dict['gif_']
    ls_ = model_dict['ls_']
    vae_ = model_dict['vae_']

    os.environ['OMP_NUM_THREADS'] = '1'
    os.environ['CUDA_VISIBLE_DEVICES'] = str(which_gpu)

    if cuda:
        torch.cuda.manual_seed(seed)
        dtype = torch.cuda.FloatTensor
        model_dict['dtype'] = dtype
    else:
        torch.manual_seed(seed)
        dtype = torch.FloatTensor
        model_dict['dtype'] = dtype

    # Create environments
    print(num_processes, 'processes')
    monitor_rewards_dir = os.path.join(save_dir, 'monitor_rewards')
    if not os.path.exists(monitor_rewards_dir):
        os.makedirs(monitor_rewards_dir)
        print('Made dir', monitor_rewards_dir)
    envs = SubprocVecEnv([
        make_env(env_name, seed, i, monitor_rewards_dir)
        for i in range(num_processes)
    ])

    if vid_:
        print('env for video')
        envs_video = make_env_monitor(env_name, save_dir)

    if gif_:
        print('env for gif')
        envs_gif = make_env_basic(env_name)

    if ls_:
        print('env for ls')
        envs_ls = make_env_basic(env_name)

    if vae_:
        print('env for vae')
        envs_vae = make_env_basic(env_name)

    obs_shape = envs.observation_space.shape  # (channels, height, width)
    obs_shape = (obs_shape[0] * num_stack, *obs_shape[1:])  # (channels*stack, height, width)
    shape_dim0 = envs.observation_space.shape[0]  # channels

    model_dict['obs_shape'] = obs_shape
    model_dict['shape_dim0'] = shape_dim0

    next_state_pred_ = 0
    model_dict['next_state_pred_'] = next_state_pred_

    # Create agent
    if algo == 'a2c':
        agent = a2c(envs, model_dict)
        print('init a2c agent')
    elif algo == 'ppo':
        agent = ppo(envs, model_dict)
        print('init ppo agent')
    elif algo == 'a2c_minibatch':
        agent = a2c_minibatch(envs, model_dict)
        print('init a2c_minibatch agent')
    elif algo == 'a2c_list_rollout':
        agent = a2c_list_rollout(envs, model_dict)
        print('init a2c_list_rollout agent')
    elif algo == 'a2c_with_var':
        agent = a2c_with_var(envs, model_dict)
        print('init a2c_with_var agent')

    if vae_:
        vae = VAE()
        vae.cuda()

    # Init state
    state = envs.reset()  # (processes, channels, height, width)
    current_state = torch.zeros(num_processes, *obs_shape)  # (processes, channels*stack, height, width)
    current_state = update_current_state(current_state, state, shape_dim0).type(dtype)  # add the new frame, remove oldest
    # storage has states: (num_steps + 1, num_processes, *obs_shape), set first step
    agent.insert_first_state(current_state)

    # These are used to compute average rewards for all processes.
    episode_rewards = torch.zeros([num_processes, 1])  # keeps track of current episode cumulative reward
    final_rewards = torch.zeros([num_processes, 1])

    num_updates = int(num_frames) // num_steps // num_processes
    save_interval_num_updates = int(save_interval / num_processes / num_steps)

    # Begin training
    start = time.time()
    start2 = time.time()
    for j in range(num_updates):
        for step in range(num_steps):

            # Act, [P,1], [P], [P,1], [P]
            state_pytorch = Variable(agent.rollouts.states[step])
            value, action, action_log_probs, dist_entropy = agent.act(state_pytorch)

            cpu_actions = action.data.squeeze(1).cpu().numpy()  # [P]

            # Step, S:[P,C,H,W], R:[P], D:[P]
            state, reward, done, info = envs.step(cpu_actions)
            reward_numpy = reward

            # Record rewards and update state
            reward, masks, final_rewards, episode_rewards, current_state = update_rewards(
                reward, done, final_rewards, episode_rewards, current_state)
            current_state = update_current_state(current_state, state, shape_dim0)

            # Agent record step
            if next_state_pred_:
                agent.insert_data(step, current_state, action.data, value, reward,
                                  masks, action_log_probs, dist_entropy, next_state_prediction)
                agent.rollouts.insert_state_pred(next_state_prediction)
            else:
                agent.insert_data(step, current_state, action.data, value, reward,
                                  masks, action_log_probs, dist_entropy, 0)

        # Optimize agent
        agent.update()  # agent.update(j, num_updates)

        # Train the VAE on the states of this rollout.
        # Remove the first state since it is repeated (the last state of the
        # previous rollout), take the first frame of each stack, then reshape.
        batch = agent.rollouts.states  # [Steps+1,Processes,Stack,84,84]
        batch = batch[1:]  # [Steps,Processes,Stack,84,84]
        batch = batch[:, :, 0]  # [Steps,Processes,84,84]
        batch = batch.contiguous().view(-1, 84, 84)  # [Steps*Processes,84,84]
        elbo = vae.update(batch)

        agent.insert_first_state(agent.rollouts.states[-1])

        total_num_steps = (j + 1) * num_processes * num_steps

        if j % save_interval_num_updates == 0 and save_dir != "" and j != 0:
            # Save model
            if save_params:
                do_params(save_dir, agent, total_num_steps, model_dict)
            # make video
            if vid_:
                do_vid(envs_video, update_current_state, shape_dim0, dtype, agent,
                       model_dict, total_num_steps)
            # make gif
            if gif_:
                do_gifs(envs_gif, agent, model_dict, update_current_state,
                        update_rewards, total_num_steps)
            # make vae prob gif
            if vae_:
                do_prob_state(envs_vae, agent, model_dict, vae,
                              update_current_state, total_num_steps)

        # Print updates
        if j % log_interval == 0:
            end = time.time()
            to_print_info_string = "{}, {}, {:.1f}/{:.1f}/{:.1f}/{:.1f}, {}, {:.1f}, {:.2f}".format(
                j, total_num_steps,
                final_rewards.min(), final_rewards.median(),
                final_rewards.mean(), final_rewards.max(),
                int(total_num_steps / (end - start)),
                end - start, end - start2)
            elbo = "{:.2f}".format(elbo.data.cpu().numpy()[0])

            if next_state_pred_:
                state_pred_error_print = "{:.2f}".format(
                    agent.state_pred_error.data.cpu().numpy()[0])
                print(to_print_info_string + ' ' + state_pred_error_print + ' ' + elbo)
                to_print_legend_string = "Upts, n_timesteps, min/med/mean/max, FPS, total_T, step_T, pred_error, elbo"
            else:
                print(to_print_info_string + ' ' + elbo)
                to_print_legend_string = "Upts, n_timesteps, min/med/mean/max, FPS, total_T, step_T, elbo"

            start2 = time.time()

            if j % (log_interval * 30) == 0:
                if ls_:
                    do_ls(envs_ls, agent, model_dict, total_num_steps,
                          update_current_state, update_rewards)

                # update plots
                try:
                    if ls_:
                        update_ls_plot(model_dict)
                    make_plots(model_dict)
                    print(to_print_legend_string + " Plot updated")
                except:
                    raise

    print(to_print_legend_string)
    try:
        make_plots(model_dict)
    except:
        print()
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.multiprocessing as mp
import torch.optim as optim

from this_util import *
from vae import VAE

is_cuda = True

vae_model = VAE()
vae_model.load_state_dict(torch.load(vae_model_path))
vae_model.eval()
if is_cuda:
    vae_model.cuda()
    vae_model.is_cuda = True

filelist = os.listdir(DATA_DIR)
filelist.sort()
N = len(filelist)

z_list = []
action_list = []
for i in range(N):
    filename = filelist[i]
    raw_data = np.load(os.path.join(DATA_DIR, filename))
    data = raw_data['obs']
    data = torch.from_numpy(data)
    if is_cuda:
        data = data.cuda()
    t = vae_model(data)
print(len(dataset))
print(len(dataset[ii][0]))      # single timepoint
print(dataset[ii][0][0].shape)  # action [1] a_t+1
print(dataset[ii][0][1].shape)  # state [2,84,84] s_t

state_dataset = []
for i in range(len(dataset)):
    for t in range(len(dataset[i])):
        state_dataset.append(dataset[i][t][1])

print(len(state_dataset))

print('Init VAE')
vae = VAE()
vae.cuda()

load_ = 1
train_ = 1
viz_ = 1

if load_:
    load_epoch = 50
    path_to_load_variables = home + '/Documents/tmp/breakout_2frames/vae_params' + str(load_epoch) + '.ckpt'
    vae.load_params(path_to_load_variables)

epochs = 100
if load_:
    path_to_save_variables = home + '/Documents/tmp/breakout_2frames/vae_params' + str(epochs + load_epoch) + '.ckpt'
data_dim = dsize * dsize
nr_mix = 10
# mean and scale for each component plus a weighting between components (10 + 2*10)
probs_size = (2 * nr_mix) + nr_mix
dout = data_dim * probs_size
latent_size = 64

encoder = Encoder(data_dim, latent_size)
decoder = Decoder(latent_size, dout)
vae = VAE(encoder, decoder, use_cuda)
# square error is not the correct loss - for ordered input,
# should use softmax for unordered input (like mine)
if use_cuda:
    print("using gpu")
    vae = vae.cuda()
    vae.encoder = vae.encoder.cuda()
    vae.decoder = vae.decoder.cuda()
opt = torch.optim.Adam(vae.parameters(), lr=1e-4)
epoch = 0

data_train_loader = DataLoader(FroggerDataset(train_data_dir,
                                              transform=transforms.ToTensor(),
                                              limit=args.num_train_limit),
                               batch_size=64, shuffle=True)
data_test_loader = DataLoader(FroggerDataset(test_data_dir,
                                             transform=transforms.ToTensor()),
                              batch_size=32, shuffle=True)
test_data = data_test_loader
def main():
    use_cuda = args.use_cuda
    half_precision = args.half_precision
    print("Cuda set to {} | Cuda availability: {}".format(
        use_cuda, torch.cuda.is_available()))

    experiment = "vae_latent3"
    # logger = SummaryWriter(log_dir='./logs', comment=experiment)

    train_data = UnlabeledContact(
        data='/home/ygx/data/fspeptide/fs_peptide.npy')
    print('Number of samples: {}'.format(len(train_data)))
    trainloader = DataLoader(train_data, batch_size=args.batch_size)

    # Contact matrices are 21x21
    input_size = 441

    encoder = Encoder(input_size=input_size, latent_size=3)
    decoder = Decoder(latent_size=3, output_size=input_size)
    vae = VAE(encoder, decoder, use_cuda=use_cuda, half_precision=half_precision)
    # criterion = nn.BCELoss()

    if use_cuda:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
        vae = vae.cuda()
        # criterion = criterion.cuda().half()
        if half_precision:
            encoder = encoder.half()
            decoder = decoder.half()
            vae = vae.half()

    optimizer = optim.SGD(vae.parameters(), lr=0.001)

    losses = AverageMeter()
    epoch_loss = 0
    total_loss = 0
    for epoch in range(100):
        for batch_idx, data in enumerate(trainloader):
            inputs = data['cont_matrix']
            inputs = inputs.resize_(args.batch_size, 1, 21, 21)
            inputs = inputs.float()
            if use_cuda:
                inputs = inputs.cuda()
                if half_precision:
                    inputs = inputs.half()
            inputs = Variable(inputs)

            # Compute output
            optimizer.zero_grad()
            dec = vae(inputs)

            # Measure the loss
            # kl = kl_loss(vae.z_mean, vae.z_sigma)
            # loss = criterion(dec, inputs)  # + kl  # Adding KL is causing loss > 1
            loss = loss_function(dec, inputs, vae.z_mean, vae.z_sigma)
            losses.update(loss.data[0], inputs.size(0))

            # Compute the gradient
            loss.backward()
            optimizer.step()
            epoch_loss += loss.data[0]

            # Logging
            # Adding graph is a lot of overhead
            # logger.add_graph_onnx(vae)

            # log loss values every iteration
            # logger.add_scalar('data/(train)loss_val', losses.val, batch_idx + 1)
            # logger.add_scalar('data/(train)loss_avg', losses.avg, batch_idx + 1)

            # log the layers and layers gradient histogram and distributions
            # for tag, value in vae.named_parameters():
            #     tag = tag.replace('.', '/')
            #     logger.add_histogram('model/(train)' + tag, to_numpy(value), batch_idx + 1)
            #     logger.add_histogram('model/(train)' + tag + '/grad', to_numpy(value.grad), batch_idx + 1)

            # log the outputs of the autoencoder
            # logger.add_image('model/(train)output', make_grid(dec.data), batch_idx + 1)

            if batch_idx % args.log_interval == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(trainloader.dataset),
                    100. * batch_idx / len(trainloader), loss.data[0]))

        # if epoch < 10:
        #     # Get latent encoding
        #     latent_array = encoder(inputs).data[0].cpu().numpy()
        #     filename = 'latent_epoch' + str(epoch)
        #     np.save('./latent_saves/kl_bce_latent3/' + filename, latent_array)
        #     # Get reconstructed image
        #     reconstructed_array = vae(inputs).data[0].cpu().numpy().reshape(21, 21)
        #     recon_filename = 'reconstructed_epoch' + str(epoch)
        #     np.save('./reconstruct_saves/kl_bce_latent3/' + recon_filename, reconstructed_array)

        if epoch % 10 == 0:
            torch.save(vae.state_dict(), args.save_path + 'epoch' + str(epoch))
            # latent_array = encoder(inputs).data[0].cpu().numpy()
            # filename = 'latent_epoch' + str(epoch)
            # np.save('./latent_saves/kl_bce_latent3/' + filename, latent_array)
            reconstructed_array = vae(inputs).data[0].cpu().float().numpy().reshape(21, 21)
            recon_filename = 'reconstructed_epoch' + str(epoch)
            np.save('./reconstruct_saves/kl_bce_latent3/' + recon_filename, reconstructed_array)
def train(model_dict):

    def update_current_state(current_state, state, channels):
        # current_state: [processes, channels*stack, height, width]
        state = torch.from_numpy(state).float()  # (processes, channels, height, width)
        # first stack*channel-channel frames = last stack*channel-channel, so slide them forward
        current_state[:, :-channels] = current_state[:, channels:]
        current_state[:, -channels:] = state  # last frame is now the new one
        return current_state

    def update_rewards(reward, done, final_rewards, episode_rewards, current_state):
        # Reward, Done: [P], [P]
        # final_rewards, episode_rewards: [P,1], [P,1]
        # current_state: [P,C*S,H,W]
        reward = torch.from_numpy(np.expand_dims(np.stack(reward), 1)).float()  # [P,1]
        episode_rewards += reward  # keeps track of current episode cumulative reward
        masks = torch.FloatTensor([[0.0] if done_ else [1.0] for done_ in done])  # [P,1]
        final_rewards *= masks  # erase the ones that are done
        final_rewards += (1 - masks) * episode_rewards  # set it to the cumulative episode reward
        episode_rewards *= masks  # erase the done ones
        masks = masks.type(dtype)  # cuda
        if current_state.dim() == 4:  # if state is a frame/image
            current_state *= masks.unsqueeze(2).unsqueeze(2)  # [P,1,1,1]
        else:
            current_state *= masks  # restart the done ones, by setting the state to zero
        return reward, masks, final_rewards, episode_rewards, current_state

    num_frames = model_dict['num_frames']
    cuda = model_dict['cuda']
    which_gpu = model_dict['which_gpu']
    num_steps = model_dict['num_steps']
    num_processes = model_dict['num_processes']
    seed = model_dict['seed']
    env_name = model_dict['env']
    save_dir = model_dict['save_to']
    num_stack = model_dict['num_stack']
    algo = model_dict['algo']
    save_interval = model_dict['save_interval']
    log_interval = model_dict['log_interval']
    save_params = model_dict['save_params']
    vid_ = model_dict['vid_']
    gif_ = model_dict['gif_']
    ls_ = model_dict['ls_']
    vae_ = model_dict['vae_']
    explore_ = model_dict['explore_']

    os.environ['OMP_NUM_THREADS'] = '1'
    os.environ['CUDA_VISIBLE_DEVICES'] = str(which_gpu)

    if cuda:
        torch.cuda.manual_seed(seed)
        dtype = torch.cuda.FloatTensor
        model_dict['dtype'] = dtype
    else:
        torch.manual_seed(seed)
        dtype = torch.FloatTensor
        model_dict['dtype'] = dtype

    # Create environments
    print(num_processes, 'processes')
    monitor_rewards_dir = os.path.join(save_dir, 'monitor_rewards')
    if not os.path.exists(monitor_rewards_dir):
        os.makedirs(monitor_rewards_dir)
        print('Made dir', monitor_rewards_dir)
    envs = SubprocVecEnv([
        make_env(env_name, seed, i, monitor_rewards_dir)
        for i in range(num_processes)
    ])

    if vid_:
        print('env for video')
        envs_video = make_env_monitor(env_name, save_dir)

    if gif_:
        print('env for gif')
        envs_gif = make_env_basic(env_name)

    if ls_:
        print('env for ls')
        envs_ls = make_env_basic(env_name)

    if vae_:
        print('env for vae')
        envs_vae = make_env_basic(env_name)

    obs_shape = envs.observation_space.shape  # (channels, height, width)
    obs_shape = (obs_shape[0] * num_stack, *obs_shape[1:])  # (channels*stack, height, width)
    shape_dim0 = envs.observation_space.shape[0]  # channels

    model_dict['obs_shape'] = obs_shape
    model_dict['shape_dim0'] = shape_dim0

    next_state_pred_ = 0
    model_dict['next_state_pred_'] = next_state_pred_

    # Create agents
    print('init exploit a2c agent')
    agent_exploit = a2c(envs, model_dict)
    if explore_:
        print('init explore a2c agent')
        agent_explore = a2c(envs, model_dict)

    print('init vae')
    vae = VAE()
    vae.cuda()

    # Init state
    state = envs.reset()  # (processes, channels, height, width)
    current_state = torch.zeros(num_processes, *obs_shape)  # (processes, channels*stack, height, width)
    current_state = update_current_state(current_state, state, shape_dim0).type(dtype)  # add the new frame, remove oldest
    # storage has states: (num_steps + 1, num_processes, *obs_shape), set first step
    agent_exploit.insert_first_state(current_state)
    if explore_:
        agent_explore.insert_first_state(current_state)

    # These are used to compute average rewards for all processes.
    episode_rewards = torch.zeros([num_processes, 1])  # keeps track of current episode cumulative reward
    final_rewards = torch.zeros([num_processes, 1])

    num_updates = int(num_frames) // num_steps // num_processes
    save_interval_num_updates = int(save_interval / num_processes / num_steps)

    # For normalizing the logprobs
    B = .99
    m = torch.FloatTensor([-100.]).cuda()
    v = torch.FloatTensor([10000.]).cuda()

    if model_dict['init_exploit_processes'] == -1:
        init_exploit_processes = num_processes
    else:
        init_exploit_processes = model_dict['init_exploit_processes']
    exploit_processes = init_exploit_processes

    all_frames = []
    start = time.time()
    start2 = time.time()
    for j in range(num_updates):
        start3 = time.time()
        for step in range(num_steps):

            state_pytorch = Variable(agent_exploit.rollouts.states[step])  # [P,S,84,84]

            u_value, u_action, u_action_log_probs, u_dist_entropy = agent_exploit.act(state_pytorch)
            if explore_:
                r_value, r_action, r_action_log_probs, r_dist_entropy = agent_explore.act(state_pytorch)

            u_cpu_actions = u_action.data.squeeze(1).cpu().numpy()  # [P]
            if explore_:
                r_cpu_actions = r_action.data.squeeze(1).cpu().numpy()  # [P]
                # Choose how many you want from each
                cpu_actions = np.concatenate((u_cpu_actions[:exploit_processes],
                                              r_cpu_actions[exploit_processes:]), 0)  # [P]
            # cpu_actions = u_cpu_actions

            # Step, S:[P,C,H,W], R:[P], D:[P]
            state, reward, done, info = envs.step(cpu_actions)

            # Record rewards and update state
            reward, masks, final_rewards, episode_rewards, current_state = update_rewards(
                reward, done, final_rewards, episode_rewards, current_state)
            current_state = update_current_state(current_state, state, shape_dim0)

            # Insert data for exploit agent
            agent_exploit.insert_data(step, current_state, u_action.data, u_value,
                                      reward, masks, u_action_log_probs, u_dist_entropy, 0)

            if explore_:
                # Insert log prob for explore agent: reward it with the normalized,
                # clipped negative ELBO of the newest frame under the VAE
                batch = state_pytorch[:, -1]  # last of stack
                batch = batch.contiguous()  # [P,84,84]
                elbo = vae.forward2(batch, k=10)  # [P]
                elbo = elbo.view(-1, 1).data  # [P,1]
                elbo = (elbo - m) / torch.sqrt(v)
                elbo = torch.clamp(elbo, max=.01)

                agent_explore.insert_data(step, current_state, r_action.data, r_value,
                                          -elbo, masks, r_action_log_probs, r_dist_entropy, 0)

                # update m and v
                m = B * m + (1. - B) * elbo.mean()
                v = B * v + (1. - B) * elbo.pow(2).mean()

                if elbo.mean() < -9000.:
                    print(elbo)
                    print(reward)
                    print(elbo.mean())
                    print(elbo.pow(2).mean())
                    raise RuntimeError('ELBO diverged')

        steps_time = time.time() - start3
        start3 = time.time()

        # Optimize agents
        agent_exploit.update()
        if explore_:
            agent_explore.update()

        # Optimize vae
        batch = agent_exploit.rollouts.states
        batch = batch[1:]  # [Steps,Processes,Stack,84,84]
        batch = batch[:, :, 0]  # [Steps,Processes,84,84]
        batch = batch.contiguous().view(-1, 84, 84)  # [Steps*Processes,84,84]
        elbo = vae.update(batch)

        # Insert state
        agent_exploit.insert_first_state(agent_exploit.rollouts.states[-1])
        if explore_:
            agent_explore.insert_first_state(agent_explore.rollouts.states[-1])

        total_num_steps = (j + 1) * num_processes * num_steps

        # Change number of explore vs exploit processes
        if model_dict['init_exploit_processes'] != -1 and model_dict['inc_exploiters_over'] != -1:
            # fraction of steps
            frac_step = np.minimum((total_num_steps + 1.) / float(model_dict['inc_exploiters_over']), 1.)
            aaa = int((num_processes - init_exploit_processes) * frac_step)
            exploit_processes = np.minimum(init_exploit_processes + aaa + 1, num_processes)

        update_time = time.time() - start3

        if j % save_interval_num_updates == 0 and save_dir != "" and j != 0:
            # Save model
            if save_params:
                do_params(save_dir, agent, total_num_steps, model_dict)
            # make video
            if vid_:
                do_vid(envs_video, update_current_state, shape_dim0, dtype, agent,
                       model_dict, total_num_steps)
            # make gif
            if gif_:
                do_gifs(envs_gif, agent, model_dict, update_current_state,
                        update_rewards, total_num_steps)
            # make vae prob gif
            if vae_:
                do_gifs3(envs_vae, agent_exploit, vae, model_dict, update_current_state,
                         update_rewards, total_num_steps)

        # Print updates
        if j % log_interval == 0:
            end = time.time()
            to_print_info_string = "{}, {}, {:.1f}/{:.1f}/{:.1f}/{:.1f}, {}, {:.1f}, {:.2f}".format(
                j, total_num_steps,
                final_rewards.min(), final_rewards.median(),
                final_rewards.mean(), final_rewards.max(),
                int(total_num_steps / (end - start)),
                end - start, end - start2)
            elbo = "{:.2f}".format(elbo.data.cpu().numpy()[0])
            steps_time = "{:.3f}".format(steps_time)
            update_time = "{:.3f}".format(update_time)

            print(to_print_info_string + ' ' + elbo + ' ' + str(exploit_processes))
            to_print_legend_string = "Upts, n_timesteps, min/med/mean/max, FPS, total_T, step_T, elbo, Exploit_Procs"
            start2 = time.time()

            if j % (log_interval * 30) == 0:
                if ls_:
                    do_ls_2(envs_ls, agent_explore, model_dict, total_num_steps,
                            update_current_state, update_rewards, vae)
                    update_ls_plot_2(model_dict)
                    print('updated ls')

                # update plots
                try:
                    start3 = time.time()
                    make_plots(model_dict)
                    print(to_print_legend_string + " Plot updated")
                except:
                    raise

    print(to_print_legend_string)
    try:
        make_plots(model_dict)
    except:
        print()
def train_vae(logger=None):
    out_dir, listdir, featslistdir = get_dirpaths(args)
    batchsize = args.batchsize
    hiddensize = args.hiddensize
    nmix = args.nmix
    nepochs = args.epochs

    data = colordata(
        os.path.join(out_dir, 'images'),
        listdir=listdir,
        featslistdir=featslistdir,
        split='train')
    nbatches = np.int_(np.floor(data.img_num / batchsize))
    data_loader = DataLoader(dataset=data, num_workers=args.nthreads,
                             batch_size=batchsize, shuffle=True, drop_last=True)

    model = VAE()
    model.cuda()
    model.train(True)

    optimizer = optim.Adam(model.parameters(), lr=5e-5)

    itr_idx = 0
    for epochs in range(nepochs):
        train_loss = 0.
        for batch_idx, (batch, batch_recon_const, batch_weights,
                        batch_recon_const_outres, _) in \
                tqdm(enumerate(data_loader), total=nbatches):
            input_color = Variable(batch).cuda()
            lossweights = Variable(batch_weights).cuda()
            lossweights = lossweights.view(batchsize, -1)
            input_greylevel = Variable(batch_recon_const).cuda()
            z = Variable(torch.randn(batchsize, hiddensize))

            optimizer.zero_grad()
            mu, logvar, color_out = model(input_color, input_greylevel, z)
            kl_loss, recon_loss, recon_loss_l2 = \
                vae_loss(mu, logvar, color_out, input_color, lossweights, batchsize)
            loss = kl_loss.mul(1e-2) + recon_loss
            recon_loss_l2.detach()
            loss.backward()
            optimizer.step()
            train_loss = train_loss + recon_loss_l2.data[0]

            if logger:
                logger.update_plot(itr_idx,
                                   [kl_loss.data[0], recon_loss.data[0], recon_loss_l2.data[0]],
                                   plot_type='vae')
            itr_idx += 1

            if batch_idx % args.logstep == 0:
                data.saveoutput_gt(color_out.cpu().data.numpy(),
                                   batch.numpy(),
                                   'train_%05d_%05d' % (epochs, batch_idx),
                                   batchsize,
                                   net_recon_const=batch_recon_const_outres.numpy())

        train_loss = (train_loss * 1.) / (nbatches)
        print('[DEBUG] VAE Train Loss, epoch %d has loss %f' % (epochs, train_loss))

        test_loss = test_vae(model)
        if logger:
            logger.update_test_plot(epochs, test_loss)
        print('[DEBUG] VAE Test Loss, epoch %d has loss %f' % (epochs, test_loss))

        torch.save(model.state_dict(), '%s/models/model_vae.pth' % (out_dir))
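# train_vae (above), train_mdn, and divcolor (earlier in this collection) form a
# three-stage pipeline: fit the conditional VAE, fit the MDN on top of the frozen
# VAE, then decode the predicted GMM means into diverse colorizations. A minimal
# driver sketch follows; it only illustrates the call order implied by the saved
# checkpoints (model_vae.pth is loaded by train_mdn, and both checkpoints are
# loaded by divcolor), not the project's actual entry point.
if __name__ == '__main__':
    train_vae()   # stage 1: learn the colour-field VAE
    train_mdn()   # stage 2: learn the MDN that predicts GMM parameters over z
    divcolor()    # stage 3: sample diverse colorizations from the GMM means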
print(dataset[ii][0][0].shape)  # action [1] a_t+1
print(dataset[ii][0][1].shape)  # state [2,84,84] s_t

state_dataset = []
for i in range(len(dataset)):
    for t in range(len(dataset[i])):
        state_dataset.append(dataset[i][t][1])

print(len(state_dataset))

print('Init VAE')
vae = VAE()
vae.cuda()

load_ = 1
train_ = 1
viz_ = 1

if load_:
    load_epoch = 50
    path_to_load_variables = home + '/Documents/tmp/breakout_2frames/vae_params' + str(load_epoch) + '.ckpt'
    vae.load_params(path_to_load_variables)

epochs = 100
if load_:
    path_to_save_variables = home + '/Documents/tmp/breakout_2frames/vae_params' + str(epochs + load_epoch) + '.ckpt'
else:
def main6():
    # vae test
    doc = Document(
        content=[
            ['to', 'the', 'editor', 're', 'for', 'women', 'worried', 'about',
             'fertility', 'egg', 'bank', 'is', 'a', 'new', 'option', 'sept', '00',
             'imagine', 'my', 'joy', 'in', 'reading', 'the', 'morning', 'newspapers',
             'on', 'the', 'day', 'of', 'my', '00th', 'birthday', 'and', 'finding',
             'not', 'one', 'but', 'two', 'articles', 'on', 'how', 'women', 's',
             'fertility', 'drops', 'off', 'precipitously', 'after', 'age', '00'],
            ['one', 'in', 'the', 'times', 'and', 'one', 'in', 'another', 'newspaper'],
            ['i', 'sense', 'a', 'conspiracy', 'here'],
            ['have', 'you', 'been', 'talking', 'to', 'my', 'mother', 'in', 'law'],
            ['laura', 'heymann', 'washington']],
        summary=[
            ['laura', 'heymann', 'letter', 'on', 'sept', '00', 'article', 'about',
             'using', 'egg', 'bank', 'to', 'prolong', 'fertility', 'expresses',
             'ironic', 'humor', 'about', 'her', 'age', 'and', 'chances', 'of',
             'becoming', 'pregnant']],
        label=[0.01] * 100,
        label_idx=[0.01] * 100)

    torch.manual_seed(233)
    torch.cuda.set_device(0)

    args = get_args()
    if args.data == "nyt":
        vocab_file = "/home/ml/lyu40/PycharmProjects/data/nyt/lda_domains/preprocessed/vocab_100d.p"
        with open(vocab_file, "rb") as f:
            vocab = pickle.load(f, encoding='latin1')
    else:
        vocab_file = '/home/ml/ydong26/data/CNNDM/CNN_DM_pickle_data/vocab_100d.p'
        with open(vocab_file, "rb") as f:
            vocab = pickle.load(f, encoding='latin1')

    config = Config(
        vocab_size=vocab.embedding.shape[0],
        embedding_dim=vocab.embedding.shape[1],
        category_size=args.category_size,
        category_dim=50,
        word_input_size=100,
        sent_input_size=2 * args.hidden,
        word_GRU_hidden_units=args.hidden,
        sent_GRU_hidden_units=args.hidden,
        pretrained_embedding=vocab.embedding,
        word2id=vocab.w2i,
        id2word=vocab.i2w,
    )
    model = VAE(config)
    if torch.cuda.is_available():
        model.cuda()

    train_loss = 0
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    x = prepare_data(doc, vocab.w2i)  # list of tokens, e.g. x = [[1,2,1],[1,1]]
    x = Variable(torch.from_numpy(x)).cuda()
    sents = Variable(torch.from_numpy(x)).cuda()

    optimizer.zero_grad()
    loss = 0
    for sent in sents:
        recon_batch, mu, logvar = model(sent.float())
        loss += loss_function(recon_batch, sent, mu, logvar)
    loss.backward()
    train_loss += loss.data[0]
    optimizer.step()