def test(model: vae.VAE, real_data):
    """Plot a grid of real images next to their VAE reconstructions."""
    model.eval()
    # scale uint8 images to [0, 1] and add a channel dimension
    real_data = real_data.to(model.device).unsqueeze(1).float() / 255.
    recon_data = model(real_data)[3]
    plot_grid(torch.cat([real_data.detach()[:32], recon_data.detach()[:32]], dim=0),
              figsize=(8, 8),
              gridspec_kw=dict(wspace=0, hspace=0))
    plt.show()
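# Hedged usage sketch for test(): assumes a trained checkpoint at a hypothetical
# path and a uint8 image batch such as torchvision MNIST; the vae.VAE
# constructor arguments and its .device attribute are taken on faith from the
# snippet above.
if __name__ == '__main__':
    from torchvision import datasets

    mnist = datasets.MNIST('data', train=False, download=True)
    batch = mnist.data[:32]  # uint8 tensor of shape (32, 28, 28)
    model = vae.VAE()  # hypothetical: constructor arguments omitted
    model.load_state_dict(torch.load('weights/vae/best.pth'))  # hypothetical path
    test(model, batch)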
def __init__(self, directory):
    vae_file = join(directory, 'vae', 'best.tar')
    rnn_file = join(directory, 'mdrnn', 'best.tar')
    assert exists(vae_file), "No VAE model in the directory..."
    assert exists(rnn_file), "No MDRNN model in the directory..."

    # spaces
    self.action_space = spaces.Box(np.array([-1, 0, 0]), np.array([1, 1, 1]))
    self.observation_space = spaces.Box(low=0, high=255,
                                        shape=(RED_SIZE, RED_SIZE, 3),
                                        dtype=np.uint8)

    # load VAE
    vae = VAE(3, LSIZE)
    vae_state = torch.load(vae_file,
                           map_location=lambda storage, location: storage)
    print("Loading VAE at epoch {}, with test error {}...".format(
        vae_state['epoch'], vae_state['precision']))
    vae.load_state_dict(vae_state['state_dict'])
    self._decoder = vae.decoder

    # load MDRNN
    self._rnn = MDRNNCell(32, 3, RSIZE, 5)
    rnn_state = torch.load(rnn_file,
                           map_location=lambda storage, location: storage)
    print("Loading MDRNN at epoch {}, with test error {}...".format(
        rnn_state['epoch'], rnn_state['precision']))
    # remove the '_l0' suffix that nn.LSTM adds to parameter names;
    # str.strip removes *characters*, not a suffix, so cut the suffix explicitly
    rnn_state_dict = {
        k[:-len('_l0')] if k.endswith('_l0') else k: v
        for k, v in rnn_state['state_dict'].items()
    }
    self._rnn.load_state_dict(rnn_state_dict)

    # init state
    self._lstate = torch.randn(1, LSIZE)
    self._hstate = 2 * [torch.zeros(1, RSIZE)]

    # obs
    self._obs = None
    self._visual_obs = None

    # rendering
    self.monitor = None
    self.figure = None
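# Hedged illustration (not from the original repo) of why the '_l0' suffix is
# removed with slicing above: str.strip deletes any of the given *characters*
# from both ends, not a literal suffix, and can silently corrupt key names.
assert 'weight_ih_l0'.strip('_l0') == 'weight_ih'  # happens to work
assert 'weight_hl_l0'.strip('_l0') == 'weight_h'   # silently eats a real 'l'
key = 'weight_hl_l0'
assert (key[:-len('_l0')] if key.endswith('_l0') else key) == 'weight_hl'  # correct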
def hvae_from_args(args):
    if args.net_type == 'vae':
        net = VAE(latent_size=args.latent_size,
                  img_size=args.IM_SIZE,
                  layer_sizes=args.layer_sizes)
        sizes_str = "_".join(str(x) for x in args.layer_sizes)
        file_name = 'VAE-' + sizes_str + '-' + str(args.latent_size) + \
                    '-' + str(args.dataset)
    # was a bare `if`, which sent net_type == 'vae' into the error branch
    elif args.net_type == 'CVAE_ART':
        net = CVAE_ART(latent_size=args.latent_size,
                       img_size=args.IM_SIZE,
                       num_labels=args.num_labels)
        file_name = 'CVAE_ART-' + str(args.latent_size) + '-' + str(args.IM_SIZE)
    elif args.net_type == 'ConvVAE2d':
        if args.small_net_type == 'CVAE_SMALL':
            small_net = CVAE_SMALL(latent_size=args.latent_size_small_vae,
                                   img_size=args.cvae_input_sz,
                                   num_labels=args.num_labels)
            net = ConvVAE2d(cvae_small=small_net,
                            cvae_input_sz=args.cvae_input_sz,
                            stride=args.stride,
                            img_size=args.IM_SIZE)
            file_name = 'ConvVAE2d-' + str(args.IM_SIZE) + '-' + \
                        str(args.stride) + '-' + str(args.cvae_input_sz) + \
                        '-' + str(args.latent_size_small_vae)
    else:
        print('Error: wrong net type')
        sys.exit(1)  # non-zero exit code on error
    return net, file_name
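# Hedged usage sketch: the argument values are hypothetical, but the file name
# follows the 'VAE-<layer_sizes>-<latent_size>-<dataset>' convention built above.
if __name__ == '__main__':
    from argparse import Namespace

    args = Namespace(net_type='vae', latent_size=32, IM_SIZE=32,
                     layer_sizes=[512, 256], dataset='mnist')
    net, file_name = hvae_from_args(args)
    print(file_name)  # VAE-512_256-32-mnist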
def init_model(args):
    # dispatch table over flow types; behaviour matches the original if/elif chain
    flow_map = {
        'no_flow': VAE,
        'boosted': BoostedVAE,
        'planar': PlanarVAE,
        'radial': RadialVAE,
        'liniaf': LinIAFVAE,
        'affine': AffineVAE,
        'nlsq': NLSqVAE,
        'iaf': IAFVAE,
        'realnvp': RealNVPVAE,
        'orthogonal': OrthogonalSylvesterVAE,
        'householder': HouseholderSylvesterVAE,
        'triangular': TriangularSylvesterVAE,
    }
    if args.flow not in flow_map:
        raise ValueError('Invalid flow choice')
    return flow_map[args.flow](args).to(args.device)
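# Hedged usage sketch for init_model(): in practice `args` is a full argparse
# namespace carrying many more model hyperparameters; only the two fields the
# dispatcher reads are shown here.
if __name__ == '__main__':
    from argparse import Namespace
    import torch

    args = Namespace(flow='planar', device=torch.device('cpu'))
    model = init_model(args)  # a PlanarVAE on the CPU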
def main(_):
    # create output directories if they do not exist (needed by every network type)
    if not tf.gfile.Exists(FLAGS.model_save_path):
        tf.gfile.MakeDirs(FLAGS.model_save_path)
    if not tf.gfile.Exists(FLAGS.sample_save_path):
        tf.gfile.MakeDirs(FLAGS.sample_save_path)

    if FLAGS.network == 'vae':
        model = VAE(mode=FLAGS.mode, batch_size=FLAGS.batch_size,
                    latent_dim=FLAGS.latent_dim)
        solver = VAE_Solver(model,
                            batch_size=FLAGS.batch_size,
                            train_iter=FLAGS.train_iter,
                            log_dir=FLAGS.log_save_path,
                            model_save_path=FLAGS.model_save_path,
                            sample_save_path=FLAGS.sample_save_path)
        if FLAGS.mode == 'train':
            solver.train()
        elif FLAGS.mode == 'reconstruct':
            solver.reconstruct()
        elif FLAGS.mode == 'sample':
            solver.sample()
        elif FLAGS.mode == 'encode':
            solver.encode()

    elif FLAGS.network == 'gan':
        z_dim = 100
        model = GAN(mode=FLAGS.mode)
        solver = GAN_Solver(model,
                            batch_size=FLAGS.batch_size,
                            z_dim=z_dim,
                            train_iter=FLAGS.train_iter,
                            log_dir=FLAGS.log_save_path,
                            model_save_path=FLAGS.model_save_path,
                            sample_save_path=FLAGS.sample_save_path)
        if FLAGS.mode == 'train':
            solver.train()
        elif FLAGS.mode == 'sample':
            solver.sample()

    elif FLAGS.network == 'acgan':
        z_dim = 128
        feature_class = 'Smiling'
        model = ACGAN(mode=FLAGS.mode, batch_size=FLAGS.batch_size)
        solver = ACGAN_Solver(model,
                              batch_size=FLAGS.batch_size,
                              z_dim=z_dim,
                              feature_class=feature_class,
                              train_iter=FLAGS.train_iter,
                              log_dir=FLAGS.log_save_path,
                              model_save_path=FLAGS.model_save_path,
                              sample_save_path=FLAGS.sample_save_path)
        if FLAGS.mode == 'train':
            solver.train()
        elif FLAGS.mode == 'sample':
            solver.sample()
def main():
    parser = argparse.ArgumentParser()
    # parser.add_argument('--env-type', default='gridworld')
    # parser.add_argument('--env-type', default='point_robot_sparse')
    # parser.add_argument('--env-type', default='cheetah_vel')
    # parser.add_argument('--env-type', default='ant_semicircle_sparse')
    parser.add_argument('--env-type', default='point_robot_wind')
    # parser.add_argument('--env-type', default='escape_room')
    args, rest_args = parser.parse_known_args()
    env = args.env_type

    # --- GridWorld ---
    if env == 'gridworld':
        args = args_gridworld.get_args(rest_args)
    # --- PointRobot ---
    elif env == 'point_robot_sparse':
        args = args_point_robot_sparse.get_args(rest_args)
    elif env == 'escape_room':
        args = args_point_robot_barrier.get_args(rest_args)
    elif env == 'point_robot_wind':
        args = args_point_robot_rand_params.get_args(rest_args)
    # --- Mujoco ---
    elif env == 'cheetah_vel':
        args = args_cheetah_vel.get_args(rest_args)
    elif env == 'ant_semicircle_sparse':
        args = args_ant_semicircle_sparse.get_args(rest_args)

    set_gpu_mode(torch.cuda.is_available() and args.use_gpu)

    args, env = off_utl.expand_args(args)

    dataset, goals = off_utl.load_dataset(data_dir=args.data_dir, args=args,
                                          arr_type='numpy')
    # dataset, goals = off_utl.load_dataset(args)
    if args.hindsight_relabelling:
        print('Performing reward relabelling...')
        dataset, goals = off_utl.mix_task_rollouts(dataset, env, goals, args)
    if args.policy_replaying:
        mix_dataset, mix_goals = off_utl.load_replaying_dataset(
            data_dir=args.replaying_data_dir, args=args)
        print('Performing policy replaying...')
        dataset, goals = off_utl.mix_policy_rollouts(dataset, goals,
                                                     mix_dataset, mix_goals, args)

    # vis test tasks
    # vis_train_tasks(env.unwrapped, goals)  # not with GridNavi

    if args.save_model:
        dir_prefix = args.save_dir_prefix if hasattr(args, 'save_dir_prefix') \
            and args.save_dir_prefix is not None else ''
        args.full_save_path = os.path.join(
            args.save_dir, args.env_name,
            dir_prefix + datetime.datetime.now().strftime('__%d_%m_%H_%M_%S'))
        os.makedirs(args.full_save_path, exist_ok=True)
        config_utl.save_config_file(args, args.full_save_path)

    vae = VAE(args)
    train(vae, dataset, goals, args)
def check():
    noise = torch.randn((100, 32))
    model = VAE()
    model.load_state_dict(torch.load("weights/vae/z25.pth"))
    model.eval()  # inference mode: disable dropout/batch-norm updates
    out = model.decoder(noise)
    fig, ax = plt.subplots(nrows=10, ncols=10)
    plt.axis('off')
    i = 0
    for row in ax:
        for col in row:
            col.imshow(out[i].view(28, 28).detach().cpu().numpy(), cmap='gray')
            col.set_axis_off()
            i += 1
    plt.show()
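# Hedged companion sketch: interpolate between two latent points and decode,
# using the same (assumed) decoder and checkpoint as check() above. The latent
# dimension 32 and 28x28 output are taken from that snippet.
def interpolate(steps=10):
    model = VAE()
    model.load_state_dict(torch.load("weights/vae/z25.pth"))
    model.eval()
    z0, z1 = torch.randn(1, 32), torch.randn(1, 32)
    with torch.no_grad():
        for i, t in enumerate(torch.linspace(0, 1, steps)):
            # linear interpolation in latent space, decoded to an image
            img = model.decoder((1 - t) * z0 + t * z1).view(28, 28)
            plt.subplot(1, steps, i + 1)
            plt.imshow(img.cpu().numpy(), cmap='gray')
            plt.axis('off')
    plt.show()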
def train():
    model = VAE()
    loss_fn = NegativeELBO()
    optimizer = Adam(model.parameters(), lr=0.001)
    dataloader = Mnist()
    model.to(device)
    for i in range(30):
        tots = 0
        for batch_id, (x, _) in enumerate(dataloader):
            if x.shape == torch.Size([784, 0]):  # skip an empty trailing batch
                break
            x = x.t()
            optimizer.zero_grad()
            out, mean, log_variance = model(x)
            loss = loss_fn(x, out, mean, log_variance)
            loss.backward()
            optimizer.step()
            tots += loss.item()
            if batch_id % 50 == 0:
                # per-sample losses, assuming a batch size of 100
                print(batch_id, loss.item() / 100, "\t", tots / (batch_id * 100 + 1))
        print("\n", i, tots / 60000, "\n")
    torch.save(model.state_dict(), "weights/vae/z25.pth")
def offline_experiment(doodad_config, variant):
    save_doodad_config(doodad_config)
    parser = argparse.ArgumentParser()
    # parser.add_argument('--env-type', default='gridworld')
    # parser.add_argument('--env-type', default='point_robot_sparse')
    # parser.add_argument('--env-type', default='cheetah_vel')
    parser.add_argument('--env-type', default='ant_semicircle_sparse')
    args, rest_args = parser.parse_known_args(args=[])
    env = args.env_type

    # --- GridWorld ---
    if env == 'gridworld':
        args = args_gridworld.get_args(rest_args)
    # --- PointRobot ---
    elif env == 'point_robot_sparse':
        args = args_point_robot_sparse.get_args(rest_args)
    # --- Mujoco ---
    elif env == 'cheetah_vel':
        args = args_cheetah_vel.get_args(rest_args)
    elif env == 'ant_semicircle_sparse':
        args = args_ant_semicircle_sparse.get_args(rest_args)

    set_gpu_mode(torch.cuda.is_available() and args.use_gpu)

    vae_args = config_utl.load_config_file(
        os.path.join(args.vae_dir, args.env_name,
                     args.vae_model_name, 'online_config.json'))
    # order of inputs to this function is important
    args = config_utl.merge_configs(vae_args, args)

    # Transform data to BAMDP (state relabelling)
    if args.transform_data_bamdp:
        # load VAE for state relabelling
        vae_models_path = os.path.join(args.vae_dir, args.env_name,
                                       args.vae_model_name, 'models')
        vae = VAE(args)
        off_utl.load_trained_vae(vae, vae_models_path)

        # load data and relabel
        save_data_path = os.path.join(args.main_data_dir, args.env_name,
                                      args.relabelled_data_dir)
        os.makedirs(save_data_path)
        dataset, goals = off_utl.load_dataset(data_dir=args.data_dir, args=args,
                                              arr_type='numpy')
        bamdp_dataset = off_utl.transform_mdps_ds_to_bamdp_ds(dataset, vae, args)

        # save relabelled data
        off_utl.save_dataset(save_data_path, bamdp_dataset, goals)

    learner = OfflineMetaLearner(args)
    learner.train()
def test_vae(image_shape, batch_size=128, hid_dim=2):
    x = torch.zeros(batch_size, *image_shape)
    encoder = VAE(image_shape, hid_dim)
    mean_image, sampled_image, logits, z, mean, stddev = encoder(x)
    assert mean_image.shape == x.shape
    assert sampled_image.shape == x.shape
    assert logits.shape == x.shape
    assert z.shape == (batch_size, hid_dim)
    assert mean.shape == (batch_size, hid_dim)
    assert stddev.shape == (batch_size, hid_dim)
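# Hedged smoke test: run the shape checks above for a couple of image shapes.
# The shapes are illustrative, and the VAE constructor signature is assumed
# from the snippet itself.
if __name__ == '__main__':
    for shape in [(1, 28, 28), (3, 32, 32)]:
        test_vae(shape, batch_size=8, hid_dim=2)
    print('test_vae passed')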
def _build_vae(self):
    vae = VAE(hidden_units=512,
              latent_space_dim=100,
              num_input_channels=self._num_input_channels,
              conditional=self.conditional,
              num_labels=self.num_labels,
              device=self.device)
    if self.checkpoint_path is not None:
        # map_location lets a checkpoint saved on GPU load on a CPU-only host
        vae.load_state_dict(torch.load(self.checkpoint_path,
                                       map_location=self.device))
    vae.to(device=self.device)
    return vae
def __init__(self, timelimit, pop_size, device):
    self.pop_size = pop_size
    self.truncation_threshold = int(pop_size / 2)  # pop_size should be divisible by two
    self.P = []

    # unique GA id
    self.init_time = datetime.now().strftime("%Y%m%d_%H%M%S")

    # load configuration params
    with open('config/creature.json') as f:
        config = json.load(f)

    model_fromdisk = config.get('vae.model.fromdisk')
    model_path = config.get('vae.model.path')
    latent_size = config.get('vae.latent.size')
    obs_size = config.get('vae.obs.size')
    num_effectors = config.get('joints.size') + config.get('brushes.size')
    input_size = latent_size + num_effectors
    output_size = num_effectors
    cpg_enabled = config.get('cpg.enabled')
    if cpg_enabled:
        input_size += 1
        output_size += 1

    # load vision module
    from models.vae import VAE
    vae = VAE(latent_size).cuda()
    if model_fromdisk:
        vae.load_state_dict(torch.load(model_path))
        vae.eval()  # inference mode
        print(f'Loaded VAE model {model_path} from disk')

    print(f'Generating initial population of {pop_size} candidates...')

    # initialize population
    from train import GAIndividual
    for _ in range(pop_size):
        self.P.append(GAIndividual(self.init_time,
                                   input_size,
                                   output_size,
                                   obs_size,
                                   compressor=vae,
                                   cpg_enabled=cpg_enabled,
                                   device=device,
                                   time_limit=timelimit))

    # report controller parameters
    self.num_controller_params = input_size * output_size + output_size
    print(f'Number of controller parameters: {self.num_controller_params}')
def __init__(self, num_epochs=50, batch_size=512, lr=1e-3, data_path=None,
             ckpt_save_path='.', model_type='cnn', schedule=0, gamma=0.5,
             train_mode='default'):
    self.train_mode = train_mode
    self.ckpt_save_path = ckpt_save_path
    print(f'Checkpoints will be stored at {ckpt_save_path}')

    """ Hyperparameters """
    self.num_epochs = num_epochs
    self.batch_size = batch_size
    self.lr = lr
    self.criterion = nn.MSELoss()

    """ Set up DataLoader & Sampler """
    # note: despite its name, data_path is expected to be a numpy array
    x = torch.from_numpy(data_path)
    self.dataset = CustomTensorDataset(x)
    t_len = int(len(self.dataset) * 0.9)
    v_len = len(self.dataset) - t_len
    train_set, valid_set = random_split(self.dataset, [t_len, v_len])
    print(f'Train set: {len(train_set)} | Validation set: {len(valid_set)}')

    self.train_sampler = RandomSampler(train_set)
    self.valid_sampler = SequentialSampler(valid_set)
    self.train_dataloader = DataLoader(train_set, sampler=self.train_sampler,
                                       batch_size=self.batch_size)
    self.valid_dataloader = DataLoader(valid_set, sampler=self.valid_sampler,
                                       batch_size=self.batch_size)

    """ Select model """
    model_choices = {
        'cnn': CNN_AutoEncoder(),
        'vae': VAE(),
    }
    self.model_type = model_type
    self.model = model_choices[self.model_type].cuda()
    print(f'Training model: {model_type}')

    self.optimizer = Adam(self.model.parameters(), lr=self.lr)
    self.scheduler = None
    self.schedule = schedule
    self.gamma = gamma
    if schedule != 0:  # enable lr_scheduler
        self.scheduler = lr_scheduler.StepLR(self.optimizer,
                                             step_size=self.schedule,
                                             gamma=self.gamma)
        print(f"Enabled lr_scheduler with step_size={schedule}, gamma={gamma}")
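# Hedged usage sketch: the enclosing class name is not shown in the snippet,
# so `Trainer` below is a hypothetical name. A CUDA device is assumed, since
# the model is moved with .cuda() above, and data_path is passed as an array.
if __name__ == '__main__':
    import numpy as np

    data = np.random.rand(256, 3, 64, 64).astype(np.float32)  # dummy images
    trainer = Trainer(num_epochs=1, batch_size=32,  # hypothetical class name
                      data_path=data, model_type='vae')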
def load_net(model_loc, args=None):
    # checkpoint base names encode the architecture,
    # e.g. 'VAE-<layer_sizes>-<latent_size>-<dataset>'
    model_file = Path(model_loc).name
    model_name = model_file.split('-')[0]
    if model_name == 'CVAE':
        model = CVAE(
            num_labels=int(model_file.split('-')[4].split('_')[0]),
            latent_size=int(model_file.split('-')[2]),
            img_size=32,
            layer_sizes=[int(i) for i in model_file.split('-')[1].split('_')])
    elif model_name == 'VAE':
        model = VAE(
            latent_size=int(model_file.split('-')[2]),
            img_size=32,
            layer_sizes=[int(i) for i in model_file.split('-')[1].split('_')])
    elif model_name == 'FEAT_VAE_MNIST':
        model = FEAT_VAE_MNIST(
            classifier_model=load_net(args.encoding_model_loc).to(args.device),
            num_features=int(model_file.split('-')[2].split('_')[0]),
            latent_size=int(model_file.split('-')[1].split('_')[0]))
    elif model_name == 'ConvVAE2d':
        latent_size_small_vae = int(model_file.split('-')[4].split('_')[0])
        cvae_input_sz = int(model_file.split('-')[3])
        stride = int(model_file.split('-')[2])
        IM_SIZE = int(model_file.split('-')[1])
        small_net = CVAE_SMALL(latent_size=latent_size_small_vae,
                               img_size=cvae_input_sz,
                               num_labels=11)
        model = ConvVAE2d(cvae_small=small_net,
                          cvae_input_sz=cvae_input_sz,
                          stride=stride,
                          img_size=IM_SIZE)
    else:
        print(f'Error: unknown model type in {model_file}')
        sys.exit(1)
    model.load_state_dict(torch.load(model_loc)['state_dict'])
    return model
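# Hedged usage sketch: the checkpoint path is hypothetical; its base name must
# follow the naming convention parsed above (cf. hvae_from_args earlier).
if __name__ == '__main__':
    model = load_net('checkpoints/VAE-512_256-32-mnist')
    model.eval()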
def main(): flags = tf.flags flags.DEFINE_integer("latent_dim", 64, "Dimension of latent space.") flags.DEFINE_integer("obs_dim", 12288, "Dimension of observation space.") flags.DEFINE_integer("batch_size", 64, "Batch size.") flags.DEFINE_integer("epochs", 500, "As it said") flags.DEFINE_integer( "updates_per_epoch", 100, "Really just can set to 1 if you don't like mini-batch.") FLAGS = flags.FLAGS kwargs = { 'latent_dim': FLAGS.latent_dim, 'batch_size': FLAGS.batch_size, 'observation_dim': FLAGS.obs_dim, 'encoder': conv_anime_encoder, 'decoder': conv_anime_decoder, 'observation_distribution': 'Gaussian' } vae = VAE(**kwargs) provider = Anime() tbar = tqdm(range(FLAGS.epochs)) for epoch in tbar: training_loss = 0. for _ in range(FLAGS.updates_per_epoch): x = provider.next_batch(FLAGS.batch_size) loss = vae.update(x) training_loss += loss training_loss /= FLAGS.updates_per_epoch s = "Loss: {:.4f}".format(training_loss) tbar.set_description(s) z = np.random.normal(size=[FLAGS.batch_size, FLAGS.latent_dim]) samples = vae.z2x(z)[0] show_samples(samples, 8, 8, [64, 64, 3], name='samples') vae.save_generator('weights/vae_anime/generator')
def main(): flags = tf.flags flags.DEFINE_integer("latent_dim", 2, "Dimension of latent space.") flags.DEFINE_integer("batch_size", 128, "Batch size.") flags.DEFINE_integer("epochs", 500, "As it said") flags.DEFINE_integer("updates_per_epoch", 100, "Really just can set to 1 if you don't like mini-batch.") flags.DEFINE_string("data_dir", 'mnist', "Tensorflow demo data download position.") FLAGS = flags.FLAGS kwargs = { 'latent_dim': FLAGS.latent_dim, 'batch_size': FLAGS.batch_size, 'encoder': fc_mnist_encoder, 'decoder': fc_mnist_decoder } vae = VAE(**kwargs) mnist = input_data.read_data_sets(train_dir=FLAGS.data_dir) tbar = tqdm(range(FLAGS.epochs)) for epoch in tbar: training_loss = 0. for _ in range(FLAGS.updates_per_epoch): x, _ = mnist.train.next_batch(FLAGS.batch_size) loss = vae.update(x) training_loss += loss training_loss /= FLAGS.updates_per_epoch s = "Loss: {:.4f}".format(training_loss) tbar.set_description(s) z = np.random.normal(size=[FLAGS.batch_size, FLAGS.latent_dim]) samples = vae.z2x(z)[0] show_samples(samples, 10, 10, [28, 28], name='samples') show_latent_scatter(vae, mnist, name='latent') vae.save_generator('weights/vae_mnist/generator')
# constants
ASIZE = 1
BSIZE = 16
SEQ_LEN = 140  # 4 seconds
LSIZE = 64
RSIZE = 512
epochs = 200

# Load VAE
vae_file = join(args.originallogdir, 'vae', 'best.tar')
assert exists(vae_file), "No trained VAE in the originallogdir..."
state = torch.load(vae_file, map_location={'cuda:0': str(device)})
print("Loading VAE at epoch {} with test error {}".format(
    state['epoch'], state['precision']))

vae = VAE(3, LSIZE).to(device)
vae.load_state_dict(state['state_dict'])
vae_optimizer = torch.optim.Adam(vae.parameters())
vae_scheduler = ReduceLROnPlateau(vae_optimizer, 'min', factor=0.5, patience=5)

# Load MDRNN
rnn_dir = join(args.originallogdir, 'mdrnn')
rnn_file = join(rnn_dir, 'best.tar')
assert exists(rnn_file), 'No trained MDNRNN in the originallogdir...'
mdrnn = MDRNN(LSIZE, ASIZE, RSIZE, 5)
mdrnn.to(device)
mdrnn_optimizer = torch.optim.RMSprop(mdrnn.parameters(), lr=1e-3, alpha=.9)
mdrnn_scheduler = ReduceLROnPlateau(mdrnn_optimizer, 'min', factor=0.5, patience=5)
transform_train = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((RED_SIZE, RED_SIZE)),
    # transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])
transform_test = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((RED_SIZE, RED_SIZE)),
    transforms.ToTensor(),
])

trained = 0
# model = VAE(3, LSIZE).to(device)
model = VAE(3, LSIZE)
model = torch.nn.DataParallel(model, device_ids=range(8))
model.cuda()
optimizer = optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.999))

model_p = VAE_a(7, LSIZE)
model_p = torch.nn.DataParallel(model_p, device_ids=range(8))
model_p.cuda()
optimizer_p = optim.Adam(model_p.parameters(), lr=learning_rate, betas=(0.9, 0.999))

controller = Controller(LSIZE, 3)
controller = torch.nn.DataParallel(controller, device_ids=range(8))
controller = controller.cuda()
optimizer_a = optim.SGD(controller.parameters(), lr=learning_rate * 10)
# scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=5)
# earlystopping = EarlyStopping('min', patience=30)

vis = visdom.Visdom(env='pa_train')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# constants
BSIZE = 16
SEQ_LEN = 32
epochs = 30

# Loading VAE
vae_file = join(args.logdir, 'vae', 'best.tar')
assert exists(vae_file), "No trained VAE in the logdir..."
state = torch.load(vae_file)
print("Loading VAE at epoch {} with test error {}".format(
    state['epoch'], state['precision']))

vae = VAE(3, LSIZE).to(device)
vae.load_state_dict(state['state_dict'])

# Loading model
rnn_dir = join(args.logdir, 'mdrnn')
rnn_file = join(rnn_dir, 'best.tar')

if not exists(rnn_dir):
    mkdir(rnn_dir)

mdrnn = MDRNN(LSIZE, ASIZE, RSIZE, 5)
mdrnn.to(device)
optimizer = torch.optim.RMSprop(mdrnn.parameters(), lr=1e-3, alpha=.9)
scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=5)
earlystopping = EarlyStopping('min', patience=30)
def run_vae():
    seed = np.random.randint(1, 2147462579)

    def sinus_seq(period, samples, length):
        X = np.linspace(-np.pi * (samples / period), np.pi * (samples / period), samples)
        X = np.reshape(np.sin(X), (-1, length))
        X += np.random.randn(*X.shape) * 0.1
        # X = (X - np.min(X))/(np.max(X) - np.min(X))
        # integer division: samples / length would be a float in Python 3
        return X, np.ones((samples // length, 1))

    X1, y1 = sinus_seq(40, 100000, 50)
    X2, y2 = sinus_seq(20, 40000, 50)

    X = np.concatenate((X1, X2)).astype('float32')
    y = np.concatenate((y1 * 0, y2 * 1), axis=0).astype('int')

    dim_samples, dim_features = X.shape
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8)

    # X, y, users, stats = har.load()
    #
    # limited_labels = y < 5
    # y = y[limited_labels]
    # X = X[limited_labels]
    # users = users[limited_labels]
    #
    # # Compress labels
    # for idx, label in enumerate(np.unique(y)):
    #     if not np.equal(idx, label):
    #         y[y == label] = idx
    #
    # y_unique = np.unique(y)
    # y = one_hot(y, len(y_unique))
    #
    # dim_samples, dim_sequence, dim_features = X.shape
    # num_classes = len(y_unique)
    #
    # # Split into train and test stratified by users
    # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=users)

    # Combine in sets
    train_set = (X_train, y_train)
    test_set = (X_test, y_test)
    print('Train size: ', train_set[0].shape)
    print('Test size: ', test_set[0].shape)

    n, n_x = train_set[0].shape  # datapoints in the dataset, input features
    n_batches = n // 100  # number of batches (integer division for Python 3)
    bs = n // n_batches  # batch size

    # Initialize the auxiliary deep generative model.
    model = VAE(n_x=int(n_x), n_z=16, z_hidden=[16], xhat_hidden=[32],
                x_dist='gaussian')

    # Get the training functions.
    f_train, f_test, f_validate, train_args, test_args, validate_args = \
        model.build_model(train_set, test_set)

    # Update the default function arguments.
    train_args['inputs']['batchsize'] = 100
    train_args['inputs']['learningrate'] = 1e-3
    train_args['inputs']['beta1'] = 0.9
    train_args['inputs']['beta2'] = 0.999

    def custom_evaluation(model, path):
        plt.clf()
        f, axarr = plt.subplots(nrows=len(np.unique(y)), ncols=1)
        for idx, y_l in enumerate(np.unique(y)):
            act_idx = test_set[1] == y_l
            test_act = test_set[0][act_idx[:, 0]]

            z = model.f_qz(test_act, 1)
            xhat = model.f_px(z, 1)

            axarr[idx].plot(test_act[:3].reshape(-1, 1), color='red')
            axarr[idx].plot(xhat[:3].reshape(-1, 1), color='blue', linestyle='dotted')

        f.set_size_inches(8, 5)
        f.savefig(path, dpi=100, format='png')
        plt.close(f)

    # Define training loop. Output training evaluations every epoch
    # and the custom evaluation method every 100 epochs.
    train = TrainModel(model=model, output_freq=1, pickle_f_custom_freq=100,
                       f_custom_eval=custom_evaluation)
    train.add_initial_training_notes("Training the rae with bn %s. seed %i."
                                     % (str(model.batchnorm), seed))
    train.train_model(f_train, train_args,
                      f_test, test_args,
                      f_validate, validate_args,
                      n_train_batches=n_batches,
                      n_epochs=10000,
                      anneal=[("learningrate", 100, 0.75, 3e-5)])
dataset_train = RolloutObservationDataset('datasets/pacman',
                                          transform_train, train=True)
dataset_test = RolloutObservationDataset('datasets/pacman',
                                         transform_test, train=False)
train_loader = torch.utils.data.DataLoader(dataset_train,
                                           batch_size=args.batch_size,
                                           shuffle=True, num_workers=2)
test_loader = torch.utils.data.DataLoader(dataset_test,
                                          batch_size=args.batch_size,
                                          shuffle=True, num_workers=2)

model = VAE(3, LSIZE).to(device)
optimizer = optim.Adam(model.parameters())
scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=5)
earlystopping = EarlyStopping('min', patience=30)


# Reconstruction + KL divergence losses summed over all elements and batch
def loss_function(recon_x, x, mu, logsigma):
    """ VAE loss function """
    # reduction='sum' replaces the deprecated size_average=False
    BCE = F.mse_loss(recon_x, x, reduction='sum')

    # see Appendix B from VAE paper:
    # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
    # https://arxiv.org/abs/1312.6114
    # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    KLD = -0.5 * torch.sum(1 + 2 * logsigma - mu.pow(2) - (2 * logsigma).exp())
    return BCE + KLD
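# Hedged sanity check for loss_function(): with a perfect reconstruction and a
# standard-normal posterior (mu = 0, logsigma = 0) both terms vanish. Shapes
# are illustrative; LSIZE comes from the constants defined above.
if __name__ == '__main__':
    x = torch.rand(4, 3, 64, 64)
    mu = torch.zeros(4, LSIZE)
    logsigma = torch.zeros(4, LSIZE)
    assert loss_function(x, x, mu, logsigma).item() == 0.0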
                                         transform_test, train=False)
train_loader = torch.utils.data.DataLoader(dataset_train,
                                           batch_size=args.batch_size,
                                           shuffle=True, num_workers=8,
                                           drop_last=True)
test_loader = torch.utils.data.DataLoader(dataset_test,
                                          batch_size=args.batch_size,
                                          shuffle=True, num_workers=8,
                                          drop_last=True)

trained = 0
# model = VAE(3, LSIZE).to(device)
model = VAE(3, LSIZE)
model = torch.nn.DataParallel(model, device_ids=range(8))
model.cuda()
optimizer = optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.999))
# scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=5)
# earlystopping = EarlyStopping('min', patience=30)

vis = visdom.Visdom(env='vae_pt')

ground_window = vis.image(
    np.random.rand(64, 64),
    opts=dict(title='ground!', caption='ground.'),
)
image_window = vis.image(
                    action='store_true',
                    help='Does not save samples during training if specified')
args = parser.parse_args()

cuda = torch.cuda.is_available()
learning_rate = 1e-4

torch.manual_seed(914)
# Fix numeric divergence due to bug in Cudnn
torch.backends.cudnn.benchmark = True

device = torch.device("cuda" if cuda else "cpu")

trained = 0
# model = VAE(3, LSIZE).to(device)
model = VAE(3, LSIZE)
model = torch.nn.DataParallel(model, device_ids=range(8))
model.cuda()
model.eval()

# vis = visdom.Visdom(env='vae_pt')
#
# ground_window = vis.image(
#     np.random.rand(RED_SIZE*10, RED_SIZE*10),
#     opts=dict(title='ground!', caption='ground.'),
# )
# image_window = vis.image(
#     np.random.rand(RED_SIZE*10, RED_SIZE*10),
#     opts=dict(title='image!', caption='image.'),
# )
# vae_window = vis.image(
#     np.random.rand(RED_SIZE*10, RED_SIZE*10),
def main(
    model_name, dataset, dataroot, download, augment, batch_size,
    eval_batch_size, epochs, saved_model, seed, hidden_channels, K, L,
    actnorm_scale, flow_permutation, flow_coupling, LU_decomposed, learn_top,
    y_condition, y_weight, max_grad_clip, max_grad_norm, lr, n_workers, cuda,
    n_init_batches, output_dir, saved_optimizer, warmup,
):
    vis = visdom.Visdom()
    env = "{}_{}".format(model_name, dataset)
    device = "cpu" if (not torch.cuda.is_available() or not cuda) else "cuda:0"

    check_manual_seed(seed)

    ds = check_dataset(dataset, dataroot, augment, download)
    image_shape, num_classes, train_dataset, test_dataset = ds

    # Note: unsupported for now
    multi_class = False

    train_loader = data.DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=n_workers,
        drop_last=True,
    )
    test_loader = data.DataLoader(
        test_dataset,
        batch_size=eval_batch_size,
        shuffle=False,
        num_workers=n_workers,
        drop_last=False,
    )

    if model_name == "Glow":
        model = Glow(
            image_shape, hidden_channels, K, L, actnorm_scale,
            flow_permutation, flow_coupling, LU_decomposed, num_classes,
            learn_top, y_condition,
        )
    elif model_name == "VAE":
        model = VAE(image_shape, hidden_channels)

    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=5e-5)

    lr_lambda = lambda epoch: min(1.0, (epoch + 1) / warmup)  # noqa
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lr_lambda)

    train_loss_window = create_plot_window(vis, env, '#Iterations', 'Loss',
                                           'Training Loss')
    val_avg_loss_window = create_plot_window(vis, env, '#Epochs', 'Loss',
                                             'Validation Average Loss')
    train_image_window = create_image_window(vis, env, 'Training Images')

    def step(engine, batch):
        model.train()
        optimizer.zero_grad()

        x, y = batch
        x = x.to(device)

        if y_condition:
            y = y.to(device)
            z, nll, y_logits = model(x, y)
            losses = compute_loss_y(nll, y_logits, y_weight, y, multi_class)
        else:
            z, nll, y_logits, im = model(x)
            losses = compute_loss(nll)

        if engine.state.iteration % 250 == 1:
            vis.line(X=np.array([engine.state.iteration]),
                     Y=np.array([losses["total_loss"].item()]),
                     win=train_loss_window, update='append', env=env)
            vis.images(postprocess(im), nrow=16, win=train_image_window, env=env)

        losses["total_loss"].backward()

        if max_grad_clip > 0:
            torch.nn.utils.clip_grad_value_(model.parameters(), max_grad_clip)
        if max_grad_norm > 0:
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)

        optimizer.step()
        return losses

    def eval_step(engine, batch):
        model.eval()

        x, y = batch
        x = x.to(device)

        with torch.no_grad():
            if y_condition:
                y = y.to(device)
                z, nll, y_logits = model(x, y)
                losses = compute_loss_y(nll, y_logits, y_weight, y,
                                        multi_class, reduction="none")
            else:
                z, nll, y_logits, im = model(x)
                losses = compute_loss(nll, reduction="none")

        return losses

    trainer = Engine(step)
    checkpoint_handler = ModelCheckpoint(output_dir, model_name,
                                         save_interval=1, n_saved=5,
                                         require_empty=False)
    trainer.add_event_handler(
        Events.EPOCH_COMPLETED,
        checkpoint_handler,
        {"model": model, "optimizer": optimizer},
    )

    monitoring_metrics = ["total_loss"]
    RunningAverage(output_transform=lambda x: x["total_loss"]).attach(
        trainer, "total_loss")

    evaluator = Engine(eval_step)

    # Note: replace by https://github.com/pytorch/ignite/pull/524 when released
    Loss(
        lambda x, y: torch.mean(x),
        output_transform=lambda x: (
            x["total_loss"],
            torch.empty(x["total_loss"].shape[0]),
        ),
    ).attach(evaluator, "total_loss")

    if y_condition:
        monitoring_metrics.extend(["nll"])
        RunningAverage(output_transform=lambda x: x["nll"]).attach(trainer, "nll")

        # Note: replace by https://github.com/pytorch/ignite/pull/524 when released
        Loss(
            lambda x, y: torch.mean(x),
            output_transform=lambda x: (x["nll"], torch.empty(x["nll"].shape[0])),
        ).attach(evaluator, "nll")

    pbar = ProgressBar()
    pbar.attach(trainer, metric_names=monitoring_metrics)

    # load pre-trained model if given
    if saved_model:
        model.load_state_dict(torch.load(saved_model))
        model.set_actnorm_init()

        if saved_optimizer:
            optimizer.load_state_dict(torch.load(saved_optimizer))

        file_name, ext = os.path.splitext(saved_model)
        resume_epoch = int(file_name.split("_")[-1])

        @trainer.on(Events.STARTED)
        def resume_training(engine):
            engine.state.epoch = resume_epoch
            engine.state.iteration = resume_epoch * len(engine.state.dataloader)

    @trainer.on(Events.STARTED)
    def init(engine):
        model.train()

        init_batches = []
        init_targets = []

        with torch.no_grad():
            for batch, target in islice(train_loader, None, n_init_batches):
                init_batches.append(batch)
                init_targets.append(target)

            init_batches = torch.cat(init_batches).to(device)
            assert init_batches.shape[0] == n_init_batches * batch_size

            if y_condition:
                init_targets = torch.cat(init_targets).to(device)
                model(init_batches, init_targets)
            else:
                init_targets = None
                model(init_batches)

    @trainer.on(Events.EPOCH_COMPLETED)
    def evaluate(engine):
        evaluator.run(test_loader)
        scheduler.step()
        metrics = evaluator.state.metrics
        losses = ", ".join([f"{key}: {value:.2f}" for key, value in metrics.items()])
        vis.line(X=np.array([engine.state.epoch]),
                 Y=np.array([metrics["total_loss"]]),
                 win=val_avg_loss_window, update='append', env=env)
        print(f"Validation Results - Epoch: {engine.state.epoch} {losses}")

    timer = Timer(average=True)
    timer.attach(
        trainer,
        start=Events.EPOCH_STARTED,
        resume=Events.ITERATION_STARTED,
        pause=Events.ITERATION_COMPLETED,
        step=Events.ITERATION_COMPLETED,
    )

    @trainer.on(Events.EPOCH_COMPLETED)
    def print_times(engine):
        pbar.log_message(
            f"Epoch {engine.state.epoch} done. Time per batch: {timer.value():.3f}[s]"
        )
        timer.reset()

    trainer.run(train_loader, epochs)
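# Hedged invocation sketch: main() is presumably driven by a CLI wrapper
# elsewhere in the repo; calling it directly looks like this. Every value is
# hypothetical, and a visdom server is assumed to be running locally. With
# model_name='VAE', the Glow-specific arguments (K, L, flow_*, learn_top, ...)
# are accepted but unused.
if __name__ == '__main__':
    main(model_name='VAE', dataset='cifar10', dataroot='data', download=True,
         augment=False, batch_size=64, eval_batch_size=256, epochs=10,
         saved_model=None, seed=0, hidden_channels=128, K=32, L=3,
         actnorm_scale=1.0, flow_permutation='invconv', flow_coupling='affine',
         LU_decomposed=True, learn_top=True, y_condition=False, y_weight=0.01,
         max_grad_clip=0, max_grad_norm=0, lr=5e-4, n_workers=4, cuda=True,
         n_init_batches=8, output_dir='output', saved_optimizer=None, warmup=5)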
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.ToTensor()),
    batch_size=args.batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.ToTensor()),
    batch_size=args.batch_size, shuffle=True, **kwargs)

in_channel = 1
in_height = in_width = 28
model = VAE(height=in_height, width=in_width, in_channel=in_channel,
            z_dim=args.z_dim, k=args.train_k).to(device)
optimizer = optim.Adam(model.parameters(), lr=args.lr)


def train_iwae(epoch):
    model.train()
    train_loss = 0
    for batch_idx, (data, _) in enumerate(train_loader):
        data = data.to(device)
        optimizer.zero_grad()
        x_prime, z, mu, logvar = model(data)
        loss = model.neg_elbo_iwae(data, x_prime,
class OfflineMetaLearner:
    """Offline meta-learner: no interaction with the environment."""

    def __init__(self, args):
        """
        Seeds everything.
        Initialises: logger, environments, policy (+storage +optimiser).
        """
        self.args = args

        # make sure everything has the same seed
        utl.seed(self.args.seed)

        # initialize tensorboard logger
        if self.args.log_tensorboard:
            self.tb_logger = TBLogger(self.args)

        self.args, env = off_utl.expand_args(self.args, include_act_space=True)
        if self.args.act_space.__class__.__name__ == "Discrete":
            self.args.policy = 'dqn'
        else:
            self.args.policy = 'sac'

        # load buffers with data
        if 'load_data' not in self.args or self.args.load_data:
            # env is input just for the possible relabelling option
            goals, augmented_obs_dim = self.load_buffer(env)
            self.args.augmented_obs_dim = augmented_obs_dim
            self.goals = goals

        # initialize policy
        self.initialize_policy()

        # load vae for inference in evaluation
        self.load_vae()

        # create environment for evaluation
        self.env = make_env(args.env_name,
                            args.max_rollouts_per_task,
                            presampled_tasks=args.presampled_tasks,
                            seed=args.seed)
        # n_tasks=self.args.num_eval_tasks)
        if self.args.env_name == 'GridNavi-v2':
            self.env.unwrapped.goals = [tuple(goal.astype(int))
                                        for goal in self.goals]

    def initialize_policy(self):
        if self.args.policy == 'dqn':
            q_network = FlattenMlp(input_size=self.args.augmented_obs_dim,
                                   output_size=self.args.act_space.n,
                                   hidden_sizes=self.args.dqn_layers).to(ptu.device)
            self.agent = DQN(
                q_network,
                # optimiser_vae=self.optimizer_vae,
                lr=self.args.policy_lr,
                gamma=self.args.gamma,
                tau=self.args.soft_target_tau,
            ).to(ptu.device)
        else:
            # assert self.args.act_space.__class__.__name__ == "Box", (
            #     "Can't train SAC with discrete action space!")
            q1_network = FlattenMlp(
                input_size=self.args.augmented_obs_dim + self.args.action_dim,
                output_size=1,
                hidden_sizes=self.args.dqn_layers).to(ptu.device)
            q2_network = FlattenMlp(
                input_size=self.args.augmented_obs_dim + self.args.action_dim,
                output_size=1,
                hidden_sizes=self.args.dqn_layers).to(ptu.device)
            policy = TanhGaussianPolicy(
                obs_dim=self.args.augmented_obs_dim,
                action_dim=self.args.action_dim,
                hidden_sizes=self.args.policy_layers).to(ptu.device)
            self.agent = SAC(
                policy,
                q1_network,
                q2_network,
                actor_lr=self.args.actor_lr,
                critic_lr=self.args.critic_lr,
                gamma=self.args.gamma,
                tau=self.args.soft_target_tau,
                use_cql=self.args.use_cql if 'use_cql' in self.args else False,
                alpha_cql=self.args.alpha_cql if 'alpha_cql' in self.args else None,
                entropy_alpha=self.args.entropy_alpha,
                automatic_entropy_tuning=self.args.automatic_entropy_tuning,
                alpha_lr=self.args.alpha_lr,
                clip_grad_value=self.args.clip_grad_value,
            ).to(ptu.device)

    def load_vae(self):
        self.vae = VAE(self.args)
        vae_models_path = os.path.join(self.args.vae_dir, self.args.env_name,
                                       self.args.vae_model_name, 'models')
        off_utl.load_trained_vae(self.vae, vae_models_path)

    def load_buffer(self, env):
        if self.args.hindsight_relabelling:
            # without arr_type loading -- GPU will explode
            dataset, goals = off_utl.load_dataset(
                data_dir=self.args.relabelled_data_dir,
                args=self.args,
                num_tasks=self.args.num_train_tasks,
                allow_dense_data_loading=False,
                arr_type='numpy')
            dataset = off_utl.batch_to_trajectories(dataset, self.args)
            dataset, goals = off_utl.mix_task_rollouts(
                dataset, env, goals, self.args)  # reward relabelling
            dataset = off_utl.trajectories_to_batch(dataset)
        else:
            dataset, goals = off_utl.load_dataset(
                data_dir=self.args.relabelled_data_dir,
                args=self.args,
                num_tasks=self.args.num_train_tasks,
                allow_dense_data_loading=False,
                arr_type='numpy')

        augmented_obs_dim = dataset[0][0].shape[1]

        self.storage = MultiTaskPolicyStorage(
            max_replay_buffer_size=max([d[0].shape[0] for d in dataset]),
            obs_dim=dataset[0][0].shape[1],
            action_space=self.args.act_space,
            tasks=range(len(goals)),
            trajectory_len=self.args.trajectory_len)

        # `task_set` renamed from `set` to avoid shadowing the builtin
        for task, task_set in enumerate(dataset):
            self.storage.add_samples(task,
                                     observations=task_set[0],
                                     actions=task_set[1],
                                     rewards=task_set[2],
                                     next_observations=task_set[3],
                                     terminals=task_set[4])
        return goals, augmented_obs_dim

    def train(self):
        self._start_training()
        for iter_ in range(self.args.num_iters):
            self.training_mode(True)
            indices = np.random.choice(len(self.goals), self.args.meta_batch)
            train_stats = self.update(indices)
            self.training_mode(False)
            self.log(iter_ + 1, train_stats)

    def update(self, tasks):
        rl_losses_agg = {}
        for update in range(self.args.rl_updates_per_iter):
            # sample random RL batch
            obs, actions, rewards, next_obs, terms = self.sample_rl_batch(
                tasks, self.args.batch_size)

            # flatten out task dimension
            t, b, _ = obs.size()
            obs = obs.view(t * b, -1)
            actions = actions.view(t * b, -1)
            rewards = rewards.view(t * b, -1)
            next_obs = next_obs.view(t * b, -1)
            terms = terms.view(t * b, -1)

            # RL update
            rl_losses = self.agent.update(obs, actions, rewards, next_obs, terms,
                                          action_space=self.env.action_space)

            for k, v in rl_losses.items():
                if update == 0:  # first iterate - create list
                    rl_losses_agg[k] = [v]
                else:  # append values
                    rl_losses_agg[k].append(v)
        # take mean
        for k in rl_losses_agg:
            rl_losses_agg[k] = np.mean(rl_losses_agg[k])
        self._n_rl_update_steps_total += self.args.rl_updates_per_iter

        return rl_losses_agg

    def evaluate(self):
        num_episodes = self.args.max_rollouts_per_task
        num_steps_per_episode = self.env.unwrapped._max_episode_steps
        num_tasks = self.args.num_eval_tasks
        obs_size = self.env.unwrapped.observation_space.shape[0]

        returns_per_episode = np.zeros((num_tasks, num_episodes))
        success_rate = np.zeros(num_tasks)

        rewards = np.zeros((num_tasks, self.args.trajectory_len))
        reward_preds = np.zeros((num_tasks, self.args.trajectory_len))
        observations = np.zeros((num_tasks, self.args.trajectory_len + 1, obs_size))
        if self.args.policy == 'sac':
            log_probs = np.zeros((num_tasks, self.args.trajectory_len))

        # This part is very specific for the Semi-Circle env
        # if self.args.env_name == 'PointRobotSparse-v0':
        #     reward_belief = np.zeros((num_tasks, self.args.trajectory_len))
        #
        #     low_x, high_x, low_y, high_y = -2., 2., -1., 2.
        #     resolution = 0.1
        #     grid_x = np.arange(low_x, high_x + resolution, resolution)
        #     grid_y = np.arange(low_y, high_y + resolution, resolution)
        #     centers_x = (grid_x[:-1] + grid_x[1:]) / 2
        #     centers_y = (grid_y[:-1] + grid_y[1:]) / 2
        #     yv, xv = np.meshgrid(centers_y, centers_x, sparse=False, indexing='ij')
        #     centers = np.vstack([xv.ravel(), yv.ravel()]).T
        #     n_grid_points = centers.shape[0]
        #     reward_belief_discretized = np.zeros((num_tasks, self.args.trajectory_len, centers.shape[0]))

        for task_loop_i, task in enumerate(self.env.unwrapped.get_all_eval_task_idx()):
            obs = ptu.from_numpy(self.env.reset(task))
            obs = obs.reshape(-1, obs.shape[-1])
            step = 0

            # get prior parameters
            with torch.no_grad():
                task_sample, task_mean, task_logvar, hidden_state = \
                    self.vae.encoder.prior(batch_size=1)

            observations[task_loop_i, step, :] = ptu.get_numpy(obs[0, :obs_size])

            for episode_idx in range(num_episodes):
                running_reward = 0.
                for step_idx in range(num_steps_per_episode):
                    # add distribution parameters to observation - policy is
                    # conditioned on the posterior
                    augmented_obs = self.get_augmented_obs(obs, task_mean, task_logvar)
                    if self.args.policy == 'dqn':
                        action, value = self.agent.act(obs=augmented_obs,
                                                       deterministic=True)
                    else:
                        action, _, _, log_prob = self.agent.act(
                            obs=augmented_obs,
                            deterministic=self.args.eval_deterministic,
                            return_log_prob=True)

                    # observe reward and next obs
                    next_obs, reward, done, info = utl.env_step(
                        self.env, action.squeeze(dim=0))
                    running_reward += reward.item()
                    # done_rollout = False if ptu.get_numpy(done[0][0]) == 0. else True

                    # update encoding
                    task_sample, task_mean, task_logvar, hidden_state = \
                        self.update_encoding(obs=next_obs,
                                             action=action,
                                             reward=reward,
                                             done=done,
                                             hidden_state=hidden_state)

                    rewards[task_loop_i, step] = reward.item()
                    reward_preds[task_loop_i, step] = ptu.get_numpy(
                        self.vae.reward_decoder(task_sample, next_obs, obs, action)[0, 0])

                    # This part is very specific for the Semi-Circle env
                    # if self.args.env_name == 'PointRobotSparse-v0':
                    #     reward_belief[task, step] = ptu.get_numpy(
                    #         self.vae.compute_belief_reward(task_mean, task_logvar, obs, next_obs, action)[0])
                    #
                    #     reward_belief_discretized[task, step, :] = ptu.get_numpy(
                    #         self.vae.compute_belief_reward(task_mean.repeat(n_grid_points, 1),
                    #                                        task_logvar.repeat(n_grid_points, 1),
                    #                                        None,
                    #                                        torch.cat((ptu.FloatTensor(centers),
                    #                                                   ptu.zeros(centers.shape[0], 1)), dim=-1).unsqueeze(0),
                    #                                        None)[:, 0])

                    observations[task_loop_i, step + 1, :] = ptu.get_numpy(
                        next_obs[0, :obs_size])
                    if self.args.policy != 'dqn':
                        log_probs[task_loop_i, step] = ptu.get_numpy(log_prob[0])

                    if "is_goal_state" in dir(self.env.unwrapped) \
                            and self.env.unwrapped.is_goal_state():
                        success_rate[task_loop_i] = 1.
                    # set: obs <- next_obs
                    obs = next_obs.clone()
                    step += 1

                returns_per_episode[task_loop_i, episode_idx] = running_reward

        if self.args.policy == 'dqn':
            return returns_per_episode, success_rate, observations, rewards, reward_preds
        # This part is very specific for the Semi-Circle env
        # elif self.args.env_name == 'PointRobotSparse-v0':
        #     return returns_per_episode, success_rate, log_probs, observations, \
        #            rewards, reward_preds, reward_belief, reward_belief_discretized, centers
        else:
            return returns_per_episode, success_rate, log_probs, observations, \
                rewards, reward_preds

    def log(self, iteration, train_stats):
        # --- save model ---
        if iteration % self.args.save_interval == 0:
            save_path = os.path.join(self.tb_logger.full_output_folder, 'models')
            if not os.path.exists(save_path):
                os.mkdir(save_path)
            torch.save(self.agent.state_dict(),
                       os.path.join(save_path, "agent{0}.pt".format(iteration)))

        if iteration % self.args.log_interval == 0:
            if self.args.policy == 'dqn':
                returns, success_rate, observations, rewards, reward_preds = \
                    self.evaluate()
            # This part is super specific for the Semi-Circle env
            # elif self.args.env_name == 'PointRobotSparse-v0':
            #     returns, success_rate, log_probs, observations, \
            #         rewards, reward_preds, reward_belief, reward_belief_discretized, points = self.evaluate()
            else:
                returns, success_rate, log_probs, observations, rewards, reward_preds = \
                    self.evaluate()

            if self.args.log_tensorboard:
                tasks_to_vis = np.random.choice(self.args.num_eval_tasks, 5)
                for i, task in enumerate(tasks_to_vis):
                    self.env.reset(task)
                    if PLOT_VIS:
                        self.tb_logger.writer.add_figure(
                            'policy_vis/task_{}'.format(i),
                            utl_eval.plot_rollouts(observations[task, :], self.env),
                            self._n_rl_update_steps_total)
                    self.tb_logger.writer.add_figure(
                        'reward_prediction_train/task_{}'.format(i),
                        utl_eval.plot_rew_pred_vs_rew(rewards[task, :],
                                                      reward_preds[task, :]),
                        self._n_rl_update_steps_total)
                    # self.tb_logger.writer.add_figure(
                    #     'reward_prediction_train/task_{}'.format(i),
                    #     utl_eval.plot_rew_pred_vs_reward_belief_vs_rew(
                    #         rewards[task, :], reward_preds[task, :], reward_belief[task, :]),
                    #     self._n_rl_update_steps_total)
                    # This part is super specific for the Semi-Circle env
                    # if self.args.env_name == 'PointRobotSparse-v0':
                    #     for t in range(0, int(self.args.trajectory_len/4), 3):
                    #         self.tb_logger.writer.add_figure(
                    #             'discrete_belief_reward_pred_task_{}/timestep_{}'.format(i, t),
                    #             utl_eval.plot_discretized_belief_halfcircle(
                    #                 reward_belief_discretized[task, t, :],
                    #                 points, self.env, observations[task, :t+1]),
                    #             self._n_rl_update_steps_total)

                if self.args.max_rollouts_per_task > 1:
                    for episode_idx in range(self.args.max_rollouts_per_task):
                        self.tb_logger.writer.add_scalar(
                            'returns_multi_episode/episode_{}'.format(episode_idx + 1),
                            np.mean(returns[:, episode_idx]),
                            self._n_rl_update_steps_total)
                    self.tb_logger.writer.add_scalar(
                        'returns_multi_episode/sum',
                        np.mean(np.sum(returns, axis=-1)),
                        self._n_rl_update_steps_total)
                    self.tb_logger.writer.add_scalar(
                        'returns_multi_episode/success_rate',
                        np.mean(success_rate),
                        self._n_rl_update_steps_total)
                else:
                    self.tb_logger.writer.add_scalar(
                        'returns/returns_mean', np.mean(returns),
                        self._n_rl_update_steps_total)
                    self.tb_logger.writer.add_scalar(
                        'returns/returns_std', np.std(returns),
                        self._n_rl_update_steps_total)
                    self.tb_logger.writer.add_scalar(
                        'returns/success_rate', np.mean(success_rate),
                        self._n_rl_update_steps_total)

                if self.args.policy == 'dqn':
                    self.tb_logger.writer.add_scalar(
                        'rl_losses/qf_loss_vs_n_updates', train_stats['qf_loss'],
                        self._n_rl_update_steps_total)
                    # weights and gradients
                    for name, module in [('q_network', self.agent.qf),
                                         ('q_target', self.agent.target_qf)]:
                        params = list(module.parameters())
                        self.tb_logger.writer.add_scalar(
                            'weights/' + name, params[0].mean(),
                            self._n_rl_update_steps_total)
                        if params[0].grad is not None:
                            self.tb_logger.writer.add_scalar(
                                'gradients/' + name,
                                sum(p.grad.mean() for p in params),
                                self._n_rl_update_steps_total)
                else:
                    self.tb_logger.writer.add_scalar(
                        'policy/log_prob', np.mean(log_probs),
                        self._n_rl_update_steps_total)
                    self.tb_logger.writer.add_scalar(
                        'rl_losses/qf1_loss', train_stats['qf1_loss'],
                        self._n_rl_update_steps_total)
                    self.tb_logger.writer.add_scalar(
                        'rl_losses/qf2_loss', train_stats['qf2_loss'],
                        self._n_rl_update_steps_total)
                    self.tb_logger.writer.add_scalar(
                        'rl_losses/policy_loss', train_stats['policy_loss'],
                        self._n_rl_update_steps_total)
                    self.tb_logger.writer.add_scalar(
                        'rl_losses/alpha_entropy_loss',
                        train_stats['alpha_entropy_loss'],
                        self._n_rl_update_steps_total)

                    # weights and gradients
                    for name, module in [('q1_network', self.agent.qf1),
                                         ('q1_target', self.agent.qf1_target),
                                         ('q2_network', self.agent.qf2),
                                         ('q2_target', self.agent.qf2_target),
                                         ('policy', self.agent.policy)]:
                        params = list(module.parameters())
                        self.tb_logger.writer.add_scalar(
                            'weights/' + name, params[0].mean(),
                            self._n_rl_update_steps_total)
                        if params[0].grad is not None:
                            self.tb_logger.writer.add_scalar(
                                'gradients/' + name,
                                sum(p.grad.mean() for p in params),
                                self._n_rl_update_steps_total)

            for k, v in [('num_rl_updates', self._n_rl_update_steps_total),
                         ('time_elapsed', time.time() - self._start_time),
                         ('iteration',
                          iteration)]:
                self.tb_logger.writer.add_scalar(k, v, self._n_rl_update_steps_total)
            self.tb_logger.finish_iteration(iteration)

            print("Iteration -- {}, Success rate -- {:.3f}, Avg. return -- {:.3f}, "
                  "Elapsed time {:5d}[s]".format(iteration,
                                                 np.mean(success_rate),
                                                 np.mean(np.sum(returns, axis=-1)),
                                                 int(time.time() - self._start_time)))

    def sample_rl_batch(self, tasks, batch_size):
        """Sample a batch of unordered RL training data from a list/array of tasks."""
        # this batch consists of transitions sampled randomly from the replay buffer
        batches = [ptu.np_to_pytorch_batch(self.storage.random_batch(task, batch_size))
                   for task in tasks]
        unpacked = [utl.unpack_batch(batch) for batch in batches]
        # group elements together
        unpacked = [[x[i] for x in unpacked] for i in range(len(unpacked[0]))]
        unpacked = [torch.cat(x, dim=0) for x in unpacked]
        return unpacked

    def _start_training(self):
        self._n_rl_update_steps_total = 0
        self._start_time = time.time()

    def training_mode(self, mode):
        self.agent.train(mode)

    def update_encoding(self, obs, action, reward, done, hidden_state):
        # reset hidden state of the recurrent net when the task is done
        hidden_state = self.vae.encoder.reset_hidden(hidden_state, done)
        with torch.no_grad():  # size should be (batch, dim)
            task_sample, task_mean, task_logvar, hidden_state = self.vae.encoder(
                actions=action.float(),
                states=obs,
                rewards=reward,
                hidden_state=hidden_state,
                return_prior=False)
        return task_sample, task_mean, task_logvar, hidden_state

    @staticmethod
    def get_augmented_obs(obs, mean, logvar):
        mean = mean.reshape((-1, mean.shape[-1]))
        logvar = logvar.reshape((-1, logvar.shape[-1]))
        return torch.cat((obs, mean, logvar), dim=-1)

    def load_model(self, agent_path, device='cpu'):
        self.agent.load_state_dict(torch.load(agent_path, map_location=device))
        self.load_vae()
        self.training_mode(False)
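# Hedged illustration of OfflineMetaLearner.get_augmented_obs(): the policy
# input is the raw observation concatenated with the posterior mean and
# log-variance of the task belief. The dimensions below are illustrative.
if __name__ == '__main__':
    obs = torch.zeros(1, 4)
    mean, logvar = torch.zeros(1, 5), torch.zeros(1, 5)
    aug = OfflineMetaLearner.get_augmented_obs(obs, mean, logvar)
    assert aug.shape == (1, 14)  # 4 + 5 + 5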
def _train_vae(log_dir, offline_buffer_path, saved_tasks_path, env_type, seed,
               path_length, meta_episode_len, load_buffer_kwargs=None, **kwargs):
    with open(os.path.join(log_dir, 'test.txt'), 'w') as f:
        f.write("hello from train_vae_offline.py")
    if load_buffer_kwargs is None:
        load_buffer_kwargs = {}
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    parser = argparse.ArgumentParser()
    # parser.add_argument('--env-type', default='gridworld')
    # parser.add_argument('--env-type', default='point_robot_sparse')
    # parser.add_argument('--env-type', default='cheetah_vel')
    parser.add_argument('--env-type', default='ant_semicircle_sparse')
    extra_args = []
    for k, v in kwargs.items():
        extra_args.append('--{}'.format(k))
        extra_args.append(str(v))
    args, rest_args = parser.parse_known_args(args=extra_args)

    if env_type == 'cheetah_vel':
        args = args_cheetah_vel.get_args(rest_args)
        args.env_name = 'HalfCheetahVel-v0'
    elif env_type == 'ant_dir':
        # TODO: replace with ant_dir env
        args = args_ant_semicircle_sparse.get_args(rest_args)
        parser.add_argument('--env-name', default='AntSemiCircleSparse-v0')
        args.env_name = 'AntDir-v0'
    elif env_type == 'walker':
        args = args_walker_param.get_args(rest_args)
    elif env_type == 'hopper':
        args = args_hopper_param.get_args(rest_args)
    elif env_type == 'humanoid':
        args = args_humanoid_dir.get_args(rest_args)
    else:
        raise ValueError('Unknown env_type: {}'.format(env_type))

    set_gpu_mode(torch.cuda.is_available() and args.use_gpu)

    args, env = off_utl.expand_args(args)
    args.save_dir = os.path.join(log_dir, 'trained_vae')
    args.trajectory_len = path_length

    task_data = joblib.load(saved_tasks_path)
    tasks = task_data['tasks']

    print("loading dataset")
    with open(os.path.join(log_dir, 'tmp1.txt'), 'w') as f:
        f.write("train_vae_offline.py: start loading dataset")
    dataset, goals = off_utl.load_pearl_buffer(
        pretrain_buffer_path=offline_buffer_path,
        tasks=tasks,
        add_done_info=env.add_done_info,
        path_length=path_length,
        meta_episode_len=meta_episode_len,
        **load_buffer_kwargs)
    with open(os.path.join(log_dir, 'tmp1.txt'), 'a') as f:
        f.write("train_vae_offline.py: done loading dataset")
    print("done loading dataset")
    for data in dataset:
        print(data[0].shape)
    dataset = [[x.astype(np.float32) for x in d] for d in dataset]

    if args.save_model:
        dir_prefix = args.save_dir_prefix if hasattr(args, 'save_dir_prefix') \
            and args.save_dir_prefix is not None else ''
        args.full_save_path = os.path.join(
            args.save_dir, args.env_name,
            dir_prefix + datetime.datetime.now().strftime('__%d_%m_%H_%M_%S'))
        os.makedirs(args.full_save_path, exist_ok=True)
        config_utl.save_config_file(args, args.full_save_path)

    vae = VAE(args)
    train(vae, dataset, args)
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

from models import *
from utils import *
from train import *
from models.vae import VAE

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
SOS_token = 0  # Start Of Sentence token
EOS_token = 1  # End Of Sentence token
MAX_LENGTH = 16
max_length = 16

if __name__ == '__main__':
    # ---------- Hyperparameters ----------
    hidden_size = 256
    cond_size = 4
    latent_size = 32
    vocab_size = 28  # vocabulary size

    vae = VAE(vocab_size, hidden_size, latent_size, cond_size, vocab_size).to(device)
    words, tenses = prepare_data()
    data = MyData()
    data_loader = DataLoader(data, batch_size=32, shuffle=True,
                             collate_fn=collate_fn)

    history = trainEpochs(vae, data_loader, n_epochs=5000,
                          learning_rate=0.001, verbose=False)
    save_model(vae, model_name='vae_5000')
print("high, low", env.action_space.high, env.action_space.low) print("environment details") print("env.observation_space", env.observation_space) print("high, low", env.observation_space.high, env.observation_space.low) assert False ''' return env transform = transforms.Compose([ transforms.ToPILImage(), transforms.Resize((64, 64)), # transforms.RandomHorizontalFlip(), transforms.ToTensor(), ]) # from https://github.com/openai/gym/blob/master/gym/envs/box2d/car_racing.py if __name__=="__main__": model=VAE(3, 64) model=torch.nn.DataParallel(model,device_ids=range(1)) model.cuda() controller=Controller_class(64,3) controller=torch.nn.DataParallel(controller,device_ids=range(1)) controller=controller.cuda() state = torch.load('/home/ld/gym-car/log/class/contorl_checkpoint_10.pkl') controller.load_state_dict(state['state_dict']) print('contorller load success') state = torch.load('/home/ld/gym-car/log/class/vae_checkpoint_10.pkl') model.load_state_dict(state['state_dict']) print('vae load success') # from pyglet.window import key action = np.array( [0.0, 0.0, 0.0] ) # def key_press(k, mod): # global restart