Example #1
def test(model: vae.VAE, real_data):
    model.eval()
    real_data = real_data.to(model.device).unsqueeze(1).float() / 255.
    recon_data = model(real_data)[3]
    plot_grid(torch.cat([real_data.detach()[:32], recon_data.detach()[:32]], dim=0),
              figsize=(8, 8), gridspec_kw=dict(wspace=0, hspace=0))
    plt.show()
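plot_grid is not shown in this listing; below is a minimal sketch of such a helper, assuming it simply tiles a batch of single-channel image tensors with matplotlib (the name, signature and behaviour are assumptions, not the original code):

def plot_grid(images, figsize=(8, 8), gridspec_kw=None):
    """Tile a batch of (N, 1, H, W) image tensors into a square matplotlib grid."""
    import math
    import matplotlib.pyplot as plt

    n = images.shape[0]
    side = math.ceil(math.sqrt(n))
    fig, axes = plt.subplots(side, side, figsize=figsize, gridspec_kw=gridspec_kw)
    for ax in axes.flat:
        ax.set_axis_off()
    for ax, img in zip(axes.flat, images):
        ax.imshow(img.squeeze(0).cpu().numpy(), cmap='gray')
    return fig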
Example #2
    def __init__(self, directory):
        vae_file = join(directory, 'vae', 'best.tar')
        rnn_file = join(directory, 'mdrnn', 'best.tar')
        assert exists(vae_file), "No VAE model in the directory..."
        assert exists(rnn_file), "No MDRNN model in the directory..."

        # spaces
        self.action_space = spaces.Box(np.array([-1, 0, 0]),
                                       np.array([1, 1, 1]))
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(RED_SIZE, RED_SIZE, 3),
                                            dtype=np.uint8)

        # load VAE
        vae = VAE(3, LSIZE)
        vae_state = torch.load(vae_file,
                               map_location=lambda storage, location: storage)
        print("Loading VAE at epoch {}, "
              "with test error {}...".format(vae_state['epoch'],
                                             vae_state['precision']))
        vae.load_state_dict(vae_state['state_dict'])
        self._decoder = vae.decoder

        # load MDRNN
        self._rnn = MDRNNCell(32, 3, RSIZE, 5)
        rnn_state = torch.load(rnn_file,
                               map_location=lambda storage, location: storage)
        print("Loading MDRNN at epoch {}, "
              "with test error {}...".format(rnn_state['epoch'],
                                             rnn_state['precision']))
        # drop the LSTM's "_l0" layer suffix so keys match MDRNNCell parameter names
        rnn_state_dict = {
            k.replace('_l0', ''): v
            for k, v in rnn_state['state_dict'].items()
        }
        self._rnn.load_state_dict(rnn_state_dict)

        # init state
        self._lstate = torch.randn(1, LSIZE)
        self._hstate = 2 * [torch.zeros(1, RSIZE)]

        # obs
        self._obs = None
        self._visual_obs = None

        # rendering
        self.monitor = None
        self.figure = None
Example #3
def hvae_from_args(args):
    if (args.net_type == 'vae'):
        net = VAE(latent_size=args.latent_size,
                  img_size=args.IM_SIZE,
                  layer_sizes=args.layer_sizes)
        sizes_str = "_".join(str(x) for x in args.layer_sizes)
        file_name = 'VAE-' + str(sizes_str) + '-' + str(
            args.latent_size) + '-' + str(args.dataset)

    elif (args.net_type == 'CVAE_ART'):
        net = CVAE_ART(latent_size=args.latent_size,
                       img_size=args.IM_SIZE,
                       num_labels=args.num_labels)
        file_name = 'CVAE_ART-' + str(args.latent_size) + '-' + str(
            args.IM_SIZE)

    elif (args.net_type == 'ConvVAE2d'):
        if (args.small_net_type == 'CVAE_SMALL'):
            small_net = CVAE_SMALL(latent_size=args.latent_size_small_vae,
                                   img_size=args.cvae_input_sz,
                                   num_labels=args.num_labels)

        net = ConvVAE2d(cvae_small=small_net,
                        cvae_input_sz=args.cvae_input_sz,
                        stride=args.stride,
                        img_size=args.IM_SIZE)
        file_name = 'ConvVAE2d-' + str(args.IM_SIZE) + '-' + str(
            args.stride) + '-' + str(args.cvae_input_sz) + '-' + str(
                args.latent_size_small_vae)
    else:
        print('Error : Wrong net type')
        sys.exit(1)
    return net, file_name
Example #4
def init_model(args):
    if args.flow == 'no_flow':
        model = VAE(args).to(args.device)
    elif args.flow == 'boosted':
        model = BoostedVAE(args).to(args.device)
    elif args.flow == 'planar':
        model = PlanarVAE(args).to(args.device)
    elif args.flow == 'radial':
        model = RadialVAE(args).to(args.device)
    elif args.flow == 'liniaf':
        model = LinIAFVAE(args).to(args.device)
    elif args.flow == 'affine':
        model = AffineVAE(args).to(args.device)
    elif args.flow == 'nlsq':
        model = NLSqVAE(args).to(args.device)
    elif args.flow == 'iaf':
        model = IAFVAE(args).to(args.device)
    elif args.flow == "realnvp":
        model = RealNVPVAE(args).to(args.device)
    elif args.flow == 'orthogonal':
        model = OrthogonalSylvesterVAE(args).to(args.device)
    elif args.flow == 'householder':
        model = HouseholderSylvesterVAE(args).to(args.device)
    elif args.flow == 'triangular':
        model = TriangularSylvesterVAE(args).to(args.device)
    else:
        raise ValueError('Invalid flow choice')

    return model
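The chain of elif branches above maps each flow name to a model class; the same dispatch can be written more compactly with a dictionary. A purely illustrative alternative (assuming the same classes are importable), not the repository's actual code:

FLOW_MODELS = {
    'no_flow': VAE,
    'boosted': BoostedVAE,
    'planar': PlanarVAE,
    'radial': RadialVAE,
    'liniaf': LinIAFVAE,
    'affine': AffineVAE,
    'nlsq': NLSqVAE,
    'iaf': IAFVAE,
    'realnvp': RealNVPVAE,
    'orthogonal': OrthogonalSylvesterVAE,
    'householder': HouseholderSylvesterVAE,
    'triangular': TriangularSylvesterVAE,
}

def init_model(args):
    try:
        model_cls = FLOW_MODELS[args.flow]
    except KeyError:
        raise ValueError('Invalid flow choice')
    return model_cls(args).to(args.device)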
Example #5
def main(_):
    
    if FLAGS.network == 'vae':
        model = VAE(mode=FLAGS.mode, batch_size=FLAGS.batch_size, latent_dim=FLAGS.latent_dim)
        solver = VAE_Solver(model, batch_size=FLAGS.batch_size, train_iter=FLAGS.train_iter, log_dir=FLAGS.log_save_path,
                        model_save_path=FLAGS.model_save_path, sample_save_path=FLAGS.sample_save_path)
        
        # create directories if not exist
        if not tf.gfile.Exists(FLAGS.model_save_path):
            tf.gfile.MakeDirs(FLAGS.model_save_path)
        if not tf.gfile.Exists(FLAGS.sample_save_path):
            tf.gfile.MakeDirs(FLAGS.sample_save_path)
        
        if FLAGS.mode == 'train':
            solver.train()
        elif FLAGS.mode == 'reconstruct':
            solver.reconstruct()
        elif FLAGS.mode == 'sample':
            solver.sample()
        elif FLAGS.mode == 'encode':
            solver.encode()

    elif FLAGS.network == 'gan':
        z_dim = 100
        model = GAN(mode=FLAGS.mode)
        solver = GAN_Solver(model, batch_size=FLAGS.batch_size, z_dim=z_dim, train_iter=FLAGS.train_iter, log_dir=FLAGS.log_save_path,
                        model_save_path=FLAGS.model_save_path, sample_save_path=FLAGS.sample_save_path)
        
        # create directories if not exist
        if not tf.gfile.Exists(FLAGS.model_save_path):
            tf.gfile.MakeDirs(FLAGS.model_save_path)
        if not tf.gfile.Exists(FLAGS.sample_save_path):
            tf.gfile.MakeDirs(FLAGS.sample_save_path)
        
        if FLAGS.mode == 'train':
            solver.train()
        elif FLAGS.mode == 'sample':
            solver.sample()

    elif FLAGS.network == 'acgan':
        z_dim = 128
        feature_class = 'Smiling'
        model = ACGAN(mode=FLAGS.mode, batch_size=FLAGS.batch_size)
        solver = ACGAN_Solver(model, batch_size=FLAGS.batch_size, z_dim=z_dim, feature_class=feature_class, 
                        train_iter=FLAGS.train_iter, log_dir=FLAGS.log_save_path,
                        model_save_path=FLAGS.model_save_path, sample_save_path=FLAGS.sample_save_path)
        
        # create directories if not exist
        if not tf.gfile.Exists(FLAGS.model_save_path):
            tf.gfile.MakeDirs(FLAGS.model_save_path)
        if not tf.gfile.Exists(FLAGS.sample_save_path):
            tf.gfile.MakeDirs(FLAGS.sample_save_path)
        
        if FLAGS.mode == 'train':
            solver.train()
        elif FLAGS.mode == 'sample':
            solver.sample()
Example #6
def main():
    parser = argparse.ArgumentParser()
    # parser.add_argument('--env-type', default='gridworld')
    # parser.add_argument('--env-type', default='point_robot_sparse')
    # parser.add_argument('--env-type', default='cheetah_vel')
    # parser.add_argument('--env-type', default='ant_semicircle_sparse')
    parser.add_argument('--env-type', default='point_robot_wind')
    # parser.add_argument('--env-type', default='escape_room')

    args, rest_args = parser.parse_known_args()
    env = args.env_type

    # --- GridWorld ---
    if env == 'gridworld':
        args = args_gridworld.get_args(rest_args)
    # --- PointRobot ---
    elif env == 'point_robot_sparse':
        args = args_point_robot_sparse.get_args(rest_args)
    elif env == 'escape_room':
        args = args_point_robot_barrier.get_args(rest_args)
    elif env == 'point_robot_wind':
        args = args_point_robot_rand_params.get_args(rest_args)
    # --- Mujoco ---
    elif env == 'cheetah_vel':
        args = args_cheetah_vel.get_args(rest_args)
    elif env == 'ant_semicircle_sparse':
        args = args_ant_semicircle_sparse.get_args(rest_args)

    set_gpu_mode(torch.cuda.is_available() and args.use_gpu)

    args, env = off_utl.expand_args(args)

    dataset, goals = off_utl.load_dataset(data_dir=args.data_dir, args=args, arr_type='numpy')
    # dataset, goals = off_utl.load_dataset(args)
    if args.hindsight_relabelling:
        print('Perform reward relabelling...')
        dataset, goals = off_utl.mix_task_rollouts(dataset, env, goals, args)

    if args.policy_replaying:
        mix_dataset, mix_goals = off_utl.load_replaying_dataset(data_dir=args.replaying_data_dir, args=args)
        print('Perform policy replaying...')
        dataset, goals = off_utl.mix_policy_rollouts(dataset, goals, mix_dataset, mix_goals, args)

    # vis test tasks
    # vis_train_tasks(env.unwrapped, goals)     # not with GridNavi

    if args.save_model:
        dir_prefix = args.save_dir_prefix if hasattr(args, 'save_dir_prefix') \
                                             and args.save_dir_prefix is not None else ''
        args.full_save_path = os.path.join(args.save_dir, args.env_name,
                                           dir_prefix + datetime.datetime.now().strftime('__%d_%m_%H_%M_%S'))
        os.makedirs(args.full_save_path, exist_ok=True)
        config_utl.save_config_file(args, args.full_save_path)

    vae = VAE(args)
    train(vae, dataset, goals, args)
Example #7
def check():
    noise = torch.randn((100,32))    

    model = VAE()
    model.load_state_dict(torch.load("weights/vae/z25.pth"))
    model.eval()  # inference mode

    with torch.no_grad():
        out = model.decoder(noise)

    fig, ax = plt.subplots(nrows=10, ncols=10)

    plt.axis('off')
    i=0
    for row in ax:
        for col in row:
            col.imshow(out[i].view(28,28).detach().cpu().numpy(),cmap='gray')
            col.set_axis_off()
            i+=1

    plt.show()
Example #8
def train():

    model = VAE()
    loss_fn = NegativeELBO()
    optimizer = Adam(model.parameters(),lr=0.001)
    dataloader = Mnist()
    model.to(device)

    for i in range(30):
        tots = 0
        for batch_id,(x,_) in enumerate(dataloader):
            if torch.Size([784,0]) == x.shape:
                break  # stop when the loader returns an empty batch
            x = x.t()
            optimizer.zero_grad()
            
            out,mean,log_variance = model(x)
            
            loss = loss_fn(x,out,mean,log_variance)
            loss.backward()
            optimizer.step()

            tots+=loss.item()

            if batch_id % 50 == 0:
                print(batch_id, loss.item()/100, "\t", tots/(batch_id*100+1))  # per-sample losses (batch size 100)

        print("\n", i, tots/60000, "\n")  # epoch mean loss over the 60k MNIST training images
        torch.save(model.state_dict(),"weights/vae/z25.pth")
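NegativeELBO and Mnist are defined elsewhere in that repository and are not shown here. A minimal sketch of what a negative-ELBO criterion could look like with a Bernoulli decoder (an assumption about the unseen class, assuming the model output is already sigmoid-activated):

import torch
import torch.nn as nn
import torch.nn.functional as F

class NegativeELBO(nn.Module):
    """-ELBO = reconstruction term + KL(q(z|x) || N(0, I)), summed over the batch."""
    def forward(self, x, x_recon, mean, log_variance):
        # Bernoulli reconstruction log-likelihood (binary cross-entropy, summed)
        recon = F.binary_cross_entropy(x_recon, x, reduction='sum')
        # closed-form KL divergence between N(mean, exp(log_variance)) and N(0, I)
        kl = -0.5 * torch.sum(1 + log_variance - mean.pow(2) - log_variance.exp())
        return recon + kl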
Example #9
    def __init__(self, directory):
        vae_file = join(directory, 'vae', 'best.tar')
        rnn_file = join(directory, 'mdrnn', 'best.tar')
        assert exists(vae_file), "No VAE model in the directory..."
        assert exists(rnn_file), "No MDRNN model in the directory..."

        # spaces
        self.action_space = spaces.Box(np.array([-1, 0, 0]), np.array([1, 1, 1]))
        self.observation_space = spaces.Box(low=0, high=255, shape=(RED_SIZE, RED_SIZE, 3),
                                            dtype=np.uint8)

        # load VAE
        vae = VAE(3, LSIZE)
        vae_state = torch.load(vae_file, map_location=lambda storage, location: storage)
        print("Loading VAE at epoch {}, "
              "with test error {}...".format(
                  vae_state['epoch'], vae_state['precision']))
        vae.load_state_dict(vae_state['state_dict'])
        self._decoder = vae.decoder

        # load MDRNN
        self._rnn = MDRNNCell(32, 3, RSIZE, 5)
        rnn_state = torch.load(rnn_file, map_location=lambda storage, location: storage)
        print("Loading MDRNN at epoch {}, "
              "with test error {}...".format(
                  rnn_state['epoch'], rnn_state['precision']))
        # drop the LSTM's "_l0" layer suffix so keys match MDRNNCell parameter names
        rnn_state_dict = {k.replace('_l0', ''): v for k, v in rnn_state['state_dict'].items()}
        self._rnn.load_state_dict(rnn_state_dict)

        # init state
        self._lstate = torch.randn(1, LSIZE)
        self._hstate = 2 * [torch.zeros(1, RSIZE)]

        # obs
        self._obs = None
        self._visual_obs = None

        # rendering
        self.monitor = None
        self.figure = None
Example #10
def offline_experiment(doodad_config, variant):
    save_doodad_config(doodad_config)
    parser = argparse.ArgumentParser()
    # parser.add_argument('--env-type', default='gridworld')
    # parser.add_argument('--env-type', default='point_robot_sparse')
    # parser.add_argument('--env-type', default='cheetah_vel')
    parser.add_argument('--env-type', default='ant_semicircle_sparse')
    args, rest_args = parser.parse_known_args(args=[])
    env = args.env_type

    # --- GridWorld ---
    if env == 'gridworld':
        args = args_gridworld.get_args(rest_args)
    # --- PointRobot ---
    elif env == 'point_robot_sparse':
        args = args_point_robot_sparse.get_args(rest_args)
    # --- Mujoco ---
    elif env == 'cheetah_vel':
        args = args_cheetah_vel.get_args(rest_args)
    elif env == 'ant_semicircle_sparse':
        args = args_ant_semicircle_sparse.get_args(rest_args)

    set_gpu_mode(torch.cuda.is_available() and args.use_gpu)

    vae_args = config_utl.load_config_file(
        os.path.join(args.vae_dir, args.env_name, args.vae_model_name,
                     'online_config.json'))
    args = config_utl.merge_configs(
        vae_args, args)  # order of input to this function is important

    # Transform data BAMDP (state relabelling)
    if args.transform_data_bamdp:
        # load VAE for state relabelling
        vae_models_path = os.path.join(args.vae_dir, args.env_name,
                                       args.vae_model_name, 'models')
        vae = VAE(args)
        off_utl.load_trained_vae(vae, vae_models_path)
        # load data and relabel
        save_data_path = os.path.join(args.main_data_dir, args.env_name,
                                      args.relabelled_data_dir)
        os.makedirs(save_data_path)
        dataset, goals = off_utl.load_dataset(data_dir=args.data_dir,
                                              args=args,
                                              arr_type='numpy')
        bamdp_dataset = off_utl.transform_mdps_ds_to_bamdp_ds(
            dataset, vae, args)
        # save relabelled data
        off_utl.save_dataset(save_data_path, bamdp_dataset, goals)

    learner = OfflineMetaLearner(args)

    learner.train()
Example #11
def test_vae(image_shape, batch_size=128, hid_dim=2):
    x = torch.zeros(batch_size, *image_shape)

    encoder = VAE(image_shape, hid_dim)
    mean_image, sampled_image, logits, z, mean, stddev = encoder(x)

    assert mean_image.shape == x.shape
    assert sampled_image.shape == x.shape
    assert logits.shape == x.shape

    assert z.shape == (batch_size, hid_dim)
    assert mean.shape == (batch_size, hid_dim)
    assert stddev.shape == (batch_size, hid_dim)
Example #12
    def _build_vae(self):
        vae = VAE(hidden_units=512,
                  latent_space_dim=100,
                  num_input_channels=self._num_input_channels,
                  conditional=self.conditional,
                  num_labels=self.num_labels,
                  device=self.device)
        if self.checkpoint_path is not None:
            vae.load_state_dict(torch.load(self.checkpoint_path))
        vae.to(device=self.device)
        return vae
Example #13
    def __init__(self, timelimit, pop_size, device):
        self.pop_size = pop_size
        self.truncation_threshold = int(pop_size / 2)  # pop_size should be divisible by two
        self.P = []

        # unique GA id
        self.init_time = datetime.now().strftime("%Y%m%d_%H%M%S")

        # load configuration params
        with open('config/creature.json') as f:
            config = json.load(f)
            model_fromdisk = config.get('vae.model.fromdisk')
            model_path = config.get('vae.model.path')

            latent_size = config.get('vae.latent.size')
            obs_size = config.get('vae.obs.size')
            num_effectors = config.get('joints.size') + config.get(
                'brushes.size')
            input_size = latent_size + num_effectors
            output_size = num_effectors

            cpg_enabled = config.get('cpg.enabled')
            if cpg_enabled:
                input_size += 1
                output_size += 1

        # load vision module
        from models.vae import VAE
        vae = VAE(latent_size).cuda()

        if model_fromdisk:
            vae.load_state_dict(torch.load(model_path))
            vae.eval()  # inference mode
            print(f'Loaded VAE model {model_path} from disk')

        print(f'Generating initial population of {pop_size} candidates...')

        # initialize population
        from train import GAIndividual
        for _ in range(pop_size):
            self.P.append(
                GAIndividual(self.init_time,
                             input_size,
                             output_size,
                             obs_size,
                             compressor=vae,
                             cpg_enabled=cpg_enabled,
                             device=device,
                             time_limit=timelimit))

        # report controller parameters
        self.num_controller_params = input_size * output_size + output_size
        print(f'Number of controller parameters: {self.num_controller_params}')
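The parameter count printed above is just the size of one fully connected layer (weights plus biases). A worked example with hypothetical config values (not taken from the actual creature.json):

latent_size, joints, brushes, cpg_enabled = 32, 4, 4, True
num_effectors = joints + brushes                                        # 8
input_size = latent_size + num_effectors + (1 if cpg_enabled else 0)    # 41
output_size = num_effectors + (1 if cpg_enabled else 0)                 # 9
num_controller_params = input_size * output_size + output_size          # 41 * 9 + 9 = 378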
Example #14
    def __init__(self, num_epochs=50, batch_size=512, lr=1e-3, data_path=None, \
        ckpt_save_path='.', model_type='cnn', schedule=0, gamma=0.5,  train_mode='default'):
        self.train_mode = train_mode
        self.ckpt_save_path = ckpt_save_path
        print(f'Checkpoints will be stored at {ckpt_save_path}')
        """ Hyperparameters """
        self.num_epochs = num_epochs
        self.batch_size = batch_size
        self.lr = lr
        self.criterion = nn.MSELoss()
        """ Set up DataLoader & Sampler """
        x = torch.from_numpy(data_path)  # despite its name, data_path holds an already-loaded numpy array
        self.dataset = CustomTensorDataset(x)

        t_len = int(len(self.dataset) * 0.9)
        v_len = len(self.dataset) - t_len
        train_set, valid_set = random_split(self.dataset, [t_len, v_len])
        print(
            f'Train set: {len(train_set)} | Validation set: {len(valid_set)}')
        self.train_sampler = RandomSampler(train_set)
        self.valid_sampler = SequentialSampler(valid_set)
        self.train_dataloader = DataLoader(train_set,
                                           sampler=self.train_sampler,
                                           batch_size=self.batch_size)
        self.valid_dataloader = DataLoader(valid_set,
                                           sampler=self.valid_sampler,
                                           batch_size=self.batch_size)
        """ Select model """
        model_choices = {
            'cnn': CNN_AutoEncoder(),
            'vae': VAE(),
        }
        self.model_type = model_type
        self.model = model_choices[self.model_type].cuda()
        print(f'Training model: {model_type}')
        self.optimizer = Adam(self.model.parameters(), lr=self.lr)
        self.scheduler = None
        self.schedule = schedule
        self.gamma = gamma
        if schedule != 0:  # Enable lr_scheduler
            self.scheduler = lr_scheduler.StepLR(self.optimizer,
                                                 step_size=self.schedule,
                                                 gamma=self.gamma)
            print(
                f"Enabled lr_scheduler with step_size={schedule}, gamma={gamma}"
            )
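CustomTensorDataset is not shown in this snippet; a minimal sketch assuming it simply wraps an in-memory tensor of samples (the real class may also apply transforms):

from torch.utils.data import Dataset

class CustomTensorDataset(Dataset):
    """Minimal sketch: wraps a single tensor of samples (an assumption, not the repository's actual class)."""
    def __init__(self, tensors):
        self.tensors = tensors

    def __getitem__(self, index):
        return self.tensors[index]

    def __len__(self):
        return len(self.tensors)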
Example #15
def load_net(model_loc, args=None):
    model_file = Path(model_loc).name
    model_name = model_file.split('-')[0]

    if (model_name == 'CVAE'):
        model = CVAE(
            num_labels=int(model_file.split('-')[4].split('_')[0]),
            latent_size=int(model_file.split('-')[2]),
            img_size=32,
            layer_sizes=[int(i) for i in model_file.split('-')[1].split('_')])
    elif (model_name == 'VAE'):
        model = VAE(
            latent_size=int(model_file.split('-')[2]),
            img_size=32,
            layer_sizes=[int(i) for i in model_file.split('-')[1].split('_')])

    elif (model_name == 'FEAT_VAE_MNIST'):
        model = FEAT_VAE_MNIST(
            classifier_model=load_net(args.encoding_model_loc).to(args.device),
            num_features=int(model_file.split('-')[2].split('_')[0]),
            latent_size=int(model_file.split('-')[1].split('_')[0]))

    elif (model_name == 'ConvVAE2d'):
        latent_size_small_vae = int(model_file.split('-')[4].split('_')[0])
        cvae_input_sz = int(model_file.split('-')[3])
        stride = int(model_file.split('-')[2])
        IM_SIZE = int(model_file.split('-')[1])

        small_net = CVAE_SMALL(latent_size=latent_size_small_vae,
                               img_size=cvae_input_sz,
                               num_labels=11)

        model = ConvVAE2d(cvae_small=small_net,
                          cvae_input_sz=cvae_input_sz,
                          stride=stride,
                          img_size=IM_SIZE)

    else:
        print(f'Error : unrecognized model type in {model_file}')
        sys.exit(1)
    model.load_state_dict(torch.load(model_loc)['state_dict'])
    return model
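The parsing above mirrors the file-name convention built in hvae_from_args earlier, so hyperparameters are recovered from the checkpoint name alone. For example (hypothetical path and values):

# 'VAE-512_256-20-mnist' is parsed back into layer_sizes=[512, 256] and latent_size=20
# (img_size is hard-coded to 32); the path itself is only illustrative.
model = load_net('checkpoints/VAE-512_256-20-mnist')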
Example #16
def main():
    flags = tf.flags
    flags.DEFINE_integer("latent_dim", 64, "Dimension of latent space.")
    flags.DEFINE_integer("obs_dim", 12288, "Dimension of observation space.")
    flags.DEFINE_integer("batch_size", 64, "Batch size.")
    flags.DEFINE_integer("epochs", 500, "As it said")
    flags.DEFINE_integer(
        "updates_per_epoch", 100,
        "Really just can set to 1 if you don't like mini-batch.")
    FLAGS = flags.FLAGS

    kwargs = {
        'latent_dim': FLAGS.latent_dim,
        'batch_size': FLAGS.batch_size,
        'observation_dim': FLAGS.obs_dim,
        'encoder': conv_anime_encoder,
        'decoder': conv_anime_decoder,
        'observation_distribution': 'Gaussian'
    }
    vae = VAE(**kwargs)
    provider = Anime()
    tbar = tqdm(range(FLAGS.epochs))
    for epoch in tbar:
        training_loss = 0.

        for _ in range(FLAGS.updates_per_epoch):
            x = provider.next_batch(FLAGS.batch_size)
            loss = vae.update(x)
            training_loss += loss

        training_loss /= FLAGS.updates_per_epoch
        s = "Loss: {:.4f}".format(training_loss)
        tbar.set_description(s)

    z = np.random.normal(size=[FLAGS.batch_size, FLAGS.latent_dim])
    samples = vae.z2x(z)[0]
    show_samples(samples, 8, 8, [64, 64, 3], name='samples')

    vae.save_generator('weights/vae_anime/generator')
Example #17
def main():
    flags = tf.flags
    flags.DEFINE_integer("latent_dim", 2, "Dimension of latent space.")
    flags.DEFINE_integer("batch_size", 128, "Batch size.")
    flags.DEFINE_integer("epochs", 500, "As it said")
    flags.DEFINE_integer("updates_per_epoch", 100, "Really just can set to 1 if you don't like mini-batch.")
    flags.DEFINE_string("data_dir", 'mnist', "Tensorflow demo data download position.")
    FLAGS = flags.FLAGS

    kwargs = {
        'latent_dim': FLAGS.latent_dim,
        'batch_size': FLAGS.batch_size,
        'encoder': fc_mnist_encoder,
        'decoder': fc_mnist_decoder
    }
    vae = VAE(**kwargs)
    mnist = input_data.read_data_sets(train_dir=FLAGS.data_dir)
    tbar = tqdm(range(FLAGS.epochs))
    for epoch in tbar:
        training_loss = 0.

        for _ in range(FLAGS.updates_per_epoch):
            x, _ = mnist.train.next_batch(FLAGS.batch_size)
            loss = vae.update(x)
            training_loss += loss

        training_loss /= FLAGS.updates_per_epoch
        s = "Loss: {:.4f}".format(training_loss)
        tbar.set_description(s)

    z = np.random.normal(size=[FLAGS.batch_size, FLAGS.latent_dim])
    samples = vae.z2x(z)[0]
    show_samples(samples, 10, 10, [28, 28], name='samples')
    show_latent_scatter(vae, mnist, name='latent')

    vae.save_generator('weights/vae_mnist/generator')
Example #18
    def load_vae(self):
        self.vae = VAE(self.args)
        vae_models_path = os.path.join(self.args.vae_dir, self.args.env_name,
                                       self.args.vae_model_name, 'models')
        off_utl.load_trained_vae(self.vae, vae_models_path)
Example #19
# constants
ASIZE = 1
BSIZE = 16
SEQ_LEN = 140  # 4 seconds
LSIZE = 64
RSIZE = 512
epochs = 200

# Load VAE
vae_file = join(args.originallogdir, 'vae', 'best.tar')
assert exists(vae_file), "No trained VAE in the originallogdir..."
state = torch.load(vae_file, map_location={'cuda:0': str(device)})
print("Loading VAE at epoch {} "
      "with test error {}".format(state['epoch'], state['precision']))
vae = VAE(3, LSIZE).to(device)
vae.load_state_dict(state['state_dict'])
vae_optimizer = torch.optim.Adam(vae.parameters())
vae_scheduler = ReduceLROnPlateau(vae_optimizer, 'min', factor=0.5, patience=5)

# Load RNN
rnn_dir = join(args.originallogdir, 'mdrnn')
rnn_file = join(rnn_dir, 'best.tar')
assert exists(rnn_file), 'No trained MDNRNN in the originallogdir...'
mdrnn = MDRNN(LSIZE, ASIZE, RSIZE, 5)
mdrnn.to(device)
mdrnn_optimizer = torch.optim.RMSprop(mdrnn.parameters(), lr=1e-3, alpha=.9)
mdrnn_scheduler = ReduceLROnPlateau(mdrnn_optimizer,
                                    'min',
                                    factor=0.5,
                                    patience=5)
Example #20
	transforms.ToPILImage(),
	transforms.Resize((RED_SIZE, RED_SIZE)),
	# transforms.RandomHorizontalFlip(),
	transforms.ToTensor(),
])

transform_test = transforms.Compose([
	transforms.ToPILImage(),
	transforms.Resize((RED_SIZE, RED_SIZE)),
	transforms.ToTensor(),
])


trained = 0
#model = VAE(3, LSIZE).to(device)
model = VAE(3, LSIZE)
model = torch.nn.DataParallel(model, device_ids=range(8))
model.cuda()
optimizer = optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.999))
model_p = VAE_a(7, LSIZE)
model_p = torch.nn.DataParallel(model_p, device_ids=range(8))
model_p.cuda()
optimizer_p = optim.Adam(model_p.parameters(), lr=learning_rate, betas=(0.9, 0.999))
controller = Controller(LSIZE, 3)
controller = torch.nn.DataParallel(controller, device_ids=range(8))
controller = controller.cuda()
optimizer_a = optim.SGD(controller.parameters(), lr=learning_rate*10)
# scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=5)
# earlystopping = EarlyStopping('min', patience=30)

vis = visdom.Visdom(env='pa_train')
Example #21
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# constants
BSIZE = 16
SEQ_LEN = 32
epochs = 30

# Loading VAE
vae_file = join(args.logdir, 'vae', 'best.tar')
assert exists(vae_file), "No trained VAE in the logdir..."
state = torch.load(vae_file)
print("Loading VAE at epoch {} "
      "with test error {}".format(
          state['epoch'], state['precision']))

vae = VAE(3, LSIZE).to(device)
vae.load_state_dict(state['state_dict'])

# Loading model
rnn_dir = join(args.logdir, 'mdrnn')
rnn_file = join(rnn_dir, 'best.tar')

if not exists(rnn_dir):
    mkdir(rnn_dir)

mdrnn = MDRNN(LSIZE, ASIZE, RSIZE, 5)
mdrnn.to(device)
optimizer = torch.optim.RMSprop(mdrnn.parameters(), lr=1e-3, alpha=.9)
scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=5)
earlystopping = EarlyStopping('min', patience=30)
Example #22
def run_vae():
    seed = np.random.randint(1, 2147462579)

    def sinus_seq(period, samples, length):
        X = np.linspace(-np.pi * (samples / period),
                        np.pi * (samples / period), samples)
        X = np.reshape(np.sin(X), (-1, length))
        X += np.random.randn(*X.shape) * 0.1
        # X = (X - np.min(X))/(np.max(X) - np.min(X))
        return X, np.ones((samples // length, 1))

    X1, y1 = sinus_seq(40, 100000, 50)
    X2, y2 = sinus_seq(20, 40000, 50)

    X = np.concatenate((X1, X2)).astype('float32')
    y = np.concatenate((y1 * 0, y2 * 1), axis=0).astype('int')

    dim_samples, dim_features = X.shape
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8)

    # X, y, users, stats = har.load()
    #
    # limited_labels = y < 5
    # y = y[limited_labels]
    # X = X[limited_labels]
    # users = users[limited_labels]
    #
    # # Compress labels
    # for idx, label in enumerate(np.unique(y)):
    #     if not np.equal(idx, label):
    #         y[y == label] = idx
    #
    # y_unique = np.unique(y)
    # y = one_hot(y, len(y_unique))
    #
    # dim_samples, dim_sequence, dim_features = X.shape
    # num_classes = len(y_unique)
    #
    # # Split into train and test stratified by users
    # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=users)

    # Combine in sets
    train_set = (X_train, y_train)
    test_set = (X_test, y_test)
    print('Train size: ', train_set[0].shape)
    print('Test size: ', test_set[0].shape)

    n, n_x = train_set[0].shape  # Datapoints in the dataset, input features.
    n_batches = n // 100  # The number of batches.
    bs = n // n_batches  # The batch size.

    # Initialize the auxiliary deep generative model.
    model = VAE(n_x=int(n_x),
                n_z=16,
                z_hidden=[16],
                xhat_hidden=[32],
                x_dist='gaussian')

    # Get the training functions.
    f_train, f_test, f_validate, train_args, test_args, validate_args = model.build_model(
        train_set, test_set)
    # Update the default function arguments.
    train_args['inputs']['batchsize'] = 100
    train_args['inputs']['learningrate'] = 1e-3
    train_args['inputs']['beta1'] = 0.9
    train_args['inputs']['beta2'] = 0.999

    def custom_evaluation(model, path):
        plt.clf()
        f, axarr = plt.subplots(nrows=len(np.unique(y)), ncols=1)
        for idx, y_l in enumerate(np.unique(y)):
            act_idx = test_set[1] == y_l
            test_act = test_set[0][act_idx[:, 0]]

            z = model.f_qz(test_act, 1)
            xhat = model.f_px(z, 1)

            axarr[idx].plot(test_act[:3].reshape(-1, 1), color='red')
            axarr[idx].plot(xhat[:3].reshape(-1, 1),
                            color='blue',
                            linestyle='dotted')

        f.set_size_inches(8, 5)
        f.savefig(path, dpi=100, format='png')
        plt.close(f)

    # Define training loop. Output training evaluations every 1 epoch
    # and the custom evaluation method every 10 epochs.
    train = TrainModel(model=model,
                       output_freq=1,
                       pickle_f_custom_freq=100,
                       f_custom_eval=custom_evaluation)
    train.add_initial_training_notes("Training the vae with bn %s. seed %i." %
                                     (str(model.batchnorm), seed))
    train.train_model(f_train,
                      train_args,
                      f_test,
                      test_args,
                      f_validate,
                      validate_args,
                      n_train_batches=n_batches,
                      n_epochs=10000,
                      anneal=[("learningrate", 100, 0.75, 3e-5)])
Example #23
                                          transform_train,
                                          train=True)

dataset_test = RolloutObservationDataset('datasets/pacman',
                                         transform_test,
                                         train=False)
train_loader = torch.utils.data.DataLoader(dataset_train,
                                           batch_size=args.batch_size,
                                           shuffle=True,
                                           num_workers=2)
test_loader = torch.utils.data.DataLoader(dataset_test,
                                          batch_size=args.batch_size,
                                          shuffle=True,
                                          num_workers=2)

model = VAE(3, LSIZE).to(device)
optimizer = optim.Adam(model.parameters())
scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=5)
earlystopping = EarlyStopping('min', patience=30)


# Reconstruction + KL divergence losses summed over all elements and batch
def loss_function(recon_x, x, mu, logsigma):
    """ VAE loss function """
    BCE = F.mse_loss(recon_x, x, size_average=False)

    # see Appendix B from VAE paper:
    # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
    # https://arxiv.org/abs/1312.6114
    # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    KLD = -0.5 * torch.sum(1 + 2 * logsigma - mu.pow(2) - (2 * logsigma).exp())
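The snippet is cut off after the KL term; in the standard VAE objective the function would finish by returning the sum of the two terms, i.e. something like:

    return BCE + KLD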
Example #24
                                         transform_test,
                                         train=False)
train_loader = torch.utils.data.DataLoader(dataset_train,
                                           batch_size=args.batch_size,
                                           shuffle=True,
                                           num_workers=8,
                                           drop_last=True)
test_loader = torch.utils.data.DataLoader(dataset_test,
                                          batch_size=args.batch_size,
                                          shuffle=True,
                                          num_workers=8,
                                          drop_last=True)

trained = 0
#model = VAE(3, LSIZE).to(device)
model = VAE(3, LSIZE)
model = torch.nn.DataParallel(model, device_ids=range(8))
model.cuda()
optimizer = optim.Adam(model.parameters(),
                       lr=learning_rate,
                       betas=(0.9, 0.999))
# scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=5)
# earlystopping = EarlyStopping('min', patience=30)

vis = visdom.Visdom(env='vae_pt')

ground_window = vis.image(
    np.random.rand(64, 64),
    opts=dict(title='ground!', caption='ground.'),
)
image_window = vis.image(
Example #25
                    action='store_true',
                    help='Does not save samples during training if specified')

args = parser.parse_args()
cuda = torch.cuda.is_available()
learning_rate = 1e-4

torch.manual_seed(914)
# Fix numeric divergence due to bug in Cudnn
torch.backends.cudnn.benchmark = True

device = torch.device("cuda" if cuda else "cpu")

trained = 0
#model = VAE(3, LSIZE).to(device)
model = VAE(3, LSIZE)
model = torch.nn.DataParallel(model, device_ids=range(8))
model.cuda()
model.eval()
# vis = visdom.Visdom(env='vae_pt')
#
# ground_window = vis.image(
#     np.random.rand(RED_SIZE*10, RED_SIZE*10),
#     opts=dict(title='ground!', caption='ground.'),
# )
# image_window = vis.image(
#     np.random.rand(RED_SIZE*10, RED_SIZE*10),
#     opts=dict(title='image!', caption='image.'),
# )
# vae_window = vis.image(
#     np.random.rand(RED_SIZE*10, RED_SIZE*10),
Example #26
def main(
    model_name,
    dataset,
    dataroot,
    download,
    augment,
    batch_size,
    eval_batch_size,
    epochs,
    saved_model,
    seed,
    hidden_channels,
    K,
    L,
    actnorm_scale,
    flow_permutation,
    flow_coupling,
    LU_decomposed,
    learn_top,
    y_condition,
    y_weight,
    max_grad_clip,
    max_grad_norm,
    lr,
    n_workers,
    cuda,
    n_init_batches,
    output_dir,
    saved_optimizer,
    warmup,
):

    vis = visdom.Visdom()
    env = "{}_{}".format(model_name, dataset)

    device = "cpu" if (not torch.cuda.is_available() or not cuda) else "cuda:0"

    check_manual_seed(seed)

    ds = check_dataset(dataset, dataroot, augment, download)
    image_shape, num_classes, train_dataset, test_dataset = ds

    # Note: unsupported for now
    multi_class = False

    train_loader = data.DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=n_workers,
        drop_last=True,
    )
    test_loader = data.DataLoader(
        test_dataset,
        batch_size=eval_batch_size,
        shuffle=False,
        num_workers=n_workers,
        drop_last=False,
    )

    if model_name == "Glow":
        model = Glow(
            image_shape,
            hidden_channels,
            K,
            L,
            actnorm_scale,
            flow_permutation,
            flow_coupling,
            LU_decomposed,
            num_classes,
            learn_top,
            y_condition,
        )
    elif model_name == "VAE":
        model = VAE(
            image_shape,
            hidden_channels,
        )

    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=5e-5)

    lr_lambda = lambda epoch: min(1.0, (epoch + 1) / warmup)  # noqa
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer,
                                                  lr_lambda=lr_lambda)

    train_loss_window = create_plot_window(vis, env, '#Iterations', 'Loss',
                                           'Training Loss')
    val_avg_loss_window = create_plot_window(vis, env, '#Epochs', 'Loss',
                                             'Validation Average Loss')
    train_image_window = create_image_window(vis, env, 'Training Images')

    def step(engine, batch):
        model.train()
        optimizer.zero_grad()

        x, y = batch
        x = x.to(device)

        if y_condition:
            y = y.to(device)
            z, nll, y_logits = model(x, y)
            losses = compute_loss_y(nll, y_logits, y_weight, y, multi_class)
        else:
            z, nll, y_logits, im = model(x)
            losses = compute_loss(nll)
        if engine.state.iteration % 250 == 1:
            vis.line(X=np.array([engine.state.iteration]),
                     Y=np.array([losses["total_loss"].item()]),
                     win=train_loss_window,
                     update='append',
                     env=env)
            vis.images(postprocess(im),
                       nrow=16,
                       win=train_image_window,
                       env=env)

        losses["total_loss"].backward()

        if max_grad_clip > 0:
            torch.nn.utils.clip_grad_value_(model.parameters(), max_grad_clip)
        if max_grad_norm > 0:
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)

        optimizer.step()

        return losses

    def eval_step(engine, batch):
        model.eval()

        x, y = batch
        x = x.to(device)

        with torch.no_grad():
            if y_condition:
                y = y.to(device)
                z, nll, y_logits = model(x, y)
                losses = compute_loss_y(nll,
                                        y_logits,
                                        y_weight,
                                        y,
                                        multi_class,
                                        reduction="none")
            else:
                z, nll, y_logits, im = model(x)
                losses = compute_loss(nll, reduction="none")

        return losses

    trainer = Engine(step)
    checkpoint_handler = ModelCheckpoint(output_dir,
                                         model_name,
                                         save_interval=1,
                                         n_saved=5,
                                         require_empty=False)

    trainer.add_event_handler(
        Events.EPOCH_COMPLETED,
        checkpoint_handler,
        {
            "model": model,
            "optimizer": optimizer
        },
    )

    monitoring_metrics = ["total_loss"]
    RunningAverage(output_transform=lambda x: x["total_loss"]).attach(
        trainer, "total_loss")

    evaluator = Engine(eval_step)

    # Note: replace by https://github.com/pytorch/ignite/pull/524 when released
    Loss(
        lambda x, y: torch.mean(x),
        output_transform=lambda x: (
            x["total_loss"],
            torch.empty(x["total_loss"].shape[0]),
        ),
    ).attach(evaluator, "total_loss")

    if y_condition:
        monitoring_metrics.extend(["nll"])
        RunningAverage(output_transform=lambda x: x["nll"]).attach(
            trainer, "nll")

        # Note: replace by https://github.com/pytorch/ignite/pull/524 when released
        Loss(
            lambda x, y: torch.mean(x),
            output_transform=lambda x:
            (x["nll"], torch.empty(x["nll"].shape[0])),
        ).attach(evaluator, "nll")

    pbar = ProgressBar()
    pbar.attach(trainer, metric_names=monitoring_metrics)

    # load pre-trained model if given
    if saved_model:
        model.load_state_dict(torch.load(saved_model))
        model.set_actnorm_init()

        if saved_optimizer:
            optimizer.load_state_dict(torch.load(saved_optimizer))

        file_name, ext = os.path.splitext(saved_model)
        resume_epoch = int(file_name.split("_")[-1])

        @trainer.on(Events.STARTED)
        def resume_training(engine):
            engine.state.epoch = resume_epoch
            engine.state.iteration = resume_epoch * len(
                engine.state.dataloader)

    @trainer.on(Events.STARTED)
    def init(engine):
        model.train()

        init_batches = []
        init_targets = []

        with torch.no_grad():
            for batch, target in islice(train_loader, None, n_init_batches):
                init_batches.append(batch)
                init_targets.append(target)

            init_batches = torch.cat(init_batches).to(device)

            assert init_batches.shape[0] == n_init_batches * batch_size

            if y_condition:
                init_targets = torch.cat(init_targets).to(device)
                model(init_batches, init_targets)
            else:
                init_targets = None
                model(init_batches)

    @trainer.on(Events.EPOCH_COMPLETED)
    def evaluate(engine):
        evaluator.run(test_loader)

        scheduler.step()
        metrics = evaluator.state.metrics

        losses = ", ".join(
            [f"{key}: {value:.2f}" for key, value in metrics.items()])
        vis.line(X=np.array([engine.state.epoch]),
                 Y=np.array([metrics["total_loss"]]),
                 win=val_avg_loss_window,
                 update='append',
                 env=env)
        print(f"Validation Results - Epoch: {engine.state.epoch} {losses}")

    timer = Timer(average=True)
    timer.attach(
        trainer,
        start=Events.EPOCH_STARTED,
        resume=Events.ITERATION_STARTED,
        pause=Events.ITERATION_COMPLETED,
        step=Events.ITERATION_COMPLETED,
    )

    @trainer.on(Events.EPOCH_COMPLETED)
    def print_times(engine):
        pbar.log_message(
            f"Epoch {engine.state.epoch} done. Time per batch: {timer.value():.3f}[s]"
        )
        timer.reset()

    trainer.run(train_loader, epochs)
Example #27
    train_loader = torch.utils.data.DataLoader(datasets.MNIST(
        '../data', train=True, download=True, transform=transforms.ToTensor()),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               **kwargs)
    test_loader = torch.utils.data.DataLoader(datasets.MNIST(
        '../data', train=False, transform=transforms.ToTensor()),
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              **kwargs)

    in_channel = 1
    in_height = in_width = 28
    model = VAE(height=in_height,
                width=in_width,
                in_channel=in_channel,
                z_dim=args.z_dim,
                k=args.train_k).to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)


def train_iwae(epoch):
    model.train()
    train_loss = 0
    for batch_idx, (data, _) in enumerate(train_loader):
        data = data.to(device)

        optimizer.zero_grad()
        x_prime, z, mu, logvar = model(data)
        loss = model.neg_elbo_iwae(data,
                                   x_prime,
Example #28
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# constants
BSIZE = 16
SEQ_LEN = 32
epochs = 30

# Loading VAE
vae_file = join(args.logdir, 'vae', 'best.tar')
assert exists(vae_file), "No trained VAE in the logdir..."
state = torch.load(vae_file)
print("Loading VAE at epoch {} "
      "with test error {}".format(
          state['epoch'], state['precision']))

vae = VAE(3, LSIZE).to(device)
vae.load_state_dict(state['state_dict'])

# Loading model
rnn_dir = join(args.logdir, 'mdrnn')
rnn_file = join(rnn_dir, 'best.tar')

if not exists(rnn_dir):
    mkdir(rnn_dir)

mdrnn = MDRNN(LSIZE, ASIZE, RSIZE, 5)
mdrnn.to(device)
optimizer = torch.optim.RMSprop(mdrnn.parameters(), lr=1e-3, alpha=.9)
scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=5)
earlystopping = EarlyStopping('min', patience=30)
Example #29
class OfflineMetaLearner:
    """
    Offline meta-learner class, i.e. it never interacts with the environment.
    """
    def __init__(self, args):
        """
        Seeds everything.
        Initialises: logger, environments, policy (+storage +optimiser).
        """

        self.args = args

        # make sure everything has the same seed
        utl.seed(self.args.seed)

        # initialize tensorboard logger
        if self.args.log_tensorboard:
            self.tb_logger = TBLogger(self.args)

        self.args, env = off_utl.expand_args(self.args, include_act_space=True)
        if self.args.act_space.__class__.__name__ == "Discrete":
            self.args.policy = 'dqn'
        else:
            self.args.policy = 'sac'

        # load buffers with data
        if 'load_data' not in self.args or self.args.load_data:
            goals, augmented_obs_dim = self.load_buffer(
                env)  # env is input just for possible relabelling option
            self.args.augmented_obs_dim = augmented_obs_dim
            self.goals = goals

        # initialize policy
        self.initialize_policy()

        # load vae for inference in evaluation
        self.load_vae()

        # create environment for evaluation
        self.env = make_env(
            args.env_name,
            args.max_rollouts_per_task,
            presampled_tasks=args.presampled_tasks,
            seed=args.seed,
        )
        # n_tasks=self.args.num_eval_tasks)
        if self.args.env_name == 'GridNavi-v2':
            self.env.unwrapped.goals = [
                tuple(goal.astype(int)) for goal in self.goals
            ]

    def initialize_policy(self):
        if self.args.policy == 'dqn':
            q_network = FlattenMlp(input_size=self.args.augmented_obs_dim,
                                   output_size=self.args.act_space.n,
                                   hidden_sizes=self.args.dqn_layers).to(
                                       ptu.device)
            self.agent = DQN(
                q_network,
                # optimiser_vae=self.optimizer_vae,
                lr=self.args.policy_lr,
                gamma=self.args.gamma,
                tau=self.args.soft_target_tau,
            ).to(ptu.device)
        else:
            # assert self.args.act_space.__class__.__name__ == "Box", (
            #     "Can't train SAC with discrete action space!")
            q1_network = FlattenMlp(
                input_size=self.args.augmented_obs_dim + self.args.action_dim,
                output_size=1,
                hidden_sizes=self.args.dqn_layers).to(ptu.device)
            q2_network = FlattenMlp(
                input_size=self.args.augmented_obs_dim + self.args.action_dim,
                output_size=1,
                hidden_sizes=self.args.dqn_layers).to(ptu.device)
            policy = TanhGaussianPolicy(
                obs_dim=self.args.augmented_obs_dim,
                action_dim=self.args.action_dim,
                hidden_sizes=self.args.policy_layers).to(ptu.device)
            self.agent = SAC(
                policy,
                q1_network,
                q2_network,
                actor_lr=self.args.actor_lr,
                critic_lr=self.args.critic_lr,
                gamma=self.args.gamma,
                tau=self.args.soft_target_tau,
                use_cql=self.args.use_cql if 'use_cql' in self.args else False,
                alpha_cql=self.args.alpha_cql
                if 'alpha_cql' in self.args else None,
                entropy_alpha=self.args.entropy_alpha,
                automatic_entropy_tuning=self.args.automatic_entropy_tuning,
                alpha_lr=self.args.alpha_lr,
                clip_grad_value=self.args.clip_grad_value,
            ).to(ptu.device)

    def load_vae(self):
        self.vae = VAE(self.args)
        vae_models_path = os.path.join(self.args.vae_dir, self.args.env_name,
                                       self.args.vae_model_name, 'models')
        off_utl.load_trained_vae(self.vae, vae_models_path)

    def load_buffer(self, env):
        if self.args.hindsight_relabelling:  # without arr_type loading -- GPU will explode
            dataset, goals = off_utl.load_dataset(
                data_dir=self.args.relabelled_data_dir,
                args=self.args,
                num_tasks=self.args.num_train_tasks,
                allow_dense_data_loading=False,
                arr_type='numpy')
            dataset = off_utl.batch_to_trajectories(dataset, self.args)
            dataset, goals = off_utl.mix_task_rollouts(
                dataset, env, goals, self.args)  # reward relabelling
            dataset = off_utl.trajectories_to_batch(dataset)
        else:
            dataset, goals = off_utl.load_dataset(
                data_dir=self.args.relabelled_data_dir,
                args=self.args,
                num_tasks=self.args.num_train_tasks,
                allow_dense_data_loading=False,
                arr_type='numpy')
        augmented_obs_dim = dataset[0][0].shape[1]
        self.storage = MultiTaskPolicyStorage(
            max_replay_buffer_size=max([d[0].shape[0] for d in dataset]),
            obs_dim=dataset[0][0].shape[1],
            action_space=self.args.act_space,
            tasks=range(len(goals)),
            trajectory_len=self.args.trajectory_len)
        for task, task_data in enumerate(dataset):
            self.storage.add_samples(task,
                                     observations=task_data[0],
                                     actions=task_data[1],
                                     rewards=task_data[2],
                                     next_observations=task_data[3],
                                     terminals=task_data[4])
        return goals, augmented_obs_dim

    def train(self):
        self._start_training()
        for iter_ in range(self.args.num_iters):
            self.training_mode(True)
            indices = np.random.choice(len(self.goals), self.args.meta_batch)
            train_stats = self.update(indices)

            self.training_mode(False)
            self.log(iter_ + 1, train_stats)

    def update(self, tasks):
        rl_losses_agg = {}
        for update in range(self.args.rl_updates_per_iter):
            # sample random RL batch
            obs, actions, rewards, next_obs, terms = self.sample_rl_batch(
                tasks, self.args.batch_size)
            # flatten out task dimension
            t, b, _ = obs.size()
            obs = obs.view(t * b, -1)
            actions = actions.view(t * b, -1)
            rewards = rewards.view(t * b, -1)
            next_obs = next_obs.view(t * b, -1)
            terms = terms.view(t * b, -1)

            # RL update
            rl_losses = self.agent.update(obs,
                                          actions,
                                          rewards,
                                          next_obs,
                                          terms,
                                          action_space=self.env.action_space)

            for k, v in rl_losses.items():
                if update == 0:  # first iterate - create list
                    rl_losses_agg[k] = [v]
                else:  # append values
                    rl_losses_agg[k].append(v)
        # take mean
        for k in rl_losses_agg:
            rl_losses_agg[k] = np.mean(rl_losses_agg[k])
        self._n_rl_update_steps_total += self.args.rl_updates_per_iter

        return rl_losses_agg

    def evaluate(self):
        num_episodes = self.args.max_rollouts_per_task
        num_steps_per_episode = self.env.unwrapped._max_episode_steps
        num_tasks = self.args.num_eval_tasks
        obs_size = self.env.unwrapped.observation_space.shape[0]

        returns_per_episode = np.zeros((num_tasks, num_episodes))
        success_rate = np.zeros(num_tasks)

        rewards = np.zeros((num_tasks, self.args.trajectory_len))
        reward_preds = np.zeros((num_tasks, self.args.trajectory_len))
        observations = np.zeros(
            (num_tasks, self.args.trajectory_len + 1, obs_size))
        if self.args.policy == 'sac':
            log_probs = np.zeros((num_tasks, self.args.trajectory_len))

        # This part is very specific for the Semi-Circle env
        # if self.args.env_name == 'PointRobotSparse-v0':
        #     reward_belief = np.zeros((num_tasks, self.args.trajectory_len))
        #
        #     low_x, high_x, low_y, high_y = -2., 2., -1., 2.
        #     resolution = 0.1
        #     grid_x = np.arange(low_x, high_x + resolution, resolution)
        #     grid_y = np.arange(low_y, high_y + resolution, resolution)
        #     centers_x = (grid_x[:-1] + grid_x[1:]) / 2
        #     centers_y = (grid_y[:-1] + grid_y[1:]) / 2
        #     yv, xv = np.meshgrid(centers_y, centers_x, sparse=False, indexing='ij')
        #     centers = np.vstack([xv.ravel(), yv.ravel()]).T
        #     n_grid_points = centers.shape[0]
        #     reward_belief_discretized = np.zeros((num_tasks, self.args.trajectory_len, centers.shape[0]))

        for task_loop_i, task in enumerate(
                self.env.unwrapped.get_all_eval_task_idx()):
            obs = ptu.from_numpy(self.env.reset(task))
            obs = obs.reshape(-1, obs.shape[-1])
            step = 0

            # get prior parameters
            with torch.no_grad():
                task_sample, task_mean, task_logvar, hidden_state = self.vae.encoder.prior(
                    batch_size=1)

            observations[task_loop_i,
                         step, :] = ptu.get_numpy(obs[0, :obs_size])

            for episode_idx in range(num_episodes):
                running_reward = 0.
                for step_idx in range(num_steps_per_episode):
                    # add distribution parameters to observation - policy is conditioned on posterior
                    augmented_obs = self.get_augmented_obs(
                        obs, task_mean, task_logvar)
                    if self.args.policy == 'dqn':
                        action, value = self.agent.act(obs=augmented_obs,
                                                       deterministic=True)
                    else:
                        action, _, _, log_prob = self.agent.act(
                            obs=augmented_obs,
                            deterministic=self.args.eval_deterministic,
                            return_log_prob=True)

                    # observe reward and next obs
                    next_obs, reward, done, info = utl.env_step(
                        self.env, action.squeeze(dim=0))
                    running_reward += reward.item()
                    # done_rollout = False if ptu.get_numpy(done[0][0]) == 0. else True
                    # update encoding
                    task_sample, task_mean, task_logvar, hidden_state = self.update_encoding(
                        obs=next_obs,
                        action=action,
                        reward=reward,
                        done=done,
                        hidden_state=hidden_state)
                    rewards[task_loop_i, step] = reward.item()
                    reward_preds[task_loop_i, step] = ptu.get_numpy(
                        self.vae.reward_decoder(task_sample, next_obs, obs,
                                                action)[0, 0])

                    # This part is very specific for the Semi-Circle env
                    # if self.args.env_name == 'PointRobotSparse-v0':
                    #     reward_belief[task, step] = ptu.get_numpy(
                    #         self.vae.compute_belief_reward(task_mean, task_logvar, obs, next_obs, action)[0])
                    #
                    #     reward_belief_discretized[task, step, :] = ptu.get_numpy(
                    #         self.vae.compute_belief_reward(task_mean.repeat(n_grid_points, 1),
                    #                                        task_logvar.repeat(n_grid_points, 1),
                    #                                        None,
                    #                                        torch.cat((ptu.FloatTensor(centers),
                    #                                                   ptu.zeros(centers.shape[0], 1)), dim=-1).unsqueeze(0),
                    #                                        None)[:, 0])

                    observations[task_loop_i, step + 1, :] = ptu.get_numpy(
                        next_obs[0, :obs_size])
                    if self.args.policy != 'dqn':
                        log_probs[task_loop_i,
                                  step] = ptu.get_numpy(log_prob[0])

                    if "is_goal_state" in dir(
                            self.env.unwrapped
                    ) and self.env.unwrapped.is_goal_state():
                        success_rate[task_loop_i] = 1.
                    # set: obs <- next_obs
                    obs = next_obs.clone()
                    step += 1

                returns_per_episode[task_loop_i, episode_idx] = running_reward

        if self.args.policy == 'dqn':
            return returns_per_episode, success_rate, observations, rewards, reward_preds
        # This part is very specific for the Semi-Circle env
        # elif self.args.env_name == 'PointRobotSparse-v0':
        #     return returns_per_episode, success_rate, log_probs, observations, \
        #            rewards, reward_preds, reward_belief, reward_belief_discretized, centers
        else:
            return returns_per_episode, success_rate, log_probs, observations, rewards, reward_preds

    def log(self, iteration, train_stats):
        # --- save model ---
        if iteration % self.args.save_interval == 0:
            save_path = os.path.join(self.tb_logger.full_output_folder,
                                     'models')
            if not os.path.exists(save_path):
                os.mkdir(save_path)
            torch.save(
                self.agent.state_dict(),
                os.path.join(save_path, "agent{0}.pt".format(iteration)))

        if iteration % self.args.log_interval == 0:
            if self.args.policy == 'dqn':
                returns, success_rate, observations, rewards, reward_preds = self.evaluate(
                )
            # This part is super specific for the Semi-Circle env
            # elif self.args.env_name == 'PointRobotSparse-v0':
            #     returns, success_rate, log_probs, observations, \
            #     rewards, reward_preds, reward_belief, reward_belief_discretized, points = self.evaluate()
            else:
                returns, success_rate, log_probs, observations, rewards, reward_preds = self.evaluate(
                )

            if self.args.log_tensorboard:
                tasks_to_vis = np.random.choice(self.args.num_eval_tasks, 5)
                for i, task in enumerate(tasks_to_vis):
                    self.env.reset(task)
                    if PLOT_VIS:
                        self.tb_logger.writer.add_figure(
                            'policy_vis/task_{}'.format(i),
                            utl_eval.plot_rollouts(observations[task, :],
                                                   self.env),
                            self._n_rl_update_steps_total)
                    self.tb_logger.writer.add_figure(
                        'reward_prediction_train/task_{}'.format(i),
                        utl_eval.plot_rew_pred_vs_rew(rewards[task, :],
                                                      reward_preds[task, :]),
                        self._n_rl_update_steps_total)
                    # self.tb_logger.writer.add_figure('reward_prediction_train/task_{}'.format(i),
                    #                                  utl_eval.plot_rew_pred_vs_reward_belief_vs_rew(rewards[task, :],
                    #                                                                                 reward_preds[task, :],
                    #                                                                                 reward_belief[task, :]),
                    #                                  self._n_rl_update_steps_total)
                    # if self.args.env_name == 'PointRobotSparse-v0':     # This part is super specific for the Semi-Circle env
                    #     for t in range(0, int(self.args.trajectory_len/4), 3):
                    #         self.tb_logger.writer.add_figure('discrete_belief_reward_pred_task_{}/timestep_{}'.format(i, t),
                    #                                          utl_eval.plot_discretized_belief_halfcircle(reward_belief_discretized[task, t, :],
                    #                                                                                      points, self.env,
                    #                                                                                      observations[task, :t+1]),
                    #                                          self._n_rl_update_steps_total)
                if self.args.max_rollouts_per_task > 1:
                    for episode_idx in range(self.args.max_rollouts_per_task):
                        self.tb_logger.writer.add_scalar(
                            'returns_multi_episode/episode_{}'.format(
                                episode_idx + 1),
                            np.mean(returns[:, episode_idx]),
                            self._n_rl_update_steps_total)
                    self.tb_logger.writer.add_scalar(
                        'returns_multi_episode/sum',
                        np.mean(np.sum(returns, axis=-1)),
                        self._n_rl_update_steps_total)
                    self.tb_logger.writer.add_scalar(
                        'returns_multi_episode/success_rate',
                        np.mean(success_rate), self._n_rl_update_steps_total)
                else:
                    self.tb_logger.writer.add_scalar(
                        'returns/returns_mean', np.mean(returns),
                        self._n_rl_update_steps_total)
                    self.tb_logger.writer.add_scalar(
                        'returns/returns_std', np.std(returns),
                        self._n_rl_update_steps_total)
                    self.tb_logger.writer.add_scalar(
                        'returns/success_rate', np.mean(success_rate),
                        self._n_rl_update_steps_total)
                if self.args.policy == 'dqn':
                    self.tb_logger.writer.add_scalar(
                        'rl_losses/qf_loss_vs_n_updates',
                        train_stats['qf_loss'], self._n_rl_update_steps_total)
                    self.tb_logger.writer.add_scalar(
                        'weights/q_network',
                        list(self.agent.qf.parameters())[0].mean(),
                        self._n_rl_update_steps_total)
                    if list(self.agent.qf.parameters())[0].grad is not None:
                        param_list = list(self.agent.qf.parameters())
                        self.tb_logger.writer.add_scalar(
                            'gradients/q_network',
                            sum([
                                param_list[i].grad.mean()
                                for i in range(len(param_list))
                            ]), self._n_rl_update_steps_total)
                    self.tb_logger.writer.add_scalar(
                        'weights/q_target',
                        list(self.agent.target_qf.parameters())[0].mean(),
                        self._n_rl_update_steps_total)
                    if list(self.agent.target_qf.parameters()
                            )[0].grad is not None:
                        param_list = list(self.agent.target_qf.parameters())
                        self.tb_logger.writer.add_scalar(
                            'gradients/q_target',
                            sum([
                                param_list[i].grad.mean()
                                for i in range(len(param_list))
                            ]), self._n_rl_update_steps_total)
                else:
                    self.tb_logger.writer.add_scalar(
                        'policy/log_prob', np.mean(log_probs),
                        self._n_rl_update_steps_total)
                    self.tb_logger.writer.add_scalar(
                        'rl_losses/qf1_loss', train_stats['qf1_loss'],
                        self._n_rl_update_steps_total)
                    self.tb_logger.writer.add_scalar(
                        'rl_losses/qf2_loss', train_stats['qf2_loss'],
                        self._n_rl_update_steps_total)
                    self.tb_logger.writer.add_scalar(
                        'rl_losses/policy_loss', train_stats['policy_loss'],
                        self._n_rl_update_steps_total)
                    self.tb_logger.writer.add_scalar(
                        'rl_losses/alpha_entropy_loss',
                        train_stats['alpha_entropy_loss'],
                        self._n_rl_update_steps_total)

                    # weights and gradients
                    self.tb_logger.writer.add_scalar(
                        'weights/q1_network',
                        list(self.agent.qf1.parameters())[0].mean(),
                        self._n_rl_update_steps_total)
                    if list(self.agent.qf1.parameters())[0].grad is not None:
                        param_list = list(self.agent.qf1.parameters())
                        self.tb_logger.writer.add_scalar(
                            'gradients/q1_network',
                            sum([
                                param_list[i].grad.mean()
                                for i in range(len(param_list))
                            ]), self._n_rl_update_steps_total)
                    self.tb_logger.writer.add_scalar(
                        'weights/q1_target',
                        list(self.agent.qf1_target.parameters())[0].mean(),
                        self._n_rl_update_steps_total)
                    if list(self.agent.qf1_target.parameters()
                            )[0].grad is not None:
                        param_list = list(self.agent.qf1_target.parameters())
                        self.tb_logger.writer.add_scalar(
                            'gradients/q1_target',
                            sum([
                                param_list[i].grad.mean()
                                for i in range(len(param_list))
                            ]), self._n_rl_update_steps_total)
                    self.tb_logger.writer.add_scalar(
                        'weights/q2_network',
                        list(self.agent.qf2.parameters())[0].mean(),
                        self._n_rl_update_steps_total)
                    if list(self.agent.qf2.parameters())[0].grad is not None:
                        param_list = list(self.agent.qf2.parameters())
                        self.tb_logger.writer.add_scalar(
                            'gradients/q2_network',
                            sum([
                                param_list[i].grad.mean()
                                for i in range(len(param_list))
                            ]), self._n_rl_update_steps_total)
                    self.tb_logger.writer.add_scalar(
                        'weights/q2_target',
                        list(self.agent.qf2_target.parameters())[0].mean(),
                        self._n_rl_update_steps_total)
                    if list(self.agent.qf2_target.parameters()
                            )[0].grad is not None:
                        param_list = list(self.agent.qf2_target.parameters())
                        self.tb_logger.writer.add_scalar(
                            'gradients/q2_target',
                            sum([
                                param_list[i].grad.mean()
                                for i in range(len(param_list))
                            ]), self._n_rl_update_steps_total)
                    self.tb_logger.writer.add_scalar(
                        'weights/policy',
                        list(self.agent.policy.parameters())[0].mean(),
                        self._n_rl_update_steps_total)
                    if list(self.agent.policy.parameters()
                            )[0].grad is not None:
                        param_list = list(self.agent.policy.parameters())
                        self.tb_logger.writer.add_scalar(
                            'gradients/policy',
                            sum([
                                param_list[i].grad.mean()
                                for i in range(len(param_list))
                            ]), self._n_rl_update_steps_total)

            for k, v in [
                ('num_rl_updates', self._n_rl_update_steps_total),
                ('time_elapsed', time.time() - self._start_time),
                ('iteration', iteration),
            ]:
                self.tb_logger.writer.add_scalar(k, v,
                                                 self._n_rl_update_steps_total)
            self.tb_logger.finish_iteration(iteration)

            print(
                "Iteration -- {}, Success rate -- {:.3f}, Avg. return -- {:.3f}, Elapsed time {:5d}[s]"
                .format(iteration, np.mean(success_rate),
                        np.mean(np.sum(returns, axis=-1)),
                        int(time.time() - self._start_time)))

    def sample_rl_batch(self, tasks, batch_size):
        ''' sample batch of unordered rl training data from a list/array of tasks '''
        # this batch consists of transitions sampled randomly from replay buffer
        batches = [
            ptu.np_to_pytorch_batch(self.storage.random_batch(
                task, batch_size)) for task in tasks
        ]
        unpacked = [utl.unpack_batch(batch) for batch in batches]
        # group elements together: per-task tuples -> lists grouped by element,
        # e.g. [(o1, a1, r1), (o2, a2, r2)] -> [[o1, o2], [a1, a2], [r1, r2]]
        unpacked = [[x[i] for x in unpacked] for i in range(len(unpacked[0]))]
        # concatenate across tasks along the batch dimension
        unpacked = [torch.cat(x, dim=0) for x in unpacked]
        return unpacked

    def _start_training(self):
        self._n_rl_update_steps_total = 0
        self._start_time = time.time()

    def training_mode(self, mode):
        self.agent.train(mode)

    def update_encoding(self, obs, action, reward, done, hidden_state):
        # reset hidden state of the recurrent net when the task is done
        hidden_state = self.vae.encoder.reset_hidden(hidden_state, done)
        with torch.no_grad():  # size should be (batch, dim)
            task_sample, task_mean, task_logvar, hidden_state = self.vae.encoder(
                actions=action.float(),
                states=obs,
                rewards=reward,
                hidden_state=hidden_state,
                return_prior=False)

        return task_sample, task_mean, task_logvar, hidden_state

    @staticmethod
    def get_augmented_obs(obs, mean, logvar):
        mean = mean.reshape((-1, mean.shape[-1]))
        logvar = logvar.reshape((-1, logvar.shape[-1]))
        return torch.cat((obs, mean, logvar), dim=-1)

    def load_model(self, agent_path, device='cpu'):
        self.agent.load_state_dict(torch.load(agent_path, map_location=device))
        self.load_vae()
        self.training_mode(False)
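The policy input above is the raw observation concatenated with the posterior mean and log-variance produced by the VAE encoder. A minimal sketch of the resulting shape, using hypothetical dimensions rather than values from this repository:

import torch

obs = torch.zeros(1, 4)           # hypothetical obs_dim = 4
task_mean = torch.zeros(1, 5)     # hypothetical latent_dim = 5
task_logvar = torch.zeros(1, 5)

augmented_obs = torch.cat((obs, task_mean, task_logvar), dim=-1)
assert augmented_obs.shape == (1, 4 + 2 * 5)  # obs_dim + 2 * latent_dim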
Example #30
def _train_vae(log_dir,
               offline_buffer_path,
               saved_tasks_path,
               env_type,
               seed,
               path_length,
               meta_episode_len,
               load_buffer_kwargs=None,
               **kwargs):
    # debug/sanity check: confirm that log_dir is writable
    with open(os.path.join(log_dir, 'test.txt'), 'w') as f:
        f.write("hello from train_vae_offline.py")
    if load_buffer_kwargs is None:
        load_buffer_kwargs = {}
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    parser = argparse.ArgumentParser()
    # parser.add_argument('--env-type', default='gridworld')
    # parser.add_argument('--env-type', default='point_robot_sparse')
    # parser.add_argument('--env-type', default='cheetah_vel')
    parser.add_argument('--env-type', default='ant_semicircle_sparse')
    extra_args = []
    for k, v in kwargs.items():
        extra_args.append('--{}'.format(k))
        extra_args.append(str(v))
    args, rest_args = parser.parse_known_args(args=extra_args)

    # --- environment-specific args ---
    if env_type == 'cheetah_vel':
        args = args_cheetah_vel.get_args(rest_args)
        args.env_name = 'HalfCheetahVel-v0'
    elif env_type == 'ant_dir':
        # TODO: replace with ant_dir env
        args = args_ant_semicircle_sparse.get_args(rest_args)
        # parser.add_argument('--env-name', default='AntSemiCircleSparse-v0')  # no effect: parser already consumed its args
        args.env_name = 'AntDir-v0'
    elif env_type == 'walker':
        args = args_walker_param.get_args(rest_args)
    elif env_type == 'hopper':
        args = args_hopper_param.get_args(rest_args)
    elif env_type == 'humanoid':
        args = args_humanoid_dir.get_args(rest_args)
    else:
        raise ValueError('Unknown env_type: {}'.format(env_type))

    set_gpu_mode(torch.cuda.is_available() and args.use_gpu)

    args, env = off_utl.expand_args(args)
    args.save_dir = os.path.join(log_dir, 'trained_vae')

    args.trajectory_len = path_length
    task_data = joblib.load(saved_tasks_path)
    tasks = task_data['tasks']
    print("loading dataset")
    with open(os.path.join(log_dir, 'tmp1.txt'), 'w') as f:
        f.write("train_vae_offline.py: start loading dataset")
    dataset, goals = off_utl.load_pearl_buffer(
        pretrain_buffer_path=offline_buffer_path,
        tasks=tasks,
        add_done_info=env.add_done_info,
        path_length=path_length,
        meta_episode_len=meta_episode_len,
        **load_buffer_kwargs)
    with open(os.path.join(log_dir, 'tmp1.txt'), 'a') as f:
        f.write("train_vae_offline.py: done loading dataset")
    print("done loading dataset")
    for data in dataset:
        print(data[0].shape)

    dataset = [[x.astype(np.float32) for x in d] for d in dataset]

    if args.save_model:
        dir_prefix = args.save_dir_prefix if hasattr(args, 'save_dir_prefix') \
                                             and args.save_dir_prefix is not None else ''
        args.full_save_path = os.path.join(
            args.save_dir, args.env_name,
            dir_prefix + datetime.datetime.now().strftime('__%d_%m_%H_%M_%S'))
        os.makedirs(args.full_save_path, exist_ok=True)
        config_utl.save_config_file(args, args.full_save_path)

    vae = VAE(args)
    train(vae, dataset, args)
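A hedged sketch of how _train_vae might be invoked; every path and length below is a placeholder, not a value taken from the original project:

# Hypothetical call -- all paths, the env_type, and the lengths are placeholders.
_train_vae(log_dir='/tmp/vae_logs',
           offline_buffer_path='/tmp/pearl_buffer.pkl',
           saved_tasks_path='/tmp/tasks.pkl',
           env_type='cheetah_vel',
           seed=0,
           path_length=200,
           meta_episode_len=2)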
Example #31
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import DataLoader

from models import *
from utils import *
from train import *
from models.vae import VAE

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

SOS_token = 0 # Start Of Sentence token
EOS_token = 1 # End Of Sentence token
MAX_LENGTH = 16
max_length = 16

if __name__ == '__main__':
    #----------Hyper Parameters----------#
    hidden_size = 256
    cond_size = 4
    latent_size = 32
    vocab_size = 28 # size of the vocabulary

    vae = VAE(vocab_size, hidden_size, latent_size, cond_size, vocab_size).to(device)

    words, tenses = prepare_data()
    data = MyData()
    data_loader = DataLoader(data, batch_size=32, shuffle=True, collate_fn=collate_fn)

    history = trainEpochs(vae, data_loader, n_epochs=5000, learning_rate=0.001, verbose=False)
    save_model(vae, model_name='vae_5000')
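With SOS_token = 0 and EOS_token = 1, a vocabulary of 28 presumably covers the 26 lowercase letters plus the two special tokens. A small sketch of that assumed character encoding (not code from this repository):

# Assumed mapping: 0 = SOS, 1 = EOS, 2..27 = 'a'..'z'.
def word_to_indices(word, max_length=MAX_LENGTH):
    indices = [SOS_token] + [ord(c) - ord('a') + 2 for c in word] + [EOS_token]
    return indices[:max_length]

# word_to_indices('access') -> [0, 2, 4, 4, 6, 20, 20, 1]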
Example #32
  print("high, low", env.action_space.high, env.action_space.low)
  print("environment details")
  print("env.observation_space", env.observation_space)
  print("high, low", env.observation_space.high, env.observation_space.low)
  assert False
  '''
  return env
transform = transforms.Compose([
  transforms.ToPILImage(),
  transforms.Resize((64, 64)),
  # transforms.RandomHorizontalFlip(),
  transforms.ToTensor(),
])
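# Illustration (an assumption, not part of the original script): applying
# `transform` to a raw CarRacing frame turns an HxWxC uint8 numpy array into a
# 3x64x64 float tensor with values in [0, 1], e.g.:
#   frame = env.reset()      # numpy array, shape (96, 96, 3), dtype uint8
#   obs = transform(frame)   # torch tensor, shape (3, 64, 64)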
# from https://github.com/openai/gym/blob/master/gym/envs/box2d/car_racing.py
if __name__=="__main__":
  model=VAE(3, 64)
  model=torch.nn.DataParallel(model,device_ids=range(1))
  model.cuda()
  controller=Controller_class(64,3)
  controller=torch.nn.DataParallel(controller,device_ids=range(1))
  controller=controller.cuda()
  state = torch.load('/home/ld/gym-car/log/class/contorl_checkpoint_10.pkl')
  controller.load_state_dict(state['state_dict'])
  print('contorller load success')
  state = torch.load('/home/ld/gym-car/log/class/vae_checkpoint_10.pkl')
  model.load_state_dict(state['state_dict'])
  print('vae load success')
  # from pyglet.window import key
  action = np.array( [0.0, 0.0, 0.0] )
  # def key_press(k, mod):
  #   global restart