Example #1
    def load_model(self,
                   attributes_path,
                   discriminator_path=None,
                   encoder_path=None,
                   actor_path=None,
                   critic_path=None,
                   value_path=None):
        # The actor, critic and value networks are restored by the wrapped
        # off-policy algorithm; the discriminator, encoder and training
        # attributes are handled below.
        self.off_policy_algorithm.load_model(actor_path=actor_path,
                                             critic_path=critic_path,
                                             value_path=value_path)
        print_heading(
            "Loading models from paths: \n discriminator:{} \n encoder:{} \n attributes:{}"
            .format(discriminator_path, encoder_path, attributes_path))
        if discriminator_path is not None:
            self.discriminator.load_state_dict(torch.load(discriminator_path))
        if encoder_path is not None:
            self.encoder.load_state_dict(torch.load(encoder_path))

        # Restore the training attributes pickled by save_model.
        with open(attributes_path, "rb") as f:
            attributes = pickle.load(f)

        self.current_iteration = attributes["current_iteration"]
        self.beta = attributes["beta"]
        self.policy_update_count = attributes["policy_update_count"]
        self.max_reward = attributes["max_reward"]

        print("Loading done")
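A minimal call sketch for the loader above; `agent` and all file names are hypothetical, for illustration only:

# Hypothetical usage: restore the discriminator, encoder and attributes.
agent.load_model(attributes_path="attributes/HalfCheetah/attributes.pkl",
                 discriminator_path="model/HalfCheetah/discriminator_best.pt",
                 encoder_path="model/HalfCheetah/encoder_best.pt")

Network paths left as None are skipped by the None checks above; the actor, critic and value paths are simply forwarded to the wrapped off-policy algorithm.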
Example #2
    def save_model(self,
                   env_name,
                   attributes_path=None,
                   all_nets_path=None,
                   discriminator_path=None,
                   encoder_path=None,
                   actor_path=None,
                   critic_path=None,
                   value_path=None,
                   info="none"):
        # The actor, critic and value networks are saved by the wrapped
        # off-policy algorithm.
        self.off_policy_algorithm.save_model(env_name=env_name,
                                             all_nets_path=all_nets_path,
                                             actor_path=actor_path,
                                             critic_path=critic_path,
                                             value_path=value_path,
                                             info=info)
        # A single directory for all networks overrides the individual paths.
        if all_nets_path is not None:
            discriminator_path = all_nets_path
            encoder_path = all_nets_path

        # Fall back to per-environment default directories.
        if discriminator_path is None:
            discriminator_path = f'model/{env_name}/'
        os.makedirs(discriminator_path, exist_ok=True)

        if encoder_path is None:
            encoder_path = f'model/{env_name}/'
        os.makedirs(encoder_path, exist_ok=True)

        if attributes_path is None:
            attributes_path = f"attributes/{env_name}"
        os.makedirs(attributes_path, exist_ok=True)

        print_heading("Saving discriminator and encoder network parameters")
        torch.save(self.discriminator.state_dict(),
                   discriminator_path + f"discriminator_{info}.pt")
        torch.save(self.encoder.state_dict(),
                   encoder_path + f"encoder_{info}.pt")

        with open(attributes_path + "attributes.pkl", "wb") as f:
            pickle.dump(
                {
                    "current_iteration": self.current_iteration,
                    "beta": self.beta,
                    "policy_update_count": self.policy_update_count,
                    "max_reward": self.max_reward
                }, f)
        heading_decorator(bottom=True, print_req=True)
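With the defaults, the call below writes discriminator_best.pt and encoder_best.pt under model/HalfCheetah/ and attributes.pkl under attributes/HalfCheetah/; `agent` and the environment name are hypothetical:

# Hypothetical usage: checkpoint all networks and attributes for one run.
agent.save_model(env_name="HalfCheetah", info="best")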
Example #3
                  beta_init=args.beta_init,
                  learning_rate_decay=22,
                  learning_rate_decay_training_steps=22,
                  optimizer=optimizer,
                  discriminator_weight_decay=args.discriminator_weight_decay,
                  gp_lambda=args.gp_lambda,
                  encoder_weight_decay=args.encoder_weight_decay,
                  information_constraint=args.information_constraint,
                  grad_clip=args.grad_clip,
                  loss_clip=args.loss_clip,
                  clip_val_grad=args.clip_val_grad,
                  clip_val_loss=args.clip_val_loss,
                  batch_size=args.batch_size)

# Test the expert buffer: length and absorbing-state wrapping.
print_heading("Trajectory Length")
print(obsvail.trajectory_length)

print_heading("Expert buffer length")
print(len(obsvail.expert_buffer))

expert_trajectory = obsvail.expert_buffer.sample(batch_size=1)["trajectory"]
print_heading("Sampled expert trajectory details")
print("Trajectory Length ".ljust(50), len(expert_trajectory))
print("Absorbing indicator for 1st state".ljust(50),
      expert_trajectory[0]["is_absorbing"])
print("Absorbing indicator for last state".ljust(50),
      expert_trajectory[-1]["is_absorbing"])
print("Absorbing indicator for 2nd last state".ljust(50),
      expert_trajectory[-2]["is_absorbing"])
Example #4
from robo_rl.common import TrajectoryBuffer
from robo_rl.common import print_heading

trajectory_buffer = TrajectoryBuffer(1000)  # capacity of 1000 trajectories

# Two short trajectories built from pairs of toy observations.
observations = [[i + j for j in range(2)] for i in range(5)]
trajectory1 = [observations[0], observations[1]]
trajectory2 = [observations[4], observations[2]]
trajectory_buffer.add(trajectory1)
trajectory_buffer.add(trajectory2)

print_heading("Sample trajectory")
print(trajectory_buffer.sample(batch_size=2))

print_heading("Sample at particular timestep")
print(trajectory_buffer.sample_timestep(batch_size=2, timestep=1))
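For orientation, a minimal sketch of the interface this test exercises; it is not the robo_rl implementation, only one plausible fixed-capacity buffer with per-timestep sampling:

import random

class SketchTrajectoryBuffer:
    def __init__(self, capacity):
        self.capacity = capacity
        self.buffer = []
        self.position = 0  # next slot to overwrite once full

    def add(self, trajectory):
        if len(self.buffer) < self.capacity:
            self.buffer.append(trajectory)
        else:
            self.buffer[self.position] = trajectory  # overwrite the oldest entry
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size):
        return random.sample(self.buffer, batch_size)

    def sample_timestep(self, batch_size, timestep):
        # One observation per sampled trajectory, all at the same timestep.
        return [trajectory[timestep] for trajectory in self.sample(batch_size)]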
Example #5
import torch
import torch.nn.functional as torchfunc

from robo_rl.common import print_heading

# LinearPFNN, layers_size and final_layer_function are defined earlier in the
# original script and are not reproduced in this excerpt.
activation_function = torchfunc.elu
num_networks = 3

pfnn = LinearPFNN(layers_size=layers_size,
                  final_layer_function=final_layer_function,
                  activation_function=activation_function,
                  num_networks=num_networks)

input_tensor = torch.Tensor([0.291])
phase = 0.70

x = {"input": input_tensor, "phase": phase}

input_tensor = x["input"]

print_heading("Phase and corresponding indices")
phase = x["phase"]
# Enforce phase in [0,1)
phase = phase - int(phase)
print("Phase".ljust(25), phase)

# Get indices for interval endpoints
left_index = int(phase * num_networks) % num_networks
right_index = (left_index + 1) % num_networks
left_phase = left_index / num_networks
right_phase = left_phase + (1 / num_networks)

# phase = weight * left_phase + (1-weight) * right_phase
weight = (right_phase - phase) * num_networks

print("Num Networks".ljust(25), num_networks)