args = parser.parse_args()

#-----Environment-----#
# width and height share the same value.
width = height = 12
obstacles = create_obstacles(width, height)

# Candidate start locations: every (a, b) pair with both coordinates drawn
# from range(3, width - 3), minus anything listed in `obstacles`.
interior_cells = set(product(range(3, width - 3), repeat=2))
set_diff = list(interior_cells - set(obstacles))
start_loc = sample_start(set_diff)
s = State(start_loc, obstacles)
T = TransitionFunction(width, height, obstacle_movement)

# The SR2 expert trajectories use their own reward function and a different
# value of num_c; both are selected by the same path comparison.
# NOTE(review): num_c is presumably the number of latent codes -- confirm.
if args.expert_path == 'SR2_expert_trajectories/':
    R = RewardFunction_SR2(-1.0, 1.0, width)
    num_c = 2
else:
    R = RewardFunction(-1.0, 1.0)
    num_c = 4

num_inputs = s.state.shape[0]
num_actions = 4

#env.seed(args.seed)
torch.manual_seed(args.seed)

# NOTE(review): the statement below is cut off in the visible source (its
# remaining arguments and closing parenthesis are missing). It is kept
# verbatim; restore the full call before re-enabling it.
# policy_net = GRU(num_inputs, 0, num_c, num_actions,
# Use CUDA only when it was not disabled on the command line and a device
# is actually available.
args.cuda = not args.no_cuda and torch.cuda.is_available()

torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

#-----Environment-----#
# width and height share the same value.
width = height = 12
obstacles = create_obstacles(width, height)
T = TransitionFunction(width, height, obstacle_movement)

# The SR2 expert trajectories use their own reward function, which also
# takes the width.
if args.expert_path == 'SR2_expert_trajectories/':
    R = RewardFunction_SR2(-1.0, 1.0, width)
else:
    R = RewardFunction(-1.0, 1.0)


class VAE(nn.Module):
    """Module that owns a `Policy` sub-module (and, per the truncated source,
    a `Posterior` sub-module).

    NOTE(review): only the beginning of this class is visible; any further
    attributes and methods lie outside the visible source.
    """

    def __init__(self):
        super(VAE, self).__init__()
        self.policy = Policy(
            state_size=8,
            action_size=0,
            latent_size=2,
            output_size=4,
            hidden_size=64,
            output_activation='sigmoid',
        )
        # NOTE(review): the statement below is cut off in the visible source
        # (its remaining arguments and closing parenthesis are missing). It
        # is kept verbatim; restore the full call before re-enabling it.
        # self.posterior = Posterior(state_size=8, action_size=0, latent_size=2,