Example #1
0
args = parser.parse_args()


# ----- Environment ----- #
width = height = 12
obstacles = create_obstacles(width, height)

# Interior grid coordinates (each axis in [3, width-4]) with obstacle cells removed.
interior_cells = set(product(tuple(range(3, width - 3)), repeat=2))
set_diff = list(interior_cells - set(obstacles))
start_loc = sample_start(set_diff)

s = State(start_loc, obstacles)
T = TransitionFunction(width, height, obstacle_movement)

# The SR2 expert data uses the width-aware reward; everything else the plain one.
use_sr2 = args.expert_path == 'SR2_expert_trajectories/'
R = RewardFunction_SR2(-1.0, 1.0, width) if use_sr2 else RewardFunction(-1.0, 1.0)

num_inputs = s.state.shape[0]
num_actions = 4
# SR2 setup trains with 2 latent codes; the default setup uses 4.
num_c = 2 if use_sr2 else 4

# env.seed(args.seed)  # NOTE(review): env is not seeded here (kept from original)
torch.manual_seed(args.seed)

policy_net = GRU(num_inputs,
                 0,
                 num_c,
                 num_actions,
Example #2
0
args.cuda = not args.no_cuda and torch.cuda.is_available()

# Seed the CPU RNG, and the GPU RNG too when CUDA is in use, for reproducibility.
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

# ----- Environment ----- #
width = height = 12
obstacles = create_obstacles(width, height)

T = TransitionFunction(width, height, obstacle_movement)

# SR2 expert data gets the width-aware reward function; otherwise the plain one.
R = (RewardFunction_SR2(-1.0, 1.0, width)
     if args.expert_path == 'SR2_expert_trajectories/'
     else RewardFunction(-1.0, 1.0))

class VAE(nn.Module):
    def __init__(self):
        super(VAE, self).__init__()

        self.policy = Policy(state_size=8,
                             action_size=0,
                             latent_size=2,
                             output_size=4,
                             hidden_size=64,
                             output_activation='sigmoid')
        self.posterior = Posterior(state_size=8,
                                   action_size=0,
                                   latent_size=2,