Example #1
def experiment(variant):
    from simple_sup import SimpleSupEnv
    expl_env = SimpleSupEnv(**variant['env_kwars'])
    eval_env = SimpleSupEnv(**variant['env_kwars'])
    obs_dim = eval_env.observation_space.low.size
    action_dim = eval_env.action_space.n

    # the 16-dim encoder features feed both the action decoder and the
    # supervised prediction head (sup_learner)
    encoder = nn.Sequential(
        nn.Linear(obs_dim, 16),
        nn.ReLU(),
    )
    decoder = nn.Linear(16, action_dim)
    from layers import ReshapeLayer
    sup_learner = nn.Sequential(
        nn.Linear(16, action_dim),
        ReshapeLayer(shape=(1, action_dim)),
    )
    from sup_softmax_policy import SupSoftmaxPolicy
    policy = SupSoftmaxPolicy(encoder, decoder, sup_learner)

    vf = Mlp(
        hidden_sizes=[32],
        input_size=obs_dim,
        output_size=1,
    )
    vf_criterion = nn.MSELoss()
    eval_policy = ArgmaxDiscretePolicy(policy, use_preactivation=True)
    expl_policy = policy

    eval_path_collector = MdpPathCollector(
        eval_env,
        eval_policy,
    )
    expl_path_collector = MdpPathCollector(
        expl_env,
        expl_policy,
    )
    from sup_replay_buffer import SupReplayBuffer
    replay_buffer = SupReplayBuffer(
        observation_dim=obs_dim,
        label_dim=1,
        max_replay_buffer_size=int(1e6),
    )

    from rlkit.torch.vpg.trpo_sup import TRPOSupTrainer
    trainer = TRPOSupTrainer(policy=policy,
                             value_function=vf,
                             vf_criterion=vf_criterion,
                             replay_buffer=replay_buffer,
                             **variant['trainer_kwargs'])
    algorithm = TorchOnlineRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        **variant['algorithm_kwargs'])
    algorithm.to(ptu.device)
    algorithm.train()
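The PyTorch examples on this page import ReshapeLayer from a project-local layers module that is not shown here. Below is a minimal sketch of the contract the call sites appear to assume (keep the batch dimension, reshape the per-sample activations to shape); it is an illustration, not the project's actual implementation.

import torch
import torch.nn as nn

class ReshapeLayer(nn.Module):
    """Assumed contract: reshape (batch, N) activations to (batch, *shape)."""
    def __init__(self, shape):
        super().__init__()
        self.shape = shape  # per-sample target shape, e.g. (label_num, label_dim)

    def forward(self, x):
        return x.view(x.size(0), *self.shape)

# quick check of the assumed contract
assert ReshapeLayer(shape=(2, 3))(torch.zeros(4, 6)).shape == (4, 2, 3)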
Example #2
def experiment(variant):
    from simple_sup import SimpleSupEnv
    expl_env = SimpleSupEnv(**variant['env_kwars'])
    eval_env = SimpleSupEnv(**variant['env_kwars'])
    obs_dim = eval_env.observation_space.low.size
    action_dim = eval_env.action_space.n

    hidden_dim = variant['hidden_dim']
    encoder = nn.Sequential(
        nn.Linear(obs_dim, hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, hidden_dim),
        nn.ReLU(),
    )
    decoder = nn.Linear(hidden_dim, action_dim)
    from layers import ReshapeLayer
    sup_learner = nn.Sequential(
        nn.Linear(hidden_dim, action_dim),
        ReshapeLayer(shape=(1, action_dim)),
    )
    from sup_softmax_policy import SupSoftmaxPolicy
    policy = SupSoftmaxPolicy(encoder, decoder, sup_learner)
    print('parameters: ',
          np.sum([p.view(-1).shape[0] for p in policy.parameters()]))

    vf = Mlp(
        hidden_sizes=[32],
        input_size=obs_dim,
        output_size=1,
    )
    vf_criterion = nn.MSELoss()
    eval_policy = ArgmaxDiscretePolicy(policy, use_preactivation=True)
    expl_policy = policy

    eval_path_collector = MdpPathCollector(
        eval_env,
        eval_policy,
    )
    expl_path_collector = MdpPathCollector(
        expl_env,
        expl_policy,
    )

    from rlkit.torch.vpg.ppo_sup_online import PPOSupOnlineTrainer
    trainer = PPOSupOnlineTrainer(
        policy=policy,
        value_function=vf,
        vf_criterion=vf_criterion,
        **variant['trainer_kwargs']
    )
    algorithm = TorchOnlineRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        **variant['algorithm_kwargs']
    )
    algorithm.to(ptu.device)
    algorithm.train()
Example #3
def experiment(variant):
    from traffic.make_env import make_env
    expl_env = make_env(args.exp_name, **variant['env_kwargs'])
    eval_env = make_env(args.exp_name, **variant['env_kwargs'])
    obs_dim = eval_env.observation_space.low.size
    action_dim = eval_env.action_space.n
    label_num = expl_env.label_num
    label_dim = expl_env.label_dim

    encoder = nn.Sequential(
        nn.Linear(obs_dim, 32),
        nn.ReLU(),
        nn.Linear(32, 32),
        nn.ReLU(),
    )
    decoder = nn.Linear(32, action_dim)
    from layers import ReshapeLayer
    sup_learner = nn.Sequential(
        nn.Linear(32, int(label_num * label_dim)),
        ReshapeLayer(shape=(label_num, label_dim)),
    )
    from sup_softmax_policy import SupSoftmaxPolicy
    policy = SupSoftmaxPolicy(encoder, decoder, sup_learner)
    print('parameters: ',
          np.sum([p.view(-1).shape[0] for p in policy.parameters()]))

    vf = Mlp(
        hidden_sizes=[32, 32],
        input_size=obs_dim,
        output_size=1,
    )
    vf_criterion = nn.MSELoss()
    eval_policy = ArgmaxDiscretePolicy(policy, use_preactivation=True)
    expl_policy = policy

    eval_path_collector = MdpPathCollector(
        eval_env,
        eval_policy,
    )
    expl_path_collector = MdpPathCollector(
        expl_env,
        expl_policy,
    )
    trainer = TRPOTrainer(policy=policy,
                          value_function=vf,
                          vf_criterion=vf_criterion,
                          **variant['trainer_kwargs'])
    algorithm = TorchOnlineRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        log_path_function=get_traffic_path_information,
        **variant['algorithm_kwargs'])
    algorithm.to(ptu.device)
    algorithm.train()
Example #4
def build_generator(input_var=None, use_batch_norm=True):
    import lasagne
    from lasagne.layers import InputLayer, batch_norm
    from layers import (Lipshitz_Layer, LipConvLayer, Subpixel_Layer,
                        ReshapeLayer, FlattenLayer)

    layer = InputLayer(shape=(None, 10), input_var=input_var)
    if use_batch_norm:
        raise NotImplementedError
    else:
        layer = Lipshitz_Layer(layer, 512 * 7 * 7, init=1)
        layer = ReshapeLayer(layer, (-1, 512, 7, 7))
        layer = Subpixel_Layer(layer, 256, (3, 3), 2)
        layer = Subpixel_Layer(layer, 128, (3, 3), 2)
        layer = Subpixel_Layer(layer, 64, (3, 3), 2)
        layer = LipConvLayer(layer,
                             1, (1, 1),
                             init=1,
                             nonlinearity=lasagne.nonlinearities.sigmoid)
        layer = ReshapeLayer(layer, (-1, 784))
    print("Generator output:", layer.output_shape)
    print("Number of parameters:", lasagne.layers.count_params(layer))
    return layer
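A hedged usage sketch for the generator builder above, assuming Theano and Lasagne are installed and the custom layers module is importable: the returned Lasagne layer stack is compiled into a sampling function with lasagne.layers.get_output and theano.function (use_batch_norm must be False, since the batch-norm branch raises NotImplementedError).

import numpy as np
import theano
import theano.tensor as T
import lasagne

z = T.matrix('z')  # latent codes, shape (batch, 10), matching the InputLayer above
generator = build_generator(input_var=z, use_batch_norm=False)
samples = lasagne.layers.get_output(generator)
sample_fn = theano.function([z], samples)

fake_images = sample_fn(np.random.rand(64, 10).astype('float32'))  # -> (64, 784)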
Example #5
def build_discriminator(input_var=None, use_batch_norm=True):
    import lasagne
    from lasagne.layers import InputLayer, batch_norm
    from layers import (Lipshitz_Layer, LipConvLayer, Subpixel_Layer,
                        ReshapeLayer, FlattenLayer)

    layer = InputLayer(shape=(None, 784), input_var=input_var)
    if use_batch_norm:
        raise NotImplementedError
    else:
        layer = ReshapeLayer(layer, (-1, 1, 28, 28))
        layer = LipConvLayer(layer, 16, (5, 5), init=1)
        layer = LipConvLayer(layer, 32, (5, 5), init=1)
        layer = LipConvLayer(layer, 64, (5, 5), init=1)
        layer = LipConvLayer(layer, 128, (5, 5), init=1)
        layer = FlattenLayer(layer)
        layer = Lipshitz_Layer(layer, 512, init=1)
        layer = Lipshitz_Layer(layer,
                               1 + 10,
                               init=1,
                               nonlinearity=lasagne.nonlinearities.sigmoid)

    print("Discriminator output:", layer.output_shape)
    print("Number of parameters:", lasagne.layers.count_params(layer))
    return layer
Example #6
def experiment(variant):
    from traffic.make_env import make_env
    expl_env = make_env(args.exp_name, **variant['env_kwargs'])
    eval_env = make_env(args.exp_name, **variant['env_kwargs'])
    obs_dim = eval_env.observation_space.low.size
    action_dim = eval_env.action_space.n
    label_num = expl_env.label_num
    label_dim = expl_env.label_dim
    max_path_length = variant['trainer_kwargs']['max_path_length']

    if variant['load_kwargs']['load']:
        load_dir = variant['load_kwargs']['load_dir']
        load_data = torch.load(load_dir + '/params.pkl', map_location='cpu')
        policy = load_data['trainer/policy']
        vf = load_data['trainer/value_function']
    else:
        hidden_dim = variant['lstm_kwargs']['hidden_dim']
        num_layers = variant['lstm_kwargs']['num_layers']
        a_0 = np.zeros(action_dim)
        h_0 = np.zeros(hidden_dim * num_layers)
        c_0 = np.zeros(hidden_dim * num_layers)
        latent_0 = (h_0, c_0)
        from lstm_net import LSTMNet
        lstm_net = LSTMNet(obs_dim, action_dim, hidden_dim, num_layers)
        decoder = nn.Linear(hidden_dim, action_dim)
        from layers import ReshapeLayer
        sup_learner = nn.Sequential(
            nn.Linear(hidden_dim, int(label_num * label_dim)),
            ReshapeLayer(shape=(label_num, label_dim)),
        )
        from sup_softmax_lstm_policy import SupSoftmaxLSTMPolicy
        policy = SupSoftmaxLSTMPolicy(
            a_0=a_0,
            latent_0=latent_0,
            obs_dim=obs_dim,
            action_dim=action_dim,
            lstm_net=lstm_net,
            decoder=decoder,
            sup_learner=sup_learner,
        )
        print('parameters: ',
              np.sum([p.view(-1).shape[0] for p in policy.parameters()]))

        vf = Mlp(
            hidden_sizes=[32, 32],
            input_size=obs_dim,
            output_size=1,
        )

    vf_criterion = nn.MSELoss()
    from rlkit.torch.policies.make_deterministic import MakeDeterministic
    eval_policy = MakeDeterministic(policy)
    expl_policy = policy

    eval_path_collector = MdpPathCollector(
        eval_env,
        eval_policy,
    )
    expl_path_collector = MdpPathCollector(
        expl_env,
        expl_policy,
    )

    from sup_replay_buffer import SupReplayBuffer
    replay_buffer = SupReplayBuffer(
        observation_dim=obs_dim,
        action_dim=action_dim,
        label_dim=label_num,
        max_replay_buffer_size=int(1e6),
        max_path_length=max_path_length,
        recurrent=True,
    )

    from rlkit.torch.vpg.ppo_sup import PPOSupTrainer
    trainer = PPOSupTrainer(policy=policy,
                            value_function=vf,
                            vf_criterion=vf_criterion,
                            replay_buffer=replay_buffer,
                            recurrent=True,
                            **variant['trainer_kwargs'])
    algorithm = TorchOnlineRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        log_path_function=get_traffic_path_information,
        **variant['algorithm_kwargs'])
    algorithm.to(ptu.device)
    algorithm.train()
Example #7
def experiment(variant):
    from traffic.make_env import make_env
    expl_env = make_env(args.exp_name, **variant['env_kwargs'])
    eval_env = make_env(args.exp_name, **variant['env_kwargs'])
    obs_dim = eval_env.observation_space.low.size
    action_dim = eval_env.action_space.n
    label_num = expl_env.label_num
    label_dim = expl_env.label_dim

    if variant['load_kwargs']['load']:
        load_dir = variant['load_kwargs']['load_dir']
        load_data = torch.load(load_dir + '/params.pkl', map_location='cpu')
        policy = load_data['trainer/policy']
        vf = load_data['trainer/value_function']
    else:
        hidden_dim = variant['mlp_kwargs']['hidden']
        encoder = nn.Sequential(
            nn.Linear(obs_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
        )
        decoder = nn.Linear(hidden_dim, action_dim)
        from layers import ReshapeLayer
        sup_learner = nn.Sequential(
            nn.Linear(hidden_dim, int(label_num * label_dim)),
            ReshapeLayer(shape=(label_num, label_dim)),
        )
        from sup_softmax_policy import SupSoftmaxPolicy
        policy = SupSoftmaxPolicy(encoder, decoder, sup_learner)
        print('parameters: ',
              np.sum([p.view(-1).shape[0] for p in policy.parameters()]))

        vf = Mlp(
            hidden_sizes=[32, 32],
            input_size=obs_dim,
            output_size=1,
        )

    vf_criterion = nn.MSELoss()
    eval_policy = ArgmaxDiscretePolicy(policy, use_preactivation=True)
    expl_policy = policy

    eval_path_collector = MdpPathCollector(
        eval_env,
        eval_policy,
    )
    expl_path_collector = MdpPathCollector(
        expl_env,
        expl_policy,
    )

    from sup_replay_buffer import SupReplayBuffer
    replay_buffer = SupReplayBuffer(
        observation_dim=obs_dim,
        label_dim=label_num,
        max_replay_buffer_size=int(1e6),
    )

    from rlkit.torch.vpg.ppo_sup_vanilla import PPOSupVanillaTrainer
    trainer = PPOSupVanillaTrainer(policy=policy,
                                   value_function=vf,
                                   vf_criterion=vf_criterion,
                                   replay_buffer=replay_buffer,
                                   **variant['trainer_kwargs'])
    algorithm = TorchOnlineRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        log_path_function=get_traffic_path_information,
        **variant['algorithm_kwargs'])
    algorithm.to(ptu.device)
    algorithm.train()
Example #8
def experiment(variant):
    from simple_sup import SimpleSupEnv
    expl_env = SimpleSupEnv(**variant['env_kwars'])
    eval_env = SimpleSupEnv(**variant['env_kwars'])
    obs_dim = eval_env.observation_space.low.size
    action_dim = eval_env.action_space.n
    label_num = expl_env.label_num
    label_dim = expl_env.label_dim

    hidden_dim = variant['hidden_dim']
    policy_net = nn.Sequential(
        nn.Linear(obs_dim + int(label_dim * label_num), hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, action_dim),
    )
    from layers import ReshapeLayer
    sup_learner = nn.Sequential(
        nn.Linear(obs_dim, hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, int(label_num * label_dim)),
        ReshapeLayer(shape=(label_num, label_dim)),
    )
    from sup_sep_softmax_policy import SupSepSoftmaxPolicy
    policy = SupSepSoftmaxPolicy(policy_net, sup_learner, label_num, label_dim)
    print('parameters: ',
          np.sum([p.view(-1).shape[0] for p in policy.parameters()]))

    vf = Mlp(
        hidden_sizes=[32],
        input_size=obs_dim,
        output_size=1,
    )
    vf_criterion = nn.MSELoss()
    eval_policy = ArgmaxDiscretePolicy(policy, use_preactivation=True)
    expl_policy = policy

    eval_path_collector = MdpPathCollector(
        eval_env,
        eval_policy,
    )
    from sup_sep_rollout import sup_sep_rollout
    expl_path_collector = MdpPathCollector(
        expl_env,
        expl_policy,
        rollout_fn=sup_sep_rollout,
    )
    from sup_replay_buffer import SupReplayBuffer
    replay_buffer = SupReplayBuffer(
        observation_dim=obs_dim,
        label_dim=label_num,
        max_replay_buffer_size=int(1e6),
    )

    from rlkit.torch.vpg.ppo_sup_sep import PPOSupSepTrainer
    trainer = PPOSupSepTrainer(policy=policy,
                               value_function=vf,
                               vf_criterion=vf_criterion,
                               replay_buffer=replay_buffer,
                               **variant['trainer_kwargs'])
    algorithm = TorchOnlineRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        **variant['algorithm_kwargs'])
    algorithm.to(ptu.device)
    algorithm.train()
Example #9
convNet = Network()
# Build ConvNet with ConvLayer and PoolingLayer
with open('params.npy', 'rb') as f:
    conv1 = ConvLayer(in_channels=3, out_channels=8, kernel_size=11)
    conv1.W = np.load(f)
    conv1.b = np.load(f)
    convNet.add(conv1)
    convNet.add(ReLULayer())
    convNet.add(MaxPoolingLayer(kernel_size=10))
    conv2 = ConvLayer(in_channels=8, out_channels=16, kernel_size=6)
    conv2.W = np.load(f)
    conv2.b = np.load(f)
    convNet.add(conv2)
    convNet.add(ReLULayer())
    convNet.add(MaxPoolingLayer(kernel_size=3))
    # flatten the conv feature maps: 16 * 6 * 6 = 576
    convNet.add(ReshapeLayer((batch_size, 16, 6, 6), (batch_size, 576)))
    fc1 = FCLayer(576, 64)
    fc1.W = np.load(f)
    fc1.b = np.load(f)
    convNet.add(fc1)
    convNet.add(ReLULayer())
    fc2 = FCLayer(64, 2)
    fc2.W = np.load(f)
    fc2.b = np.load(f)
    convNet.add(fc2)

img = Image.open('./ImageRecognition/trainingset_image/d_f18.jpg')

width = height = min(img.size)  # side of the square crop: the smaller image dimension
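Example #9 relies on the fact that consecutive np.save calls on one open file handle append arrays, and consecutive np.load calls read them back in the same order. A minimal sketch of how such a params.npy could be produced; the weight shapes below are assumptions for illustration, since the layer implementations are not shown.

import numpy as np

# hypothetical parameters, written in the order the loading code expects:
# conv1 (W, b), conv2 (W, b), fc1 (W, b), fc2 (W, b)
params = [
    np.random.randn(8, 3, 11, 11), np.zeros(8),   # conv1: 3 -> 8 channels, kernel 11
    np.random.randn(16, 8, 6, 6), np.zeros(16),   # conv2: 8 -> 16 channels, kernel 6
    np.random.randn(576, 64), np.zeros(64),       # fc1: 576 -> 64
    np.random.randn(64, 2), np.zeros(2),          # fc2: 64 -> 2
]
with open('params.npy', 'wb') as f:
    for arr in params:
        np.save(f, arr)  # arrays are appended and later read back in this order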