def experiment(variant):
    from simple_sup import SimpleSupEnv
    expl_env = SimpleSupEnv(**variant['env_kwars'])
    eval_env = SimpleSupEnv(**variant['env_kwars'])
    obs_dim = eval_env.observation_space.low.size
    action_dim = eval_env.action_space.n

    # Shared encoder feeding both the action decoder and the supervised head.
    encoder = nn.Sequential(
        nn.Linear(obs_dim, 16),
        nn.ReLU(),
    )
    decoder = nn.Linear(16, action_dim)
    from layers import ReshapeLayer
    sup_learner = nn.Sequential(
        nn.Linear(16, action_dim),
        ReshapeLayer(shape=(1, action_dim)),
    )
    from sup_softmax_policy import SupSoftmaxPolicy
    policy = SupSoftmaxPolicy(encoder, decoder, sup_learner)

    vf = Mlp(
        hidden_sizes=[32],
        input_size=obs_dim,
        output_size=1,
    )
    vf_criterion = nn.MSELoss()
    eval_policy = ArgmaxDiscretePolicy(policy, use_preactivation=True)
    expl_policy = policy

    eval_path_collector = MdpPathCollector(
        eval_env,
        eval_policy,
    )
    expl_path_collector = MdpPathCollector(
        expl_env,
        expl_policy,
    )
    from sup_replay_buffer import SupReplayBuffer
    replay_buffer = SupReplayBuffer(
        observation_dim=obs_dim,
        label_dim=1,
        max_replay_buffer_size=int(1e6),
    )
    from rlkit.torch.vpg.trpo_sup import TRPOSupTrainer
    trainer = TRPOSupTrainer(
        policy=policy,
        value_function=vf,
        vf_criterion=vf_criterion,
        replay_buffer=replay_buffer,
        **variant['trainer_kwargs'])
    algorithm = TorchOnlineRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        **variant['algorithm_kwargs'])
    algorithm.to(ptu.device)
    algorithm.train()
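# `layers.ReshapeLayer` is project-local and its source is not included here.
# The sketch below is an assumption of what it plausibly looks like in these
# PyTorch snippets -- a module that reshapes the trailing features into a
# fixed shape while keeping the batch dimension -- not the actual implementation.
import torch.nn as nn


class ReshapeLayer(nn.Module):
    def __init__(self, shape):
        super().__init__()
        self.shape = shape  # e.g. (label_num, label_dim)

    def forward(self, x):
        # Preserve the leading batch dimension; reshape the rest.
        return x.view(x.size(0), *self.shape)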
def experiment(variant):
    from simple_sup import SimpleSupEnv
    expl_env = SimpleSupEnv(**variant['env_kwars'])
    eval_env = SimpleSupEnv(**variant['env_kwars'])
    obs_dim = eval_env.observation_space.low.size
    action_dim = eval_env.action_space.n
    hidden_dim = variant['hidden_dim']

    encoder = nn.Sequential(
        nn.Linear(obs_dim, hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, hidden_dim),
        nn.ReLU(),
    )
    decoder = nn.Linear(hidden_dim, action_dim)
    from layers import ReshapeLayer
    sup_learner = nn.Sequential(
        nn.Linear(hidden_dim, action_dim),
        ReshapeLayer(shape=(1, action_dim)),
    )
    from sup_softmax_policy import SupSoftmaxPolicy
    policy = SupSoftmaxPolicy(encoder, decoder, sup_learner)
    print('parameters: ', np.sum([p.view(-1).shape[0] for p in policy.parameters()]))

    vf = Mlp(
        hidden_sizes=[32],
        input_size=obs_dim,
        output_size=1,
    )
    vf_criterion = nn.MSELoss()
    eval_policy = ArgmaxDiscretePolicy(policy, use_preactivation=True)
    expl_policy = policy

    eval_path_collector = MdpPathCollector(
        eval_env,
        eval_policy,
    )
    expl_path_collector = MdpPathCollector(
        expl_env,
        expl_policy,
    )
    from rlkit.torch.vpg.ppo_sup_online import PPOSupOnlineTrainer
    trainer = PPOSupOnlineTrainer(
        policy=policy,
        value_function=vf,
        vf_criterion=vf_criterion,
        **variant['trainer_kwargs'],
    )
    algorithm = TorchOnlineRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        **variant['algorithm_kwargs'],
    )
    algorithm.to(ptu.device)
    algorithm.train()
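# A hypothetical driver for the experiment above. The keys mirror what
# experiment() reads; the concrete kwargs depend on PPOSupOnlineTrainer and
# TorchOnlineRLAlgorithm in this rlkit fork and are left as placeholders.
import rlkit.torch.pytorch_util as ptu

variant = dict(
    env_kwars=dict(),         # forwarded to SimpleSupEnv (key spelled as in the code above)
    hidden_dim=32,            # illustrative value
    trainer_kwargs=dict(),    # e.g. discount, clip ratio, learning rates
    algorithm_kwargs=dict(),  # e.g. num_epochs, max_path_length, batch sizes
)
ptu.set_gpu_mode(False)  # rlkit device helper; sets ptu.device (CPU here)
experiment(variant)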
def experiment(variant):
    from traffic.make_env import make_env
    expl_env = make_env(args.exp_name, **variant['env_kwargs'])
    eval_env = make_env(args.exp_name, **variant['env_kwargs'])
    obs_dim = eval_env.observation_space.low.size
    action_dim = eval_env.action_space.n
    label_num = expl_env.label_num
    label_dim = expl_env.label_dim

    encoder = nn.Sequential(
        nn.Linear(obs_dim, 32),
        nn.ReLU(),
        nn.Linear(32, 32),
        nn.ReLU(),
    )
    decoder = nn.Linear(32, action_dim)
    from layers import ReshapeLayer
    sup_learner = nn.Sequential(
        nn.Linear(32, int(label_num * label_dim)),
        ReshapeLayer(shape=(label_num, label_dim)),
    )
    from sup_softmax_policy import SupSoftmaxPolicy
    policy = SupSoftmaxPolicy(encoder, decoder, sup_learner)
    print('parameters: ', np.sum([p.view(-1).shape[0] for p in policy.parameters()]))

    vf = Mlp(
        hidden_sizes=[32, 32],
        input_size=obs_dim,
        output_size=1,
    )
    vf_criterion = nn.MSELoss()
    eval_policy = ArgmaxDiscretePolicy(policy, use_preactivation=True)
    expl_policy = policy

    eval_path_collector = MdpPathCollector(
        eval_env,
        eval_policy,
    )
    expl_path_collector = MdpPathCollector(
        expl_env,
        expl_policy,
    )
    trainer = TRPOTrainer(
        policy=policy,
        value_function=vf,
        vf_criterion=vf_criterion,
        **variant['trainer_kwargs'])
    algorithm = TorchOnlineRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        log_path_function=get_traffic_path_information,
        **variant['algorithm_kwargs'])
    algorithm.to(ptu.device)
    algorithm.train()
def build_generator(input_var=None, use_batch_norm=True):
    from lasagne.layers import InputLayer, batch_norm
    from layers import (Lipshitz_Layer, LipConvLayer,
                        Subpixel_Layer, ReshapeLayer, FlattenLayer)

    layer = InputLayer(shape=(None, 10), input_var=input_var)
    if use_batch_norm:
        raise NotImplementedError
    else:
        layer = Lipshitz_Layer(layer, 512 * 7 * 7, init=1)
        layer = ReshapeLayer(layer, (-1, 512, 7, 7))
        layer = Subpixel_Layer(layer, 256, (3, 3), 2)
        layer = Subpixel_Layer(layer, 128, (3, 3), 2)
        layer = Subpixel_Layer(layer, 64, (3, 3), 2)
        layer = LipConvLayer(layer, 1, (1, 1), init=1,
                             nonlinearity=lasagne.nonlinearities.sigmoid)
        layer = ReshapeLayer(layer, (-1, 784))
    print("Generator output:", layer.output_shape)
    print("Number of parameters:", lasagne.layers.count_params(layer))
    return layer
def build_discriminator(input_var=None, use_batch_norm=True):
    from lasagne.layers import InputLayer, batch_norm
    from layers import (Lipshitz_Layer, LipConvLayer,
                        Subpixel_Layer, ReshapeLayer, FlattenLayer)

    layer = InputLayer(shape=(None, 784), input_var=input_var)
    if use_batch_norm:
        raise NotImplementedError
    else:
        layer = ReshapeLayer(layer, (-1, 1, 28, 28))
        layer = LipConvLayer(layer, 16, (5, 5), init=1)
        layer = LipConvLayer(layer, 32, (5, 5), init=1)
        layer = LipConvLayer(layer, 64, (5, 5), init=1)
        layer = LipConvLayer(layer, 128, (5, 5), init=1)
        layer = FlattenLayer(layer)
        layer = Lipshitz_Layer(layer, 512, init=1)
        layer = Lipshitz_Layer(layer, 1 + 10, init=1,
                               nonlinearity=lasagne.nonlinearities.sigmoid)
    print("Discriminator output:", layer.output_shape)
    print("Number of parameters:", lasagne.layers.count_params(layer))
    return layer
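# A minimal usage sketch for the two builders above, assuming Theano/Lasagne
# are installed; the variable names (noise_var, image_var) are illustrative,
# not from the original code.
import theano.tensor as T
import lasagne

noise_var = T.matrix('noise')   # (batch, 10) latent input for the generator
image_var = T.matrix('images')  # (batch, 784) flattened 28x28 images

generator = build_generator(noise_var, use_batch_norm=False)
discriminator = build_discriminator(image_var, use_batch_norm=False)

# Symbolic outputs: generated images, and discriminator scores on real and
# generated samples (get_output with `inputs=` reroutes the discriminator's
# input to the generator's output).
fake_images = lasagne.layers.get_output(generator)
real_scores = lasagne.layers.get_output(discriminator)
fake_scores = lasagne.layers.get_output(discriminator, inputs=fake_images)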
def experiment(variant):
    from traffic.make_env import make_env
    expl_env = make_env(args.exp_name, **variant['env_kwargs'])
    eval_env = make_env(args.exp_name, **variant['env_kwargs'])
    obs_dim = eval_env.observation_space.low.size
    action_dim = eval_env.action_space.n
    label_num = expl_env.label_num
    label_dim = expl_env.label_dim
    max_path_length = variant['trainer_kwargs']['max_path_length']

    if variant['load_kwargs']['load']:
        # Resume from a saved snapshot instead of building a fresh policy.
        load_dir = variant['load_kwargs']['load_dir']
        load_data = torch.load(load_dir + '/params.pkl', map_location='cpu')
        policy = load_data['trainer/policy']
        vf = load_data['trainer/value_function']
    else:
        hidden_dim = variant['lstm_kwargs']['hidden_dim']
        num_layers = variant['lstm_kwargs']['num_layers']
        # Initial action and (h, c) latent state for the recurrent policy.
        a_0 = np.zeros(action_dim)
        h_0 = np.zeros(hidden_dim * num_layers)
        c_0 = np.zeros(hidden_dim * num_layers)
        latent_0 = (h_0, c_0)
        from lstm_net import LSTMNet
        lstm_net = LSTMNet(obs_dim, action_dim, hidden_dim, num_layers)
        decoder = nn.Linear(hidden_dim, action_dim)
        from layers import ReshapeLayer
        sup_learner = nn.Sequential(
            nn.Linear(hidden_dim, int(label_num * label_dim)),
            ReshapeLayer(shape=(label_num, label_dim)),
        )
        from sup_softmax_lstm_policy import SupSoftmaxLSTMPolicy
        policy = SupSoftmaxLSTMPolicy(
            a_0=a_0,
            latent_0=latent_0,
            obs_dim=obs_dim,
            action_dim=action_dim,
            lstm_net=lstm_net,
            decoder=decoder,
            sup_learner=sup_learner,
        )
        print('parameters: ', np.sum([p.view(-1).shape[0] for p in policy.parameters()]))
        vf = Mlp(
            hidden_sizes=[32, 32],
            input_size=obs_dim,
            output_size=1,
        )
    vf_criterion = nn.MSELoss()
    from rlkit.torch.policies.make_deterministic import MakeDeterministic
    eval_policy = MakeDeterministic(policy)
    expl_policy = policy

    eval_path_collector = MdpPathCollector(
        eval_env,
        eval_policy,
    )
    expl_path_collector = MdpPathCollector(
        expl_env,
        expl_policy,
    )
    from sup_replay_buffer import SupReplayBuffer
    replay_buffer = SupReplayBuffer(
        observation_dim=obs_dim,
        action_dim=action_dim,
        label_dim=label_num,
        max_replay_buffer_size=int(1e6),
        max_path_length=max_path_length,
        recurrent=True,
    )
    from rlkit.torch.vpg.ppo_sup import PPOSupTrainer
    trainer = PPOSupTrainer(
        policy=policy,
        value_function=vf,
        vf_criterion=vf_criterion,
        replay_buffer=replay_buffer,
        recurrent=True,
        **variant['trainer_kwargs'])
    algorithm = TorchOnlineRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        log_path_function=get_traffic_path_information,
        **variant['algorithm_kwargs'])
    algorithm.to(ptu.device)
    algorithm.train()
def experiment(variant):
    from traffic.make_env import make_env
    expl_env = make_env(args.exp_name, **variant['env_kwargs'])
    eval_env = make_env(args.exp_name, **variant['env_kwargs'])
    obs_dim = eval_env.observation_space.low.size
    action_dim = eval_env.action_space.n
    label_num = expl_env.label_num
    label_dim = expl_env.label_dim

    if variant['load_kwargs']['load']:
        load_dir = variant['load_kwargs']['load_dir']
        load_data = torch.load(load_dir + '/params.pkl', map_location='cpu')
        policy = load_data['trainer/policy']
        vf = load_data['trainer/value_function']
    else:
        hidden_dim = variant['mlp_kwargs']['hidden']
        encoder = nn.Sequential(
            nn.Linear(obs_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
        )
        decoder = nn.Linear(hidden_dim, action_dim)
        from layers import ReshapeLayer
        sup_learner = nn.Sequential(
            nn.Linear(hidden_dim, int(expl_env.label_num * expl_env.label_dim)),
            ReshapeLayer(shape=(expl_env.label_num, expl_env.label_dim)),
        )
        from sup_softmax_policy import SupSoftmaxPolicy
        policy = SupSoftmaxPolicy(encoder, decoder, sup_learner)
        print('parameters: ', np.sum([p.view(-1).shape[0] for p in policy.parameters()]))
        vf = Mlp(
            hidden_sizes=[32, 32],
            input_size=obs_dim,
            output_size=1,
        )
    vf_criterion = nn.MSELoss()
    eval_policy = ArgmaxDiscretePolicy(policy, use_preactivation=True)
    expl_policy = policy

    eval_path_collector = MdpPathCollector(
        eval_env,
        eval_policy,
    )
    expl_path_collector = MdpPathCollector(
        expl_env,
        expl_policy,
    )
    from sup_replay_buffer import SupReplayBuffer
    replay_buffer = SupReplayBuffer(
        observation_dim=obs_dim,
        label_dim=label_num,
        max_replay_buffer_size=int(1e6),
    )
    from rlkit.torch.vpg.ppo_sup_vanilla import PPOSupVanillaTrainer
    trainer = PPOSupVanillaTrainer(
        policy=policy,
        value_function=vf,
        vf_criterion=vf_criterion,
        replay_buffer=replay_buffer,
        **variant['trainer_kwargs'])
    algorithm = TorchOnlineRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        log_path_function=get_traffic_path_information,
        **variant['algorithm_kwargs'])
    algorithm.to(ptu.device)
    algorithm.train()
def experiment(variant):
    from simple_sup import SimpleSupEnv
    expl_env = SimpleSupEnv(**variant['env_kwars'])
    eval_env = SimpleSupEnv(**variant['env_kwars'])
    obs_dim = eval_env.observation_space.low.size
    action_dim = eval_env.action_space.n
    label_num = expl_env.label_num
    label_dim = expl_env.label_dim
    hidden_dim = variant['hidden_dim']

    # Action network: input is the observation concatenated with the
    # (label_num x label_dim) label vector; the supervised learner predicts
    # those labels from the observation alone.
    policy = nn.Sequential(
        nn.Linear(obs_dim + int(label_dim * label_num), hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, action_dim),
    )
    from layers import ReshapeLayer
    sup_learner = nn.Sequential(
        nn.Linear(obs_dim, hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, int(label_num * label_dim)),
        ReshapeLayer(shape=(label_num, label_dim)),
    )
    from sup_sep_softmax_policy import SupSepSoftmaxPolicy
    policy = SupSepSoftmaxPolicy(policy, sup_learner, label_num, label_dim)
    print('parameters: ', np.sum([p.view(-1).shape[0] for p in policy.parameters()]))

    vf = Mlp(
        hidden_sizes=[32],
        input_size=obs_dim,
        output_size=1,
    )
    vf_criterion = nn.MSELoss()
    eval_policy = ArgmaxDiscretePolicy(policy, use_preactivation=True)
    expl_policy = policy

    eval_path_collector = MdpPathCollector(
        eval_env,
        eval_policy,
    )
    from sup_sep_rollout import sup_sep_rollout
    expl_path_collector = MdpPathCollector(
        expl_env,
        expl_policy,
        rollout_fn=sup_sep_rollout,
    )
    from sup_replay_buffer import SupReplayBuffer
    replay_buffer = SupReplayBuffer(
        observation_dim=obs_dim,
        label_dim=label_num,
        max_replay_buffer_size=int(1e6),
    )
    from rlkit.torch.vpg.ppo_sup_sep import PPOSupSepTrainer
    trainer = PPOSupSepTrainer(
        policy=policy,
        value_function=vf,
        vf_criterion=vf_criterion,
        replay_buffer=replay_buffer,
        **variant['trainer_kwargs'])
    algorithm = TorchOnlineRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        **variant['algorithm_kwargs'])
    algorithm.to(ptu.device)
    algorithm.train()
# Build ConvNet with ConvLayer and PoolingLayer, loading pretrained weights
# from params.npy; np.load(f) reads arrays back in the order they were saved.
convNet = Network()
with open('params.npy', 'rb') as f:
    conv1 = ConvLayer(in_channels=3, out_channels=8, kernel_size=11)
    conv1.W = np.load(f)
    conv1.b = np.load(f)
    convNet.add(conv1)
    convNet.add(ReLULayer())
    convNet.add(MaxPoolingLayer(kernel_size=10))

    conv2 = ConvLayer(in_channels=8, out_channels=16, kernel_size=6)
    conv2.W = np.load(f)
    conv2.b = np.load(f)
    convNet.add(conv2)
    convNet.add(ReLULayer())
    convNet.add(MaxPoolingLayer(kernel_size=3))
    convNet.add(ReshapeLayer((batch_size, 16, 6, 6), (batch_size, 576)))

    fc1 = FCLayer(576, 64)
    fc1.W = np.load(f)
    fc1.b = np.load(f)
    convNet.add(fc1)
    convNet.add(ReLULayer())

    fc2 = FCLayer(64, 2)
    fc2.W = np.load(f)
    fc2.b = np.load(f)
    convNet.add(fc2)

img = Image.open('./ImageRecognition/trainingset_image/d_f18.jpg')
# Get dimensions: use the shorter side for both width and height (square crop).
width, height = (img.size[0], img.size[0]) if img.size[0] < img.size[1] else (img.size[1], img.size[1])
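# The sequential np.load(f) calls above only work if params.npy was written
# with np.save calls in exactly the same order. A hypothetical sketch of that
# export step (the actual training/export script is not shown):
with open('params.npy', 'wb') as f:
    for layer in (conv1, conv2, fc1, fc2):
        np.save(f, layer.W)
        np.save(f, layer.b)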