def __init__(self, num_inputs, action_space):
    """Two-trunk MLP actor-critic policy with Tanh activations (128 units)."""
    super(MLPPolicy, self).__init__()
    self.hook = False
    self.action_space = action_space
    self.nNode = 128
    width = self.nNode
    # Actor and critic share the same topology but not their weights.
    self.actor = nn.Sequential(
        nn.Linear(num_inputs, width),
        nn.Tanh(),
        nn.Linear(width, width),
        nn.Tanh(),
    )
    self.critic = nn.Sequential(
        nn.Linear(num_inputs, width),
        nn.Tanh(),
        nn.Linear(width, width),
        nn.Tanh(),
    )
    # Scalar state-value head on top of the critic trunk.
    self.critic_linear = nn.Linear(width, 1)
    self.dist = get_distribution(width, action_space)
    self.train()
    self.reset_parameters()
def __init__(self, num_inputs, action_space):
    """Embedding-based actor-critic base.

    Actor and critic are plain ``nn.Embedding`` lookups whose weights are
    overwritten after ``reset_parameters`` with an identity matrix, i.e. a
    fixed one-hot encoding of the input index.
    """
    super(EmbBase, self).__init__()
    emb_dim = 500
    self.action_space = action_space
    actor_emb = nn.Embedding(num_inputs, emb_dim)
    self.actor = nn.Sequential(actor_emb)
    critic_emb = nn.Embedding(num_inputs, emb_dim)
    self.critic = nn.Sequential(critic_emb)
    # Scalar state-value head on the critic's embedding output.
    self.critic_linear = nn.Linear(emb_dim, 1)
    self.dist = get_distribution(emb_dim, action_space)
    self.train()
    self.reset_parameters()
    # FIX: torch.eye(emb_dim) is (emb_dim, emb_dim) and only matches the
    # (num_inputs, emb_dim) embedding weight when num_inputs == emb_dim.
    # The rectangular form torch.eye(n, m) works for any num_inputs and is
    # byte-identical to the original in the square case.
    actor_emb.weight.data = torch.eye(num_inputs, emb_dim)
    critic_emb.weight.data = torch.eye(num_inputs, emb_dim)
def __init__(self, num_inputs, action_space, use_gru):
    """Nature-DQN style convolutional base with optional GRU recurrence."""
    super(CNNBase, self).__init__()

    def init_relu(module):
        # Orthogonal weights scaled with the ReLU gain, zero biases.
        return init(module, nn.init.orthogonal_,
                    lambda x: nn.init.constant_(x, 0),
                    nn.init.calculate_gain('relu'))

    self.main = nn.Sequential(
        init_relu(nn.Conv2d(num_inputs, 32, 8, stride=4)), nn.ReLU(),
        init_relu(nn.Conv2d(32, 64, 4, stride=2)), nn.ReLU(),
        init_relu(nn.Conv2d(64, 32, 3, stride=1)), nn.ReLU(),
        Flatten(),
        init_relu(nn.Linear(32 * 7 * 7, 512)), nn.ReLU(),
    )

    if use_gru:
        # Recurrent core: orthogonal recurrent/input weights, zero biases.
        self.gru = nn.GRUCell(512, 512)
        nn.init.orthogonal_(self.gru.weight_ih.data)
        nn.init.orthogonal_(self.gru.weight_hh.data)
        self.gru.bias_ih.data.fill_(0)
        self.gru.bias_hh.data.fill_(0)

    def init_plain(module):
        # Orthogonal weights with default gain for the value head.
        return init(module, nn.init.orthogonal_,
                    lambda x: nn.init.constant_(x, 0))

    self.critic_linear = init_plain(nn.Linear(512, 1))
    self.dist = get_distribution(512, action_space)
    self.train()
def __init__(self, input_size, hidden_size, action_space, num_layers=1):
    """Recurrent A2C model: MLP feature extractor followed by a GRU cell."""
    super(DirectRLModel, self).__init__()
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.input_size = input_size
    self.action_space = action_space
    # Feed-forward feature extractor (no dropout), squeezing to 20 features.
    self.seq_no_dropout = nn.Sequential(
        nn.Linear(input_size, 512), nn.ReLU(),
        nn.Linear(512, 512), nn.ReLU(),
        nn.Linear(512, 20), nn.ReLU(),
    )
    self.apply(self.weights_init)
    # Recurrent part of the network.
    self.rnn = nn.GRUCell(20, hidden_size)
    # Critic v(s) for A2C.
    self.critic_linear = nn.Linear(hidden_size, 1)
    # Actor deciding between Short (0), Neutral (1) or Buy (2) in A2C.
    self.dist = get_distribution(hidden_size, self.action_space)
def train(encoder, decoder, discriminator, opt):
    """Run one epoch of adversarial-autoencoder training.

    Per batch, three alternating updates are applied:
      1. reconstruction (encoder + decoder, pixel-wise BCE),
      2. discriminator (real prior samples vs. detached encoded codes),
      3. encoder adversarial regularisation (fool the discriminator).

    Returns the (reconstruct_loss, e_loss, d_loss) of the last batch.
    """
    enc_opt = Adam(encoder.parameters(), lr=0.001)
    dec_opt = Adam(decoder.parameters(), lr=0.001)
    dis_opt = Adam(discriminator.parameters(), lr=0.001)

    image_dataset = get_dataset(opt.data_name, opt.data_root, opt.image_size, train=True)
    image_loader = DataLoader(image_dataset, batch_size=opt.batch_size, shuffle=True)
    dist_dataset = get_distribution(opt.distribution, len(image_dataset), opt.num_classes)
    # Re-seed numpy per worker so prior samples differ across loader workers.
    dist_loader = DataLoader(dist_dataset, batch_size=opt.batch_size, shuffle=True,
                             worker_init_fn=lambda x: np.random.seed())

    encoder.train()
    decoder.train()
    discriminator.train()

    eps = 1e-8
    for (x, x_label), (z_real, z_label) in zip(image_loader, dist_loader):
        x = x.to(opt.device)
        x_label = x_label.to(opt.device)
        z_real = z_real.to(opt.device)
        z_label = z_label.to(opt.device)

        # --- reconstruction: BCE between input image and decoded output ---
        enc_opt.zero_grad()
        dec_opt.zero_grad()
        recon = decoder(encoder(x))
        reconstruct_loss = -torch.mean(
            x * torch.log(recon + eps) + (1 - x) * torch.log(1 - recon + eps))
        reconstruct_loss.backward()
        enc_opt.step()
        dec_opt.step()

        # --- discriminator: separate prior samples from encoded codes ---
        enc_opt.zero_grad()
        dis_opt.zero_grad()
        with torch.no_grad():
            z_fake = encoder(x)  # detached: only the discriminator learns here
        d_real = discriminator(z_real, z_label)
        d_fake = discriminator(z_fake, x_label)
        d_loss = -0.02 * torch.mean(torch.log(d_real + eps) + torch.log(1 - d_fake + eps))
        d_loss.backward()
        dis_opt.step()

        # --- encoder: adversarial term pushing codes toward the prior ---
        enc_opt.zero_grad()
        dis_opt.zero_grad()
        z_fake = encoder(x)
        e_fake = discriminator(z_fake, x_label)
        e_loss = -0.02 * torch.mean(torch.log(e_fake + eps))
        e_loss.backward()
        enc_opt.step()

    # NOTE(review): if the loaders are empty these names are unbound — the
    # caller presumably guarantees a non-empty dataset; confirm upstream.
    return reconstruct_loss, e_loss, d_loss
def __init__(self, num_actor_inputs, num_critic_inputs, action_space,
             symm_policy=True, use_seq=False, cuda_use=False):
    """Asymmetric actor-critic MLP: actor and critic may see different inputs.

    Three ReLU layers of 512 units per trunk; ``symm_policy`` and
    ``cuda_use`` are stored for use elsewhere in the class.
    """
    super(MLPPolicy, self).__init__()
    self.action_space = action_space
    self.nNode = 512       # trunk width (previously 64/128 variants)
    self.hidden_dim = 512  # trunk output width
    self.cuda_use = cuda_use
    self.symm_policy = symm_policy
    # FIX: idiomatic truthiness test instead of '== True' (PEP 8).
    if use_seq:
        # Placeholder for a sequence model; as input, (N, Cin, L):
        # N batch size, Cin input size, L length of the signal sequence.
        self.seq = 0
    self.actor = nn.Sequential(
        nn.Linear(num_actor_inputs, self.nNode),
        nn.ReLU(),
        nn.Linear(self.nNode, self.hidden_dim),
        nn.ReLU(),
        # FIX: the input of this layer is the previous layer's output, so it
        # is hidden_dim (same value as nNode here, but now stated correctly).
        nn.Linear(self.hidden_dim, self.hidden_dim),
        nn.ReLU(),
    )
    self.critic = nn.Sequential(
        nn.Linear(num_critic_inputs, self.nNode),
        nn.ReLU(),
        nn.Linear(self.nNode, self.hidden_dim),
        nn.ReLU(),
        nn.Linear(self.hidden_dim, self.hidden_dim),
        nn.ReLU(),
    )
    self.critic_linear = nn.Linear(self.hidden_dim, 1)
    self.dist = get_distribution(self.hidden_dim, action_space)
    self.train()
    self.reset_parameters()
def __init__(self, num_inputs, action_space, use_gru, use_icm):
    """CNN policy built on a Nature-DQN head, with optional GRU and ICM."""
    super(CNNPolicy, self).__init__()
    feature_dim = 512
    self.head = NatureHead(num_inputs)
    if use_gru:
        self.gru = nn.GRUCell(feature_dim, feature_dim)
    if use_icm:
        # Intrinsic Curiosity Module for exploration bonuses.
        self.icm = ICM(action_space, feature_dim, num_inputs)
    self.critic_linear = nn.Linear(feature_dim, 1)
    self.dist = get_distribution(feature_dim, action_space)
    self.train()
    self.reset_parameters()
def __init__(self, num_inputs, action_space):
    """Linear actor-critic with two 64-unit layers per trunk."""
    super(MLPPolicy_linear, self).__init__()
    self.action_space = action_space
    width = 64
    # Actor trunk.
    self.a_fc1 = nn.Linear(num_inputs, width)
    self.a_fc2 = nn.Linear(width, width)
    # Critic trunk.
    self.v_fc1 = nn.Linear(num_inputs, width)
    self.v_fc2 = nn.Linear(width, width)
    # Scalar state-value head.
    self.critic_linear = nn.Linear(width, 1)
    self.dist = get_distribution(width, action_space)
    self.train()
    self.reset_parameters()
def __init__(self, num_inputs, action_space, use_gru):
    """Convolutional policy with explicit layers and optional GRU recurrence."""
    super(CNNPolicy, self).__init__()
    # Nature-DQN style convolutional stack.
    # NOTE(review): 32 * 7 * 7 implies an 84x84 input — confirm with caller.
    self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=4)
    self.conv2 = nn.Conv2d(32, 64, 4, stride=2)
    self.conv3 = nn.Conv2d(64, 32, 3, stride=1)
    self.linear1 = nn.Linear(32 * 7 * 7, 512)
    if use_gru:
        self.gru = nn.GRUCell(512, 512)
    self.critic_linear = nn.Linear(512, 1)
    self.dist = get_distribution(512, action_space)
    self.train()
    self.reset_parameters()
def __init__(self, num_inputs, action_space):
    """Four-conv policy for 180x180 images, flattened to a 512-d feature."""
    super(CONVPolicy, self).__init__()
    # Spatial sizes for a 180x180 input: 180 -> 44 -> 20 -> 9 -> 7.
    layers = [
        nn.Conv2d(num_inputs, 32, 8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, 5, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 32, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(32, 16, 3, stride=1),
        nn.ReLU(),
        Flatten(),
        nn.Linear(16 * 7 * 7, 512),
        nn.ReLU(),
    ]
    self.main = nn.Sequential(*layers)
    self.critic_linear = nn.Linear(512, 1)
    self.dist = get_distribution(512, action_space)
    self.train()
    self.reset_parameters()
def __init__(self, num_inputs, action_space, use_gru):
    """Sequential Nature-DQN convolutional policy with optional GRU."""
    super(CNNPolicy, self).__init__()
    feature_dim = 512
    self.main = nn.Sequential(
        nn.Conv2d(num_inputs, 32, 8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, 4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 32, 3, stride=1),
        nn.ReLU(),
        Flatten(),
        nn.Linear(32 * 7 * 7, feature_dim),
        nn.ReLU(),
    )
    if use_gru:
        self.gru = nn.GRUCell(feature_dim, feature_dim)
    self.critic_linear = nn.Linear(feature_dim, 1)
    self.dist = get_distribution(feature_dim, action_space)
    self.train()
    self.reset_parameters()
def __init__(self, num_inputs, action_space):
    """Tanh MLP base (64 units) with normalized-column weight initialisation."""
    super(MLPBase, self).__init__()
    self.action_space = action_space

    def initialized(module):
        # init_normc_ weights, zero biases.
        return init(module, init_normc_, lambda x: nn.init.constant_(x, 0))

    self.actor = nn.Sequential(
        initialized(nn.Linear(num_inputs, 64)), nn.Tanh(),
        initialized(nn.Linear(64, 64)), nn.Tanh(),
    )
    self.critic = nn.Sequential(
        initialized(nn.Linear(num_inputs, 64)), nn.Tanh(),
        initialized(nn.Linear(64, 64)), nn.Tanh(),
    )
    self.critic_linear = initialized(nn.Linear(64, 1))
    self.dist = get_distribution(64, action_space)
    self.train()
def __init__(self, num_inputs, action_space, use_icm):
    """MLP actor-critic with optional Intrinsic Curiosity Module."""
    super(MLPPolicy, self).__init__()
    self.action_space = action_space
    if use_icm:
        # ICM without a CNN head: operates directly on the flat observation.
        self.icm = ICM(action_space, num_inputs, cnn_head=False)
    width = 64
    # Actor trunk.
    self.a_fc1 = nn.Linear(num_inputs, width)
    self.a_fc2 = nn.Linear(width, width)
    # Critic trunk.
    self.v_fc1 = nn.Linear(num_inputs, width)
    self.v_fc2 = nn.Linear(width, width)
    self.critic_linear = nn.Linear(width, 1)
    self.dist = get_distribution(width, action_space)
    self.train()
    self.reset_parameters()
def __init__(self, num_actor_inputs, num_critic_inputs, action_space,
             use_gru=False, cuda_use=False):
    """LSTM actor-critic policy with separate actor/critic input sizes.

    Each trunk is a Tanh linear layer feeding a single-layer LSTM whose
    hidden state is created via ``self.init_hidden()``.
    """
    super(RNNPolicy, self).__init__()
    self.action_space = action_space
    self.nNode = 64
    self.hidden_dim = 64
    self.cuda_use = cuda_use
    # FIX: idiomatic truthiness test instead of '== True' (PEP 8).
    if use_gru:
        # Placeholder — the GRU path is not implemented in this constructor.
        self.gru = 0
    self.actor = nn.Sequential(
        nn.Linear(num_actor_inputs, self.nNode),
        nn.Tanh(),
    )
    self.actor_lstm = nn.LSTM(self.nNode, self.hidden_dim, num_layers=1)
    self.a_lstm_hidden = self.init_hidden()
    self.critic = nn.Sequential(
        nn.Linear(num_critic_inputs, self.nNode),
        nn.Tanh(),
    )
    self.critic_lstm = nn.LSTM(self.nNode, self.hidden_dim, num_layers=1)
    self.c_lstm_hidden = self.init_hidden()
    self.critic_linear = nn.Linear(self.hidden_dim, 1)
    self.dist = get_distribution(self.hidden_dim, action_space)
    self.train()
    self.reset_parameters()