def build_model(self):
    """Create a generator and a discriminator."""
    if self.dataset in ['CelebA', 'RaFD', 'MNIST']:
        self.G = Generator(self.g_conv_dim, self.c_dim + self.con_dim,
                           self.image_size, self.op_channels)
        self.FE = FE(self.image_size, self.d_conv_dim, self.op_channels)
        self.D = Discriminator(self.image_size, self.d_conv_dim, self.c_dim)
        self.Q = Q(self.image_size, self.d_conv_dim, self.con_dim)
    elif self.dataset in ['Both']:
        self.G = Generator(self.g_conv_dim, self.c_dim + self.c2_dim + 2,
                           self.g_repeat_num)  # 2 for mask vector.
        self.D = Discriminator(self.image_size, self.d_conv_dim,
                               self.c_dim + self.c2_dim, self.d_repeat_num)

    self.g_optimizer = torch.optim.Adam(
        [{'params': self.G.parameters()}, {'params': self.Q.parameters()}],
        self.g_lr, [self.beta1, self.beta2])
    self.d_optimizer = torch.optim.Adam(
        [{'params': self.D.parameters()}, {'params': self.FE.parameters()}],
        self.d_lr, [self.beta1, self.beta2])

    # self.print_network(self.G, 'G')
    # self.print_network(self.D, 'D')

    self.G.to(self.device)
    self.D.to(self.device)
    self.FE.to(self.device)
    self.Q.to(self.device)
def __init__(self):
    self.V = V(n_state).to(device)
    self.target_V = V(n_state).to(device)
    self.policy = Actor(n_state, max_action).to(device)
    self.Q = Q(n_state, n_action).to(device)
    self.optimV = th.optim.Adam(self.V.parameters(), lr=lr)
    self.optimQ = th.optim.Adam(self.Q.parameters(), lr=lr)
    self.optimP = th.optim.Adam(self.policy.parameters(), lr=lr)
    self.memory = replay_memory(memory_size)
def epsilon_greedy(s, Q=Q, epsilon=epsilon):
    # torch expects FloatTensors, so we use `.float()`
    s = torch.from_numpy(s).float()
    s = Variable(s, volatile=True).cuda()
    if random.random() <= epsilon:
        a = env.action_space.sample()
    else:
        a = int(Q(s).max(0)[1])
    return a
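# A minimal usage sketch for the epsilon-greedy policy above, assuming the same
# global `env` and `epsilon` as in the snippet; this rollout loop is illustrative
# and not part of the source.
s = env.reset()
done = False
while not done:
    a = epsilon_greedy(s, epsilon=epsilon)
    s, r, done, _ = env.step(a)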
def __init__(self, s_size, a_size, random_seed):
    """Initialize an Agent object.

    Params
    ======
        s_size (int): dimension of each state (s)
        a_size (int): dimension of each action (a)
        random_seed (int): random seed
    """
    self.s_size = s_size
    self.a_size = a_size
    self.random_seed = random.seed(random_seed)

    # Q-Network
    self.q = Q(s_size, a_size, random_seed).to(device)
    self.q_target = Q(s_size, a_size, random_seed).to(device)
    self.optimizer = optim.Adam(self.q.parameters(), lr=LR)

    # Replay memory
    self.memory = Memory(a_size, BUFFER_SIZE, BATCH_SIZE, random_seed)

    # Initialize time step (for updating every UPDATE_EVERY steps)
    self.t_step = 0
def __init__(self, side, checkpoint_name, initialization_checkpoint):
    self.side = side
    self.checkpoint_name = checkpoint_name
    self.sess = tf.Session()
    with tf.variable_scope("model_" + checkpoint_name, reuse=tf.AUTO_REUSE) as scope:
        self.state_placeholder = tf.placeholder(tf.int32, shape=(9, 3))
        self.next_state_placeholder = tf.placeholder(tf.int32, shape=(9, 3))
        self.reward_placeholder = tf.placeholder(tf.int32)
        self.soft_tensor, self.update_op, self.loss_op, self.reward_op = Q(
            self.state_placeholder, self.next_state_placeholder,
            self.reward_placeholder)
    self.saver = tf.train.Saver()
    if not initialization_checkpoint:
        self.sess.run(tf.global_variables_initializer())
    else:
        self.saver.restore(self.sess, "ckpt/" + initialization_checkpoint)
def __init__(self, state_size, action_size, max_tau=5):
    self.max_tau = max_tau
    self.iterations = 3
    self.state_size = state_size
    self.action_size = action_size
    self.goal_size = state_size
    # The original snippet used self.tau_size without defining it; assumed here
    # that the horizon tau enters the network as a single scalar input.
    self.tau_size = 1
    self.lr = 0.001
    self.batch_size = 128
    self.episodes = 1000
    self.epsilon = 1.0
    self.epsilon_min = 0.01
    self.epsilon_decay = 0.999
    self.model = Q(self.state_size + self.goal_size + self.tau_size,
                   action_size, self.lr)
    self.optimizer = optim.Adam(self.model.parameters(), lr=self.model.lr)
    self.loss = nn.SmoothL1Loss()
    # self.loss = nn.MSELoss()
    self.memory = deque(maxlen=2000)
    self.k = 4
    self.n = 2
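# Hedged sketch (not from the source): one way the Q input could be assembled,
# assuming the network expects [state | goal | tau] concatenated along the
# feature dimension; `state`, `goal`, and `tau` are illustrative tensors.
def q_input(state, goal, tau):
    return torch.cat([state, goal, tau], dim=-1)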
def __init__(self, gamma, alpha, epsilon, epsilon_min, epsilon_decay,
             game="CartPole-v1", mean_bound=5, reward_bound=495.0,
             save_model=10):
    # Environment variables
    self.game = game
    self.env = gym.make(self.game)
    self.num_states = self.env.observation_space.shape[0]
    self.num_actions = self.env.action_space.n

    # Agent variables
    self.alpha = alpha
    self.gamma = gamma
    self.model = Q(self.num_actions, self.alpha, self.gamma)
    self.save_model = save_model
    self.epsilon = epsilon
    self.epsilon_min = epsilon_min
    self.epsilon_decay = epsilon_decay
    self.mean_bound = mean_bound
    self.reward_bound = reward_bound

    # File paths
    dirname = os.path.dirname(__file__)
    self.path_model = os.path.join(dirname, "../models/q.pickle")
    self.path_plot = os.path.join(dirname, "../plots/q.png")

    # Load model, if it already exists
    try:
        self.model.load(self.path_model)
    except:
        print("Model does not exist! Create new model...")
import torch.nn.functional as F
import torch.nn.init as init
import torch.optim as optim
import torchvision.datasets as dset
import torchvision.transforms as transforms
from torch import nn
from torch.utils.data import DataLoader

from args import args
from env import env
from exploration import decay_exploration, epsilon, epsilon_greedy
from model import Q
from replay_buffer import replay_buffer
from train import criterion, train

optimizer = optim.Adam(Q.parameters(), lr=args.lr)

# TODO wrap data in dataset
for i in range(args.iterations):
    done = False
    s = env.reset()

    # TODO fold into rollout
    while not done:
        epsilon = decay_exploration(i, epsilon)
        a = epsilon_greedy(s, epsilon=epsilon)
        succ, r, done, _ = env.step(a)
def train(iterations=10000, batch_size=100, sample_interval=5,
          save_model_interval=100, train_D_iters=1, train_G_iters=3,
          D_lr=0.0001, G_lr=0.0001, betas=(0.5, 0.99),
          img_dir='./info_imgs', model_dir='./models'):
    imgs, digits, test_img, test_digits = load_mnist()
    dataset = Dataset(imgs, digits)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    # dataset = Dataset(test_img, test_digits)
    # test_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

    if torch.cuda.is_available():  # note: the original omitted the call parentheses
        print(f"Using GPU {torch.cuda.current_device()}")
        device = "cuda"
    else:
        print("Using CPU...")
        device = "cpu"

    generator, discriminator, front, qq, encoder = G(), D(), FrontEnd(), Q(), E()
    generator = generator.to(device).apply(weights_init)
    discriminator = discriminator.to(device).apply(weights_init)
    qq = qq.to(device).apply(weights_init)
    encoder = encoder.to(device).apply(weights_init)
    front = front.to(device).apply(weights_init)

    opt_G = torch.optim.Adam([{"params": generator.parameters()},
                              {"params": qq.parameters()},
                              {"params": encoder.parameters()}],
                             lr=G_lr, betas=betas)
    opt_D = torch.optim.Adam([{"params": discriminator.parameters()},
                              {"params": front.parameters()}],
                             lr=D_lr, betas=betas)

    CELoss_D = nn.CrossEntropyLoss(
        weight=torch.FloatTensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])).to(device)
    CELoss_G = nn.CrossEntropyLoss(
        weight=torch.FloatTensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 20])).to(device)
    CELoss_Q = nn.CrossEntropyLoss().to(device)
    CosineLoss = nn.CosineEmbeddingLoss().to(device)

    real_x = torch.FloatTensor(batch_size, 1, 32, 32).to(device)
    trg = torch.LongTensor(batch_size).to(device)
    label = torch.FloatTensor(batch_size, 1).to(device)
    noise = torch.FloatTensor(batch_size, 54).to(device)
    c = torch.FloatTensor(batch_size, 10).to(device)
    v_target = torch.LongTensor(batch_size, 64).to(device)  # For Q

    real_x = Variable(real_x)
    noise = Variable(noise)
    c = Variable(c)
    trg = Variable(trg, requires_grad=False)
    label = Variable(label, requires_grad=False)
    v_target = Variable(v_target, requires_grad=False)

    for epoch in range(iterations):
        for step, [batch_x, batch_target] in enumerate(loader):
            bs = batch_x.size(0)

            # train D
            # ========
            # real
            opt_D.zero_grad()
            real_x.data.copy_(batch_x)
            trg.data.copy_(batch_target)
            fe1 = front(real_x)
            real_pred = discriminator(fe1)
            real_loss = CELoss_D(real_pred, trg)
            real_loss.backward()

            # fake
            real_x.data.copy_(batch_x)
            v = encoder(real_x)
            z, idx = noise_sample(c, noise, v, bs)
            fake_stroke = generator(z)
            fake_x = fake_stroke + real_x
            fake_x = fake_x.clamp(max=1, min=0)
            fe2 = front(fake_x.detach())
            fake_pred = discriminator(fe2)
            trg.data.fill_(10)
            if epoch > 0:
                ignore_rate = 0.01
            else:
                ignore_rate = 1
            fake_loss = CELoss_D(fake_pred, trg) * ignore_rate
            fake_loss.backward()
            D_loss = real_loss + fake_loss
            # D_loss.backward()
            opt_D.step()

            # train G, Q, E
            # =============
            # train G
            opt_G.zero_grad()
            fe = front(fake_x)
            fake_pred = discriminator(fe)
            trg.data.copy_(torch.LongTensor(idx))
            reconstruct_loss = CELoss_G(fake_pred, trg)

            # train Q
            c_out, v_out = qq(fe)
            class_ = torch.LongTensor(idx).to(device)
            target = Variable(class_)
            v_target.data.copy_(v)

            # GQ Loss
            q_c_loss = CELoss_Q(c_out, target)
            q_v_loss = CosineLoss(v_out, v_target, label.data.fill_(1))
            q_loss = q_c_loss + q_v_loss
            G_loss = reconstruct_loss + q_c_loss + q_v_loss
            G_loss.backward()
            opt_G.step()

            # logging
            print(f'Epoch: {epoch} | Dloss: {D_loss.data.cpu().numpy()} | '
                  f'QCloss: {q_c_loss.data.cpu().numpy()} | '
                  f'QVloss: {q_v_loss.data.cpu().numpy()} | '
                  f'reloss: {reconstruct_loss.data.cpu().numpy()}')
            save_image(torch.cat((fake_x, fake_stroke, real_x), dim=0).data,
                       f'./{img_dir}/{epoch}.png', nrow=20)
            print(f"fake pred {np.argmax(fake_pred.data.cpu().numpy(), axis=1)}")
            # print(f"Qpred {np.argmax(c_out[1].data.cpu().numpy())}")
            # print(f"Origin {batch_target[1].data.cpu().numpy()} ToBe: {idx[0]}")
            # save_image(real_x.data, f'./{img_dir}/{epoch}_R.png', nrow=10)
    # Fragment (presumably the tail of Tester.generate()):
    dis_c.data.copy_(torch.Tensor(one_hot))
    print(np.shape(c1))
    con_c = Variable(torch.rand(con_c.size())).cuda()
    z = torch.cat([noise, dis_c, con_c], 1).view(-1, 74, 1, 1)
    x_save = self.G(z)
    save_image(x_save.data, os.path.join(args.path, 'generate.png'), nrow=10)


def parse():
    parser = argparse.ArgumentParser(description='VAE MNIST Example')
    parser.add_argument('--label', type=int, default=1, metavar='N',
                        help='The label you want to generate')
    parser.add_argument('--num', type=int, default=1, metavar='N',
                        help='The number of images you want to generate')
    args = parser.parse_args()
    args.cuda = torch.cuda.is_available()
    args.path = './infoGAN_result'
    return args


if __name__ == '__main__':
    args = parse()
    fe = FrontEnd()
    d = D()
    q = Q()
    g = G()
    for i in [fe, d, q, g]:
        i.cuda()

    tester = Tester(g, fe, d, q, args)
    tester.load()
    tester.generate()
    # Fragment (presumably the tail of a noise_sample helper):
    idx_2 = np.random.randint(2, size=bs)
    c_2 = np.zeros((bs, 2))
    c_2[range(bs), idx_2] = 1.0
    # print('c_2: ', c_2)
    dis_c.data.copy_(torch.Tensor(c))
    con_c.data.copy_(torch.Tensor(c_2))
    noise.data.uniform_(-1.0, 1.0)
    print('noise: ', noise.shape)
    z = torch.cat([noise, dis_c, con_c], 1).view(-1, 74, 1, 1)
    return z, idx, idx_2


model_Q = Q().to(device)
model_FE = FrontEnd().to(device)
model_G = G().to(device)
model_D = D().to(device)

model_Q.load_state_dict(
    torch.load(model_path + '/model_Q.pytorch', map_location='cpu'))
model_D.load_state_dict(
    torch.load(model_path + '/model_D.pytorch', map_location='cpu'))
model_FE.load_state_dict(
    torch.load(model_path + '/model_FE.pytorch', map_location='cpu'))
model_G.load_state_dict(
    torch.load(model_path + '/model_G.pytorch', map_location='cpu'))

model_Q.eval()
model_D.eval()
    transforms.Scale(img_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
])

train_dataset = dataset.MNIST(root='./data/', train=True,
                              transform=transform, download=True)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

gx = P()
gz = Q()
dxz = D()

g_param = chain(gx.parameters(), gz.parameters())
d_param = dxz.parameters()

g_optimizer = optim.Adam(g_param, glr, betas=(0.5, 0.999))
d_optimizer = optim.Adam(d_param, dlr, betas=(0.5, 0.999))


def to_variable(x):
    if torch.cuda.is_available():
        x = x.cuda()
    return Variable(x)
env = gym.make('Pendulum-v0')
env.seed(1)
paddle.seed(1)
np.random.seed(1)

state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0]
max_action = float(env.action_space.high[0])
min_val = paddle.to_tensor(1e-7).astype('float32')

actor = Actor(state_dim, action_dim, max_action)
actor_optimizer = paddle.optimizer.RMSProp(parameters=actor.parameters(),
                                           learning_rate=learning_rate)

Q_net = Q(state_dim, action_dim)
Q_optimizer = paddle.optimizer.RMSProp(parameters=Q_net.parameters(),
                                       learning_rate=learning_rate)

critic = Critic(state_dim)
target_critic = Critic(state_dim)
target_critic.eval()
target_critic.load_dict(critic.state_dict())
critic_optimizer = paddle.optimizer.RMSProp(parameters=critic.parameters(),
                                            learning_rate=learning_rate)

rpm = ReplayMemory(memory_size)


def train():
    global epoch
    total_reward = 0
random.seed(1388420)

algo_name = 'DQN-TAMER'
env = gym.make('LunarLander-v2')
max_ep = 1000
epsilon = .3
gamma = .99
human = Human(1388420)
alpha_q = 1
alpha_h = alpha_q
# Proportion of network you want to keep
tau = .995

q = Q(env)
q_target = deepcopy(q)
h = H(env)
h_target = deepcopy(h)

q_optim = torch.optim.Adam(q.parameters(), lr=1e-3)
h_optim = torch.optim.Adam(h.parameters(), lr=1e-3)

batch_size = 128
rb = ReplayBuffer(1e6)
h_batch = 16
human_rb = HumanReplayBuffer(1e6)
local_batch = History(1e3)
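# A hedged sketch of the Polyak-style soft update that `tau` above suggests
# (keep a proportion tau of the target weights); the update rule itself is an
# assumption, not taken from the snippet.
def soft_update(net, target_net, tau):
    with torch.no_grad():
        for p, tp in zip(net.parameters(), target_net.parameters()):
            tp.data.mul_(tau).add_((1.0 - tau) * p.data)

# e.g. soft_update(q, q_target, tau); soft_update(h, h_target, tau)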