Example No. 1
    def build_model(self):
        """Create a generator and a discriminator."""
        if self.dataset in ['CelebA', 'RaFD', 'MNIST']:
            self.G = Generator(self.g_conv_dim, self.c_dim + self.con_dim,
                               self.image_size, self.op_channels)
            self.FE = FE(self.image_size, self.d_conv_dim, self.op_channels)
            self.D = Discriminator(self.image_size, self.d_conv_dim,
                                   self.c_dim)
            self.Q = Q(self.image_size, self.d_conv_dim, self.con_dim)

        elif self.dataset in ['Both']:
            self.G = Generator(self.g_conv_dim, self.c_dim + self.c2_dim + 2,
                               self.g_repeat_num)  # 2 for mask vector.
            self.D = Discriminator(self.image_size, self.d_conv_dim,
                                   self.c_dim + self.c2_dim, self.d_repeat_num)

        self.g_optimizer = torch.optim.Adam([{
            'params': self.G.parameters()
        }, {
            'params': self.Q.parameters()
        }], self.g_lr, [self.beta1, self.beta2])
        self.d_optimizer = torch.optim.Adam([{
            'params': self.D.parameters()
        }, {
            'params': self.FE.parameters()
        }], self.d_lr, [self.beta1, self.beta2])
        # self.print_network(self.G, 'G')
        # self.print_network(self.D, 'D')
        self.G.to(self.device)
        self.D.to(self.device)
        self.FE.to(self.device)
        self.Q.to(self.device)
    def __init__(self):
        # Value network and a target copy, plus the actor (policy) and Q networks.
        self.V = V(n_state).to(device)
        self.target_V = V(n_state).to(device)
        self.policy = Actor(n_state, max_action).to(device)
        self.Q = Q(n_state, n_action).to(device)

        self.optimV = th.optim.Adam(self.V.parameters(), lr=lr)
        self.optimQ = th.optim.Adam(self.Q.parameters(), lr=lr)
        self.optimP = th.optim.Adam(self.policy.parameters(), lr=lr)

        self.memory = replay_memory(memory_size)
def epsilon_greedy(s, Q=Q, epsilon=epsilon):
    # torch expects FloatTensors, so we use `.float()`
    s = torch.from_numpy(s).float()
    s = Variable(s, volatile=True).cuda()

    if random.random() <= epsilon:
        a = env.action_space.sample()
    else:
        a = int(Q(s).max(0)[1])

    return a
Example No. 4
File: agent2.py Project: arasdar/RL
    def __init__(self, s_size, a_size, random_seed):
        """Initialize an Agent object.
        
        Params
        ======
            s_size (int): dimension of each state (s)
            a_size (int): dimension of each action (a)
            random_seed (int): random seed
        """
        self.s_size = s_size
        self.a_size = a_size
        self.random_seed = random_seed
        random.seed(random_seed)

        # Q-Network
        self.q = Q(s_size, a_size, random_seed).to(device)
        self.q_target = Q(s_size, a_size, random_seed).to(device)
        self.optimizer = optim.Adam(self.q.parameters(), lr=LR)

        # Replay memory
        self.memory = Memory(a_size, BUFFER_SIZE, BATCH_SIZE, random_seed)
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
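The constructor tracks t_step so that learning happens only every UPDATE_EVERY environment steps. A minimal sketch of the companion step() method under that reading; the add/sample/learn names and the GAMMA constant are illustrative assumptions, not taken from agent2.py:

    def step(self, state, action, reward, next_state, done):
        # Save the transition in replay memory (method name assumed).
        self.memory.add(state, action, reward, next_state, done)

        # Learn every UPDATE_EVERY steps, once enough samples are buffered.
        self.t_step = (self.t_step + 1) % UPDATE_EVERY
        if self.t_step == 0 and len(self.memory) > BATCH_SIZE:
            experiences = self.memory.sample()
            self.learn(experiences, GAMMA)  # GAMMA assumed to be defined elsewhere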
Example No. 5
    def __init__(self, side, checkpoint_name, initialization_checkpoint):
        self.side = side
        self.checkpoint_name = checkpoint_name

        self.sess = tf.Session()
        with tf.variable_scope("model_" + checkpoint_name,
                               reuse=tf.AUTO_REUSE) as scope:
            self.state_placeholder = tf.placeholder(tf.int32, shape=(9, 3))
            self.next_state_placeholder = tf.placeholder(tf.int32,
                                                         shape=(9, 3))
            self.reward_placeholder = tf.placeholder(tf.int32)
            self.soft_tensor, self.update_op, self.loss_op, self.reward_op = Q(
                self.state_placeholder, self.next_state_placeholder,
                self.reward_placeholder)
        self.saver = tf.train.Saver()
        if not initialization_checkpoint:
            self.sess.run(tf.global_variables_initializer())
        else:
            self.saver.restore(self.sess, "ckpt/" + initialization_checkpoint)
Example No. 6
File: hrl.py Project: zacrash/HRL
    def __init__(self, state_size, action_size, max_tau=5):
        self.max_tau = max_tau
        self.iterations = 3
        self.state_size = state_size
        self.action_size = action_size
        self.goal_size = state_size
        self.lr = 0.001
        self.batch_size = 128
        self.episodes = 1000
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.999
        self.tau_size = max_tau  # assumed: size of the horizon (tau) encoding; not defined in the original snippet
        self.model = Q(self.state_size + self.goal_size + self.tau_size,
                       action_size, self.lr)
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.model.lr)
        self.loss = nn.SmoothL1Loss()
        # self.loss = nn.MSELoss()
        self.memory = deque(maxlen=2000)
        self.k = 4
        self.n = 2
Example No. 7
    def __init__(self,
                 gamma,
                 alpha,
                 epsilon,
                 epsilon_min,
                 epsilon_decay,
                 game="CartPole-v1",
                 mean_bound=5,
                 reward_bound=495.0,
                 save_model=10):
        # Environment variables
        self.game = game
        self.env = gym.make(self.game)
        self.num_states = self.env.observation_space.shape[0]
        self.num_actions = self.env.action_space.n

        # Agent variables
        self.alpha = alpha
        self.gamma = gamma
        self.model = Q(self.num_actions, self.alpha, self.gamma)
        self.save_model = save_model
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.mean_bound = mean_bound
        self.reward_bound = reward_bound

        # File paths
        dirname = os.path.dirname(__file__)
        self.path_model = os.path.join(dirname, "../models/q.pickle")
        self.path_plot = os.path.join(dirname, "../plots/q.png")

        # Load model, if it already exists
        try:
            self.model.load(self.path_model)
        except Exception:
            print("Model does not exist! Creating a new model...")
Example No. 8
import torch.nn.functional as F
import torch.nn.init as init
import torch.optim as optim
import torchvision.datasets as dset
import torchvision.transforms as transforms
from torch import nn
from torch.utils.data import DataLoader

from args import args
from env import env
from exploration import decay_exploration, epsilon, epsilon_greedy
from model import Q
from replay_buffer import replay_buffer
from train import criterion, train

optimizer = optim.Adam(Q.parameters(), lr=args.lr)

# TODO wrap data in dataset

for i in range(args.iterations):

    done = False
    s = env.reset()  # TODO fold into rollout

    while not done:

        epsilon = decay_exploration(i, epsilon)

        a = epsilon_greedy(s, epsilon=epsilon)

        succ, r, done, _ = env.step(a)
Example No. 9
def train(iterations=10000,
          batch_size=100,
          sample_interval=5,
          save_model_interval=100,
          train_D_iters=1,
          train_G_iters=3,
          D_lr=0.0001,
          G_lr=0.0001,
          betas=(0.5, 0.99),
          img_dir='./info_imgs',
          model_dir='./models'):

    imgs, digits, test_img, test_digits = load_mnist()
    dataset = Dataset(imgs, digits)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    #dataset = Dataset(test_img, test_digits)
    #test_loader = DataLoader(dataset,batch_size=batch_size, shuffle=False)

    if torch.cuda.is_available():
        print(f"Using GPU {torch.cuda.current_device()}")
        device = "cuda"
    else:
        print("Using CPU...")
        device = "cpu"

    generator, discriminator, front, qq, encoder = G(), D(), FrontEnd(), Q(), E()
    generator = generator.to(device).apply(weights_init)
    discriminator = discriminator.to(device).apply(weights_init)
    qq = qq.to(device).apply(weights_init)
    encoder = encoder.to(device).apply(weights_init)
    front = front.to(device).apply(weights_init)

    opt_G = torch.optim.Adam([{
        "params": generaotor.parameters()
    }, {
        "params": qq.parameters()
    }, {
        "params": encoder.parameters()
    }],
                             lr=G_lr,
                             betas=betas)

    opt_D = torch.optim.Adam([{
        "params": discriminator.parameters()
    }, {
        "params": front.parameters()
    }],
                             lr=D_lr,
                             betas=betas)

    CELoss_D = nn.CrossEntropyLoss(
        weight=torch.FloatTensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])).to(device)
    CELoss_G = nn.CrossEntropyLoss(weight=torch.FloatTensor(
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 20])).to(device)
    CELoss_Q = nn.CrossEntropyLoss().to(device)
    CosineLoss = nn.CosineEmbeddingLoss().to(device)

    real_x = torch.FloatTensor(batch_size, 1, 32, 32).to(device)
    trg = torch.LongTensor(batch_size).to(device)
    label = torch.FloatTensor(batch_size, 1).to(device)
    noise = torch.FloatTensor(batch_size, 54).to(device)
    c = torch.FloatTensor(batch_size, 10).to(device)
    v_target = torch.LongTensor(batch_size, 64).to(device)  # For Q

    real_x = Variable(real_x)
    noise = Variable(noise)
    c = Variable(c)
    trg = Variable(trg, requires_grad=False)
    label = Variable(label, requires_grad=False)
    v_target = Variable(v_target, requires_grad=False)

    for epoch in range(iterations):
        for step, [batch_x, batch_target] in enumerate(loader):

            bs = batch_x.size(0)

            # train D
            #==========
            # real
            opt_D.zero_grad()

            real_x.data.copy_(batch_x)
            trg.data.copy_(batch_target)

            fe1 = front(real_x)
            real_pred = discriminator(fe1)

            real_loss = CELoss_D(real_pred, trg)
            real_loss.backward()

            #fake
            real_x.data.copy_(batch_x)

            v = encoder(real_x)
            z, idx = noise_sample(c, noise, v, bs)

            fake_stroke = generator(z)
            fake_x = fake_stroke + real_x
            fake_x = fake_x.clamp(max=1, min=0)

            fe2 = front(fake_x.detach())
            fake_pred = discriminator(fe2)

            trg.data.fill_(10)
            if epoch > 0:
                ignore_rate = 0.01
            else:
                ignore_rate = 1

            fake_loss = CELoss_D(fake_pred, trg) * ignore_rate
            fake_loss.backward()
            D_loss = real_loss + fake_loss
            #D_loss.backward()
            opt_D.step()

            # train G, Q, E
            #===============
            #train G
            opt_G.zero_grad()
            fe = front(fake_x)
            fake_pred = discriminator(fe)

            trg.data.copy_(torch.LongTensor(idx))
            reconstruct_loss = CELoss_G(fake_pred, trg)

            # train Q
            c_out, v_out = qq(fe)

            class_ = torch.LongTensor(idx).to(device)
            target = Variable(class_)

            v_target.data.copy_(v)

            # GQ Loss
            q_c_loss = CELoss_Q(c_out, target)
            q_v_loss = CosineLoss(v_out, v_target, label.data.fill_(1))

            q_loss = q_c_loss + q_v_loss

            G_loss = reconstruct_loss + q_c_loss + q_v_loss
            G_loss.backward()
            opt_G.step()

            # accuracy

        print(
            f'Epoch: {epoch} | Dloss: {D_loss.data.cpu().numpy()} | QCloss: {q_c_loss.data.cpu().numpy()} | QVloss: {q_v_loss.data.cpu().numpy()} | reloss: {reconstruct_loss.data.cpu().numpy()}'
        )
        save_image(torch.cat((fake_x, fake_stroke, real_x), dim=0).data,
                   f'./{img_dir}/{epoch}.png',
                   nrow=20)
        print(f"fake pred {np.argmax(fake_pred.data.cpu().numpy(),axis=1)}")
        #print(f"Qpred {np.argmax(c_out[1].data.cpu().numpy())}")
        #print(f"Origin {batch_target[1].data.cpu().numpy()} ToBe: {idx[0]}")
        #save_image(real_x.data, f'./{img_dir}/{epoch}_R.png', nrow=10)
        """
Example No. 10
        dis_c.data.copy_(torch.Tensor(one_hot))
        print(np.shape(c1))
        con_c = Variable(torch.rand(con_c.size())).cuda()
        z = torch.cat([noise, dis_c, con_c], 1).view(-1, 74, 1, 1)
        x_save = self.G(z)
        save_image(x_save.data, os.path.join(args.path, 'generate.png'), nrow=10)

def parse():
    parser = argparse.ArgumentParser(description='VAE MNIST Example')
    parser.add_argument('--label', type=int, default=1, metavar='N',
                        help='The label you want to generate')
    parser.add_argument('--num', type=int, default=1, metavar='N',
                        help='The number of image you want to generate')
    args = parser.parse_args()
    args.cuda = torch.cuda.is_available()
    args.path = './infoGAN_result'
    return args

if __name__ == '__main__':
    args = parse()
    fe = FrontEnd()
    d = D()
    q = Q()
    g = G()

    for i in [fe, d, q, g]:
        i.cuda()

    tester = Tester(g, fe, d, q, args)
    tester.load()
    tester.generate()
    idx_2 = np.random.randint(2, size=bs)
    c_2 = np.zeros((bs, 2))
    c_2[range(bs), idx_2] = 1.0
    # print('c_2: ', c_2)

    dis_c.data.copy_(torch.Tensor(c))
    con_c.data.copy_(torch.Tensor(c_2))
    noise.data.uniform_(-1.0, 1.0)
    print('noise: ', noise.shape)
    z = torch.cat([noise, dis_c, con_c], 1).view(-1, 74, 1, 1)

    return z, idx, idx_2


model_Q = Q().to(device)
model_FE = FrontEnd().to(device)
model_G = G().to(device)
model_D = D().to(device)

model_Q.load_state_dict(
    torch.load(model_path + '/model_Q.pytorch', map_location='cpu'))
model_D.load_state_dict(
    torch.load(model_path + '/model_D.pytorch', map_location='cpu'))
model_FE.load_state_dict(
    torch.load(model_path + '/model_FE.pytorch', map_location='cpu'))
model_G.load_state_dict(
    torch.load(model_path + '/model_G.pytorch', map_location='cpu'))

model_Q.eval()
model_D.eval()
Example No. 12
    transforms.Scale(img_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
])

train_dataset = dataset.MNIST(root='./data/',
                              train=True,
                              transform=transform,
                              download=True)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

gx = P()
gz = Q()
dxz = D()

g_param = chain(gx.parameters(), gz.parameters())
d_param = dxz.parameters()

g_optimizer = optim.Adam(g_param, glr, betas=(0.5, 0.999))
d_optimizer = optim.Adam(d_param, dlr, betas=(0.5, 0.999))


def to_variable(x):
    if torch.cuda.is_available():
        x = x.cuda()
    return Variable(x)

Example No. 13
env = gym.make('Pendulum-v0')
env.seed(1)
paddle.seed(1)
np.random.seed(1)

state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0]
max_action = float(env.action_space.high[0])
min_val = paddle.to_tensor(1e-7).astype('float32')

actor = Actor(state_dim, action_dim, max_action)
actor_optimizer = paddle.optimizer.RMSProp(parameters=actor.parameters(),
                                           learning_rate=learning_rate)

Q_net = Q(state_dim, action_dim)
Q_optimizer = paddle.optimizer.RMSProp(parameters=Q_net.parameters(),
                                       learning_rate=learning_rate)

critic = Critic(state_dim)
target_critic = Critic(state_dim)
target_critic.eval()
target_critic.load_dict(critic.state_dict())
critic_optimizer = paddle.optimizer.RMSProp(parameters=critic.parameters(),
                                            learning_rate=learning_rate)

rpm = ReplayMemory(memory_size)

def train():
    global epoch
    total_reward = 0
Example No. 14
random.seed(1388420)

algo_name = 'DQN-TAMER'
env = gym.make('LunarLander-v2')
max_ep = 1000
epsilon = .3
gamma = .99
human = Human(1388420)
alpha_q = 1
alpha_h = alpha_q

# Proportion of the target network to keep on each soft update
tau = .995

q = Q(env)
q_target = deepcopy(q)

h = H(env)
h_target = deepcopy(h)

q_optim = torch.optim.Adam(q.parameters(), lr=1e-3)
h_optim = torch.optim.Adam(h.parameters(), lr=1e-3)

batch_size = 128
rb = ReplayBuffer(1e6)

h_batch = 16
human_rb = HumanReplayBuffer(1e6)
local_batch = History(1e3)
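The comment above describes tau as the proportion of the target network to keep on each update. A minimal Polyak-averaging sketch under that reading (the helper name is illustrative, not taken from the original script):

def soft_update(net, target_net, tau=tau):
    # Keep a fraction `tau` of the target weights, blend in (1 - tau) of the online weights.
    with torch.no_grad():
        for p, p_target in zip(net.parameters(), target_net.parameters()):
            p_target.data.mul_(tau).add_(p.data, alpha=1 - tau)

# e.g. soft_update(q, q_target); soft_update(h, h_target) after each learning step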