def __init__(self, batch_size=64, env_batch=1, max_step=40,
             tau=0.001, discount=0.9, rmsize=800,
             writer=None, resume=None, output_path=None):
    self.max_step = max_step
    self.env_batch = env_batch
    self.batch_size = batch_size

    self.actor = ResNet(9, 18, 65)  # target, canvas, stepnum, coordconv 3 + 3 + 1 + 2
    self.actor_target = ResNet(9, 18, 65)
    self.critic = ResNet_wobn(3 + 9, 18, 1)  # add the last canvas for better prediction
    self.critic_target = ResNet_wobn(3 + 9, 18, 1)

    self.actor_optim = Adam(self.actor.parameters(), lr=1e-2)
    self.critic_optim = Adam(self.critic.parameters(), lr=1e-2)

    if resume is not None:
        self.load_weights(resume)

    hard_update(self.actor_target, self.actor)
    hard_update(self.critic_target, self.critic)

    # Create replay buffer
    self.memory = rpm(rmsize * max_step)

    # Hyper-parameters
    self.tau = tau
    self.discount = discount

    # Tensorboard
    self.writer = writer
    self.log = 0

    self.state = [None] * self.env_batch  # Most recent state
    self.action = [None] * self.env_batch  # Most recent action
    self.choose_device()
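# hard_update(target, source) is not defined in this section; it is assumed to
# copy the online network's parameters into the target network, as is standard
# when initializing DDPG-style target networks. A minimal sketch of such a
# helper, under that assumption:
def hard_update(target, source):
    """Copy every parameter of `source` into `target` (hypothetical helper)."""
    for target_param, param in zip(target.parameters(), source.parameters()):
        target_param.data.copy_(param.data)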
def __init__(
    self,
    batch_size=64,
    env_batch=1,
    max_step=40,
    tau=0.001,
    discount=0.9,
    rmsize=800,
    writer=None,
    resume=None,
    output_path=None,
):
    self.max_step = max_step
    self.env_batch = env_batch
    self.batch_size = batch_size

    self.actor = ResNet(
        9, 18, (action_dim + 3) * n_frames_per_step
    )  # target, canvas, stepnum, coordconv 3 + 3 + 1 + 2
    self.actor_target = ResNet(9, 18, (action_dim + 3) * n_frames_per_step)
    self.critic = ResNet_wobn(
        3 + 9, 18, 1
    )  # add the last canvas for better prediction
    self.critic_target = ResNet_wobn(3 + 9, 18, 1)

    self.actor_optim = Adam(self.actor.parameters(), lr=1e-2)
    self.critic_optim = Adam(self.critic.parameters(), lr=1e-2)

    if resume is not None:
        self.load_weights(resume)

    hard_update(self.actor_target, self.actor)
    hard_update(self.critic_target, self.critic)

    # Create replay buffer
    self.memory = rpm(rmsize * max_step)

    # Hyper-parameters
    self.tau = tau
    self.discount = discount

    # Tensorboard
    self.writer = writer
    self.log = 0

    self.state = [None] * self.env_batch  # Most recent state
    self.action = [None] * self.env_batch  # Most recent action
    self.choose_device()
def __init__(self, batch_size=64, env_batch=1, max_step=40,
             tau=0.001, discount=0.9, rmsize=800,
             writer=None, resume=None, output_path=None):
    self.max_step = max_step
    self.env_batch = env_batch
    self.batch_size = batch_size

    # Entropy coefficient (alpha), optionally tuned automatically toward entropy_target
    self.alpha = 0.001
    self.entropy_tuning = True
    if self.entropy_tuning:
        self.entropy_target = -torch.Tensor([65.]).to(device)
        self.log_alpha = torch.tensor([0.], requires_grad=True, device=device)
        self.alpha_optimizer = Adam([self.log_alpha], lr=1e-3)

    self.pi = ResGaussianActor(9, 18, 65)  # target, canvas, stepnum, coordconv 3 + 3 + 1 + 2
    self.q1 = ResNet_wobn(3 + 9, 18, 1)  # add the last canvas for better prediction
    self.q1_target = ResNet_wobn(3 + 9, 18, 1)
    self.q2 = ResNet_wobn(3 + 9, 18, 1)
    self.q2_target = ResNet_wobn(3 + 9, 18, 1)

    self.pi_optim = Adam(self.pi.parameters(), lr=1e-2)
    self.q1_optim = Adam(self.q1.parameters(), lr=1e-2)
    self.q2_optim = Adam(self.q2.parameters(), lr=1e-2)

    if resume is not None:
        self.load_weights(resume)

    hard_update(self.q1_target, self.q1)
    hard_update(self.q2_target, self.q2)

    # Create replay buffer
    self.memory = rpm(rmsize * max_step)

    # Hyper-parameters
    self.tau = tau
    self.discount = discount

    # Tensorboard
    self.writer = writer
    self.log = 0

    self.state = [None] * self.env_batch  # Most recent state
    self.action = [None] * self.env_batch  # Most recent action
    self.choose_device()
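# self.log_alpha, self.entropy_target, and self.alpha_optimizer are the usual
# ingredients of automatic entropy-temperature tuning in SAC. A hedged sketch
# of the update such a setup typically performs during training; `log_prob`
# (log-probability of actions sampled from self.pi) is an assumed input, and
# the actual update in this codebase may differ.
def update_alpha(self, log_prob):
    # Minimize J(alpha) = E[-log_alpha * (log_prob + entropy_target)]
    alpha_loss = -(self.log_alpha * (log_prob + self.entropy_target).detach()).mean()
    self.alpha_optimizer.zero_grad()
    alpha_loss.backward()
    self.alpha_optimizer.step()
    self.alpha = self.log_alpha.exp().item()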
def __init__(self, opt, writer=None):
    self.opt = opt
    self.max_step = opt.max_step
    self.env_batch = opt.env_batch
    self.batch_size = opt.batch_size
    self.loss_fcn = opt.loss_fcn
    self.use_multiple_renderers = opt.use_multiple_renderers

    state_size = 9
    if (opt.loss_fcn == 'cm' or opt.loss_fcn == 'cml1') and self.opt.built_in_cm:
        state_size = 10

    self.actor = ResNet(
        state_size, 18, 13 * n_strokes
    )  # target, canvas, stepnum, coordconv 3 + 3 + 1 + 2
    self.actor_target = ResNet(state_size, 18, 13 * n_strokes)
    self.critic = ResNet_wobn(
        3 + state_size, 18, 1
    )  # add the last canvas for better prediction
    self.critic_target = ResNet_wobn(3 + state_size, 18, 1)

    if not opt.use_multiple_renderers:
        Decoder.load_state_dict(torch.load(opt.renderer))

    self.actor_optim = Adam(self.actor.parameters(), lr=1e-2)
    self.critic_optim = Adam(self.critic.parameters(), lr=1e-2)

    if opt.resume is not None:
        self.load_weights(opt.resume)

    hard_update(self.actor_target, self.actor)
    hard_update(self.critic_target, self.critic)

    # Create replay buffer
    self.memory = rpm(opt.rmsize * opt.max_step)

    # Hyper-parameters
    self.tau = opt.tau
    self.discount = opt.discount

    # Tensorboard
    self.writer = writer
    self.log = 0

    self.state = [None] * self.env_batch  # Most recent state
    self.action = [None] * self.env_batch  # Most recent action
    self.choose_device()
def __init__(self, state_dim, merged_state_dim, act_dim, batch_size=64, env_batch=1, max_step=40,
             tau=0.001, discount=0.9, rmsize=800,
             writer=None, resume=None, output_path=None):
    self.state_dim = state_dim
    self.merged_state_dim = merged_state_dim
    self.act_dim = act_dim
    self.max_step = max_step
    self.env_batch = env_batch
    self.batch_size = batch_size

    # input channel (state_dim): canvas, gt, parameters, stepnum 3 + 3 + 2 + 1
    self.actor = ResNet(self.state_dim, 18, self.act_dim)
    self.actor_target = ResNet(self.state_dim, 18, self.act_dim)
    # input channel (merged_state_dim): canvas, parameters, stepnum, coordconv 3 + 2 + 1 + 2
    self.critic = ResNet_wobn(self.merged_state_dim, 18, 1, self.act_dim)
    self.critic_target = ResNet_wobn(self.merged_state_dim, 18, 1, self.act_dim)

    self.actor_optim = Adam(self.actor.parameters(), lr=1e-2)
    self.critic_optim = Adam(self.critic.parameters(), lr=1e-2)

    if resume is not None:
        self.load_weights(resume)

    hard_update(self.actor_target, self.actor)
    hard_update(self.critic_target, self.critic)

    # Create replay buffer
    self.memory = rpm(rmsize * max_step)

    # Hyper-parameters
    self.tau = tau
    self.discount = discount

    # Tensorboard
    self.writer = writer
    self.log = 0

    self.state = [None] * self.env_batch  # Most recent state
    self.action = [None] * self.env_batch  # Most recent action
    self.choose_device()
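# Every variant builds its replay buffer as rpm(rmsize * max_step), i.e. a
# fixed-capacity store sized to hold rmsize full episodes. The rpm class itself
# is not shown here; the sketch below is a hypothetical stand-in with
# illustrative method names (append, sample_batch) that may not match the real
# implementation.
import random
from collections import deque

class ReplayBuffer:
    def __init__(self, buffer_size):
        # Oldest transitions are dropped once capacity is reached
        self.buffer = deque(maxlen=buffer_size)

    def append(self, transition):
        self.buffer.append(transition)

    def sample_batch(self, batch_size):
        return random.sample(self.buffer, batch_size)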
def __init__(self, batch_size=64, env_batch=1, max_step=40,
             tau=0.001, discount=0.9, rmsize=800,
             writer=None, resume=None, output_path=None):
    self.max_step = max_step
    self.env_batch = env_batch
    self.batch_size = batch_size

    self.actor = ResNet(9, 18, 65)  # target, canvas, stepnum, coordconv 3 + 3 + 1 + 2
    self.actor_target = ResNet(9, 18, 65)
    self.critic = ResNet_wobn(9, 18, 1)
    self.critic_target = ResNet_wobn(9, 18, 1)

    self.actor_optim = Adam(self.actor.parameters(), lr=1e-2)
    self.critic_optim = Adam(self.critic.parameters(), lr=1e-2)

    if resume is not None:
        self.load_weights(resume)

    hard_update(self.actor_target, self.actor)
    hard_update(self.critic_target, self.critic)

    # Create replay buffer
    self.memory = rpm(rmsize * max_step)

    # Hyper-parameters
    self.tau = tau
    self.discount = discount

    # Tensorboard
    self.writer = writer
    self.log = 0

    self.state = [None] * self.env_batch  # Most recent state
    self.action = [None] * self.env_batch  # Most recent action
    self.cuda = torch.cuda.is_available()
    self.choose_device()
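# choose_device() is assumed to move the networks onto the GPU when one is
# available (the variant above already records torch.cuda.is_available()), and
# self.tau is the coefficient conventionally used for Polyak-averaged target
# updates during training. Hedged sketches of both, written for the
# actor-critic variants; the real methods may cover additional modules.
def choose_device(self):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    for net in (self.actor, self.actor_target, self.critic, self.critic_target):
        net.to(device)

def soft_update(target, source, tau):
    """Polyak averaging: theta_target <- tau * theta + (1 - tau) * theta_target."""
    for target_param, param in zip(target.parameters(), source.parameters()):
        target_param.data.copy_(tau * param.data + (1.0 - tau) * target_param.data)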