def generate(self, batch_size, noises=None):
    """Sample a batch of generated examples from the generator.

    Args:
        batch_size: number of samples to generate.
        noises: optional latent vectors of shape (batch_size, latent_depth);
            drawn from a standard normal when omitted.

    Returns:
        The generator's output for the latent batch (eval mode).
    """
    # Fix: `noises == None` compared element-wise on array inputs;
    # identity test with `is None` is the correct (and PEP 8) form.
    if noises is None:
        noises = FloatTensor(
            np.random.normal(size=[batch_size, self.latent_depth])
        )
    else:
        noises = FloatTensor(noises)
    self.generator.eval()
    faked_samples = self.generator(noises)
    return faked_samples
def collate_fn(input_batch):
    """Collate example dicts into a batch sorted by descending history length.

    Histories are zero-padded to the longest history in the batch; tensors
    are wrapped with `to_var`.
    """
    def column(key):
        return [example[key] for example in input_batch]

    order_id = column('order_id')
    product_history = column('product_history')
    product_history_lengths = [len(h) for h in product_history]
    next_product = column('next_product')
    target = column('target')

    # Reorder every column by history length, longest first
    # (presumably required by packed sequences downstream — confirm).
    sort_index = np.argsort(product_history_lengths)[::-1]

    def reorder(values):
        return [values[i] for i in sort_index]

    order_id = reorder(order_id)
    product_history = reorder(product_history)
    product_history_lengths = reorder(product_history_lengths)
    next_product = reorder(next_product)
    target = reorder(target)

    # Zero-pad each history up to the (now first) maximum length.
    max_len = product_history_lengths[0]
    padded = np.array([
        np.pad(h, pad_width=(0, max_len - len(h)), mode='constant',
               constant_values=(0, 0))
        for h in product_history
    ])
    product_history = to_var(torch.from_numpy(padded))

    return {
        'order_id': order_id,
        'product_history': product_history,
        'product_history_lengths': product_history_lengths,
        'next_product': to_var(LongTensor(next_product)),
        'target': to_var(FloatTensor(target)),
    }
def compute_pairwise_loss_in_euclidean(f1, f2, threshold):
    """Mean-squared-distance loss over 'close' feature pairs.

    Pairs whose per-dimension mean squared distance is at most `threshold`
    contribute distance * exp(-distance / threshold); the sum is normalised
    by the number of contributing pairs.
    """
    n1 = f1.size(0)
    n2 = f2.size(0)
    # All-pairs differences: (n1, n2, dim).
    diff = f1.unsqueeze(1).expand(-1, n2, -1) - f2.unsqueeze(0).expand(n1, -1, -1)
    l2 = torch.mean(diff ** 2, dim=2)
    # Binary mask selecting pairs within the threshold.
    ones = Variable(FloatTensor(n1, n2).fill_(1.0), requires_grad=False)
    zeros = Variable(FloatTensor(n1, n2).fill_(0.0), requires_grad=False)
    mask = torch.where(l2 <= threshold, ones, zeros)
    # Exponential down-weighting of larger distances.
    weight = torch.exp(-l2 / float(threshold))
    return torch.sum(l2 * mask * weight) / torch.sum(mask)
def act(self, state):
    """Sample an action from the current policy for the given state."""
    self.pi.eval()
    observation = FloatTensor(state)
    distribution = self.pi(observation)
    # Detach before moving to host so no graph is kept around.
    return distribution.sample().detach().cpu().numpy()
def build_targets(pred_boxes, pred_cls, target, anchors, ignore_thres, device):
    """Build YOLO regression/classification targets for one batch.

    Args:
        pred_boxes: predicted boxes indexed [batch, anchor, grid_y, grid_x, 4].
        pred_cls: predicted class scores (last dim = number of classes).
        target: ground-truth rows [img_idx, label, cx, cy, w, h]; the box
            fields are multiplied by the grid size below.
        anchors: anchor (w, h) pairs — assumed already in grid units (they
            are compared to grid-scaled boxes) — TODO confirm.
        ignore_thres: IoU above which a non-best anchor is not penalised
            as background.
        device: CUDA device index for the allocated target tensors.

    Returns:
        (iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th,
        tcls, tconf), all [n_b, n_a, n_g, n_g] (tcls has an extra class dim).
    """
    n_b = pred_boxes.size(0)  # batch size
    n_a = pred_boxes.size(1)  # anchors per cell
    n_c = pred_cls.size(-1)   # number of classes
    n_g = pred_boxes.size(2)  # grid size
    # Output tensors
    obj_mask = BoolTensor(n_b, n_a, n_g, n_g).cuda(device).fill_(0)
    noobj_mask = BoolTensor(n_b, n_a, n_g, n_g).cuda(device).fill_(1)
    class_mask = FloatTensor(n_b, n_a, n_g, n_g).cuda(device).fill_(0)
    iou_scores = FloatTensor(n_b, n_a, n_g, n_g).cuda(device).fill_(0)
    tx = FloatTensor(n_b, n_a, n_g, n_g).cuda(device).fill_(0)
    ty = FloatTensor(n_b, n_a, n_g, n_g).cuda(device).fill_(0)
    tw = FloatTensor(n_b, n_a, n_g, n_g).cuda(device).fill_(0)
    th = FloatTensor(n_b, n_a, n_g, n_g).cuda(device).fill_(0)
    tcls = FloatTensor(n_b, n_a, n_g, n_g, n_c).cuda(device).fill_(0)
    # Convert to position relative to box
    target_boxes = target[:, 2:6] * n_g
    gxy = target_boxes[:, :2]
    gwh = target_boxes[:, 2:]
    # Get anchors with best iou
    ious = torch.stack([bbox_wh_iou(anchor, gwh) for anchor in anchors])
    _, best_n = ious.max(0)
    # Separate target values
    b, target_labels = target[:, :2].long().t()
    gx, gy = gxy.t()
    gw, gh = gwh.t()
    gi, gj = gxy.long().t()  # integer cell coordinates of each target
    # Set masks: the best-IoU anchor at the target cell is responsible
    obj_mask[b, best_n, gj, gi] = 1
    noobj_mask[b, best_n, gj, gi] = 0
    # Set noobj mask to zero where iou exceeds ignore threshold
    for i, anchor_ious in enumerate(ious.t()):
        noobj_mask[b[i], anchor_ious > ignore_thres, gj[i], gi[i]] = 0
    # Coordinates: fractional offset of the centre inside its cell
    tx[b, best_n, gj, gi] = gx - gx.floor()
    ty[b, best_n, gj, gi] = gy - gy.floor()
    # Width and height: log ratio to the matched anchor (+eps for log(0))
    tw[b, best_n, gj, gi] = torch.log(gw / anchors[best_n][:, 0] + 1e-16)
    th[b, best_n, gj, gi] = torch.log(gh / anchors[best_n][:, 1] + 1e-16)
    # One-hot encoding of label
    tcls[b, best_n, gj, gi, target_labels] = 1
    # Compute label correctness and iou at best anchor
    class_mask[b, best_n, gj, gi] = (pred_cls[b, best_n, gj, gi].argmax(-1) ==
                                     target_labels).float()
    iou_scores[b, best_n, gj, gi] = bbox_iou(pred_boxes[b, best_n, gj, gi],
                                             target_boxes, x1y1x2y2=False)
    tconf = obj_mask.float()
    return iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, \
        tcls, tconf
def train_one_step(self, real_samples):
    """Run one generator step and one discriminator step of GAN training.

    Returns (generator_loss, discriminator_loss) as Python floats.
    """
    bce = torch.nn.functional.binary_cross_entropy_with_logits
    batch_size = real_samples.shape[0]
    real_batch = FloatTensor(real_samples)
    latent = FloatTensor(np.random.normal(size=[batch_size, self.latent_depth]))
    self.generator.train()
    self.discriminator.train()

    # Generator update: push D's scores on fakes towards "real".
    fake_batch = self.generator(latent)
    fake_scores = self.discriminator(fake_batch)
    generator_loss = bce(input=fake_scores, target=torch.ones_like(fake_scores))
    self.generator_opt.zero_grad()
    generator_loss.backward()
    self.generator_opt.step()

    # Discriminator update: real -> 1, fake -> 0.
    # Fakes are regenerated so they reflect the just-updated generator.
    real_scores = self.discriminator(real_batch)
    fake_batch = self.generator(latent)
    fake_scores = self.discriminator(fake_batch)
    discriminator_loss = (
        bce(input=real_scores, target=torch.ones_like(real_scores))
        + bce(input=fake_scores, target=torch.zeros_like(fake_scores))
    )
    self.discriminator_opt.zero_grad()
    discriminator_loss.backward()
    self.discriminator_opt.step()

    return generator_loss.item(), discriminator_loss.item()
def knn_indices_func_gpu(
        seed: cuda.FloatTensor,  # (B, C, npoint)
        pts: cuda.FloatTensor,   # (B, C, N)
        k: int) -> cuda.LongTensor:  # (B, npoint, k)
    """Brute-force k-nearest-neighbour indices of each seed point in `pts`.

    Args:
        seed (cuda.FloatTensor): clustering seeds, (B, C, npoint).
        pts (cuda.FloatTensor): point cloud, (B, C, N).
        k (int): number of neighbours to return.

    Returns:
        cuda.LongTensor: neighbour indices, (B, npoint, k). The single
        closest match is dropped (presumably the seed itself when seeds
        come from `pts` — confirm with callers).
    """
    num_seeds = seed.shape[2]
    num_points = pts.shape[2]
    # Broadcast both clouds to (B, C, num_seeds, num_points) and compare.
    seed_grid = seed.unsqueeze(-1).expand(-1, -1, -1, num_points)
    pts_grid = pts.unsqueeze(-2).expand(-1, -1, num_seeds, -1)
    sq_dist = torch.sum((pts_grid - seed_grid) ** 2, dim=1)
    # k+1 smallest distances, then drop the closest one.
    _, neighbour_idx = torch.topk(sq_dist, k=k + 1, largest=False)
    return neighbour_idx[:, :, 1:]
def forward(self, x):
    """Forward pass: two pooled/noisy stages of three layers each, a final
    stack of layers, then 6x6 average pooling and flattening."""
    cfg = self.config
    if cfg['instance_norm']:
        x = self.inst_norm(x)
    use_bn = cfg['is_bn']
    noise_std = cfg['std']
    # Two identical stages: three layer/BN/activation triples followed by
    # 2x2 max-pooling, dropout and (train-time) additive Gaussian noise.
    for lo, hi in ((0, 3), (3, 6)):
        for i in range(lo, hi):
            x = self.linears[i](x)
            if use_bn:
                x = self.bns[i](x)
            x = self.lr(x)
        x = self.mp2_2(x)
        x = self.drop(x)
        if self.training and noise_std > 10**-4:
            x += Variable(FloatTensor(x.size()).normal_()) * noise_std
    # Remaining layers, no pooling/noise in between.
    for i in range(6, self.nb_layer):
        x = self.linears[i](x)
        if use_bn:
            x = self.bns[i](x)
        x = self.lr(x)
    x = self.avg_6(x)
    return x.view(x.size(0), -1)
def compute_grid_offsets(self, grid_size, cuda):
    """Precompute per-cell x/y offsets and stride-scaled anchors for a
    grid_size x grid_size feature map (cached on self)."""
    self.grid_size = grid_size
    g = self.grid_size
    # One feature-map cell covers `stride` input pixels.
    self.stride = self.img_dim / self.grid_size
    # Cell column indices (grid_x) and row indices (grid_y), broadcastable
    # over [batch, anchor, g, g].
    cell_idx = torch.arange(g).repeat(g, 1)
    self.grid_x = (cell_idx.view([1, 1, g, g])
                   .type(FloatTensor).cuda(self.device))
    self.grid_y = (cell_idx.t().view([1, 1, g, g])
                   .type(FloatTensor).cuda(self.device))
    # Anchors rescaled from pixels to grid units.
    self.scaled_anchors = FloatTensor([
        (a_w / self.stride, a_h / self.stride) for a_w, a_h in self.anchors
    ]).cuda(self.device)
    self.anchor_w = self.scaled_anchors[:, 0:1].view(
        (1, self.num_anchors, 1, 1))
    self.anchor_h = self.scaled_anchors[:, 1:2].view(
        (1, self.num_anchors, 1, 1))
def filter_batch(self, batch, percentile):
    """Select elite episodes (reward >= percentile bound) for cross-entropy
    training and flatten their steps.

    Returns (observations, actions, reward_bound, reward_mean).
    """
    episode_rewards = [episode.reward for episode in batch]
    reward_bound = np.percentile(episode_rewards, percentile)
    reward_mean = float(np.mean(episode_rewards))
    train_observations = []
    train_actions = []
    for episode in batch:
        # Keep only episodes at or above the elite threshold.
        if episode.reward >= reward_bound:
            train_observations.extend(step.state for step in episode.steps)
            train_actions.extend(step.action for step in episode.steps)
    # TODO use .to(device)
    return (FloatTensor(train_observations), LongTensor(train_actions),
            reward_bound, reward_mean)
def forward(self, x):
    """Forward pass: dropout + optional train-time noise, three layer/BN/
    activation triples, optional 6x6 average pooling, then a final layer.

    Fix: the original called `self.drop(x)` without assigning the result,
    so the dropout was a no-op (nn.Dropout is not in-place by default);
    the result is now assigned to `x`.
    """
    x = self.drop(x)
    if self.training and self.config['std'] > 10**-4:
        # Additive Gaussian input noise, training only.
        x += Variable(FloatTensor(x.size()).normal_()) * self.config['std']
    for i in range(3):
        x = self.linears[i](x)
        if self.config['is_bn']:
            x = self.bns[i](x)
        x = self.lr(x)
    # do not call average pooling for image with length 28:
    if self.is_avg_pool:
        x = self.avg_6(x)
    x = x.view(x.size(0), -1)
    x = self.linears[3](x)
    if self.config['is_bn']:
        x = self.bns[3](x)
    return x
def interpolation(self, uvm, image, index):
    """Backward-warp `image` using the index-th flow field packed in `uvm`.

    `uvm` stores per-pixel triples per field: channels 3*index and
    3*index+1 are the x/y displacements (u, v), channel 3*index+2 is a
    weight map (shifted by +0.5 below).

    Returns (interp, w, u, v): warped image, weight map, and the raw
    displacement components.
    """
    # Extract (u, v) as (N, H, W, 1) tensors.
    u, v = torch.index_select(
        uvm, dim=1,
        index=LongTensor([0+3*index, 1+3*index])).permute(0, 2, 3, 1).split(1, dim=3)
    # arange(..., out=...) fills tensors of the default type, keeping the
    # index grids on the same device as the flow.
    row_num = FloatTensor()
    col_num = FloatTensor()
    im_size = image.shape[2:4]
    torch.arange(im_size[0], out=row_num)
    torch.arange(im_size[1], out=col_num)
    row_num = row_num.view(1, im_size[0], 1, 1)
    col_num = col_num.view(1, 1, im_size[1], 1)
    # Absolute sampling positions, normalised to [-1, 1] for grid_sample.
    x_norm = 2*(u+col_num)/(im_size[1]-1)-1
    y_norm = 2*(v+row_num)/(im_size[0]-1)-1
    xy_norm = torch.clamp(torch.cat((x_norm, y_norm), dim=3), -1, 1)
    interp = nn.functional.grid_sample(image, xy_norm)
    # Weight channel for this field; +0.5 recentres it (presumably from
    # [-0.5, 0.5] to [0, 1] — confirm against the caller).
    w = torch.index_select(uvm, dim=1, index=LongTensor([3*index+2]))+0.5
    return interp, w, u, v
def action_probabilities(self, state):
    """Return the policy network's action-probability vector for one state."""
    # TODO use .to(device)
    batch = FloatTensor([state])
    logits = self.net(batch)
    return self.softmax(logits).cpu().data.numpy()[0]
def random_z(self, batch_size):
    """Draw a (batch_size, latent_dim) batch of standard-normal latents."""
    sample = np.random.normal(0, 1, (batch_size, self.latent_dim))
    return Variable(FloatTensor(sample))
def get_target(_, batch_size):
    """Return noisy training targets ~ N(0.05, 0.05), shape (batch_size, 1)."""
    noise = np.random.normal(0.05, 0.05, (batch_size, 1))
    return Variable(FloatTensor(noise))
# Select tensor aliases for the configured device.
# NOTE(review): use_gpu is hard-coded; presumably torch.cuda.is_available()
# was intended here — confirm before running on CPU-only hosts.
use_gpu = True
if use_gpu:
    from torch.cuda import FloatTensor, LongTensor, ByteTensor

    def to_gpu(x):
        # Move a tensor to the GPU.
        return x.cuda()
else:
    from torch import FloatTensor, LongTensor, ByteTensor

    def to_gpu(x):
        # CPU build: ensure the tensor lives on the host.
        return x.cpu()

# Instantiate empty tensors once, exercising the chosen backend.
x1 = FloatTensor()
x2 = ByteTensor()

# the below function is from the Pytorch forums
# https://discuss.pytorch.org/t/access-gpu-memory-usage-in-pytorch/3192/3
import subprocess


def get_gpu_memory_map():
    """Get the current gpu usage.

    Returns
    -------
    usage: dict
        Keys are device ids as integers.
        Values are memory usage as integers in MB.
    """
    # NOTE(review): this definition continues beyond the visible chunk —
    # the `except` handler and return statement are not shown here.
    try:
        result = subprocess.check_output(
            [
                'nvidia-smi', '--query-gpu=memory.used',
                '--format=csv,nounits,noheader'
            ], encoding='utf-8')
        # Convert lines into a dictionary
        gpu_memory = [int(x) for x in result.strip().split('\n')]
def train(self):
    """Train a CycleGAN (A<->B) end to end.

    Builds two generators and two discriminators, optimizers with linearly
    decaying LR schedules, then alternates generator and discriminator
    updates over the dataloader. Periodically saves weights and sample
    images under a timestamped experiment directory.
    """
    num_channels = self.config.NUM_CHANNELS
    use_cuda = self.config.USE_CUDA
    lr = self.config.LEARNING_RATE
    # Networks
    netG_A2B = Generator(num_channels)
    netG_B2A = Generator(num_channels)
    netD_A = Discriminator(num_channels)
    netD_B = Discriminator(num_channels)
    #netG_A2B = Generator_BN(num_channels)
    #netG_B2A = Generator_BN(num_channels)
    #netD_A = Discriminator_BN(num_channels)
    #netD_B = Discriminator_BN(num_channels)
    if use_cuda:
        netG_A2B.cuda()
        netG_B2A.cuda()
        netD_A.cuda()
        netD_B.cuda()
    netG_A2B.apply(weights_init_normal)
    netG_B2A.apply(weights_init_normal)
    netD_A.apply(weights_init_normal)
    netD_B.apply(weights_init_normal)
    # Adversarial, cycle-consistency and identity criteria.
    criterion_GAN = torch.nn.BCELoss()
    criterion_cycle = torch.nn.L1Loss()
    criterion_identity = torch.nn.L1Loss()
    # One optimizer over both generators; separate ones per discriminator.
    optimizer_G = torch.optim.Adam(itertools.chain(netG_A2B.parameters(),
                                                   netG_B2A.parameters()),
                                   lr=lr, betas=(0.5, 0.999))
    optimizer_D_A = torch.optim.Adam(netD_A.parameters(), lr=lr,
                                     betas=(0.5, 0.999))
    optimizer_D_B = torch.optim.Adam(netD_B.parameters(), lr=lr,
                                     betas=(0.5, 0.999))
    # Linear LR decay starting at the half-way epoch.
    lr_scheduler_G = torch.optim.lr_scheduler.LambdaLR(
        optimizer_G,
        lr_lambda=LambdaLR(self.config.EPOCH, 0, self.config.EPOCH//2).step)
    lr_scheduler_D_A = torch.optim.lr_scheduler.LambdaLR(
        optimizer_D_A,
        lr_lambda=LambdaLR(self.config.EPOCH, 0, self.config.EPOCH//2).step)
    lr_scheduler_D_B = torch.optim.lr_scheduler.LambdaLR(
        optimizer_D_B,
        lr_lambda=LambdaLR(self.config.EPOCH, 0, self.config.EPOCH//2).step)
    # Inputs & targets memory allocation
    #Tensor = LongTensor if use_cuda else torch.Tensor
    batch_size = self.config.BATCH_SIZE
    height, width, channels = self.config.INPUT_SHAPE
    input_A = FloatTensor(batch_size, channels, height, width)
    input_B = FloatTensor(batch_size, channels, height, width)
    target_real = Variable(FloatTensor(batch_size).fill_(1.0),
                           requires_grad=False)
    target_fake = Variable(FloatTensor(batch_size).fill_(0.0),
                           requires_grad=False)
    fake_A_buffer = ReplayBuffer()
    fake_B_buffer = ReplayBuffer()
    transforms_ = [transforms.RandomCrop((height, width)),
                   transforms.RandomHorizontalFlip(),
                   transforms.ToTensor(),
                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
    dataloader = DataLoader(ImageDataset(self.config.DATA_DIR,
                                         self.config.DATASET_A,
                                         self.config.DATASET_B,
                                         transforms_=transforms_,
                                         unaligned=True),
                            batch_size=batch_size, shuffle=True,
                            num_workers=2, drop_last=True)
    # Loss plot
    logger = Logger(self.config.EPOCH, len(dataloader))
    # Timestamped experiment directory layout: sample/ and weights/ per
    # direction (A2B and B2A).
    now = datetime.datetime.now()
    datetime_sequence = "{0}{1:02d}{2:02d}_{3:02}{4:02d}".format(
        str(now.year)[-2:], now.month, now.day, now.hour, now.minute)
    output_name_1 = self.config.DATASET_A + "2" + self.config.DATASET_B
    output_name_2 = self.config.DATASET_B + "2" + self.config.DATASET_A
    experiment_dir = os.path.join(self.config.RESULT_DIR, datetime_sequence)
    sample_output_dir_1 = os.path.join(experiment_dir, "sample", output_name_1)
    sample_output_dir_2 = os.path.join(experiment_dir, "sample", output_name_2)
    weights_output_dir_1 = os.path.join(experiment_dir, "weights", output_name_1)
    weights_output_dir_2 = os.path.join(experiment_dir, "weights", output_name_2)
    weights_output_dir_resume = os.path.join(experiment_dir, "weights", "resume")
    os.makedirs(sample_output_dir_1, exist_ok=True)
    os.makedirs(sample_output_dir_2, exist_ok=True)
    os.makedirs(weights_output_dir_1, exist_ok=True)
    os.makedirs(weights_output_dir_2, exist_ok=True)
    os.makedirs(weights_output_dir_resume, exist_ok=True)
    counter = 0
    for epoch in range(self.config.EPOCH):
        """
        logger.loss_df.to_csv(os.path.join(experiment_dir,
        self.config.DATASET_A + "_" + self.config.DATASET_B + ".csv"),
        index=False)
        """
        # Checkpoint every 100 epochs.
        if epoch % 100 == 0:
            torch.save(netG_A2B.state_dict(),
                       os.path.join(weights_output_dir_1,
                                    str(epoch).zfill(4) + 'netG_A2B.pth'))
            torch.save(netG_B2A.state_dict(),
                       os.path.join(weights_output_dir_2,
                                    str(epoch).zfill(4) + 'netG_B2A.pth'))
            torch.save(netD_A.state_dict(),
                       os.path.join(weights_output_dir_1,
                                    str(epoch).zfill(4) + 'netD_A.pth'))
            torch.save(netD_B.state_dict(),
                       os.path.join(weights_output_dir_2,
                                    str(epoch).zfill(4) + 'netD_B.pth'))
        for i, batch in enumerate(dataloader):
            # Set model input (copied into the preallocated buffers).
            real_A = Variable(input_A.copy_(batch['A']))
            real_B = Variable(input_B.copy_(batch['B']))
            ###### Generators A2B and B2A ######
            optimizer_G.zero_grad()
            # GAN loss
            fake_B = netG_A2B(real_A)
            pred_fake_B = netD_B(fake_B)
            loss_GAN_A2B = criterion_GAN(pred_fake_B, target_real)
            fake_A = netG_B2A(real_B)
            pred_fake_A = netD_A(fake_A)
            loss_GAN_B2A = criterion_GAN(pred_fake_A, target_real)
            # Cycle loss (weight 10.0)
            recovered_A = netG_B2A(fake_B)
            loss_cycle_ABA = criterion_cycle(recovered_A, real_A) * 10.0
            recovered_B = netG_A2B(fake_A)
            loss_cycle_BAB = criterion_cycle(recovered_B, real_B) * 10.0
            # Total loss
            loss_G = loss_GAN_A2B + loss_GAN_B2A + loss_cycle_ABA + loss_cycle_BAB
            loss_G.backward()
            optimizer_G.step()
            ###################################
            ###### Discriminator A ######
            optimizer_D_A.zero_grad()
            # Real loss
            pred_A = netD_A(real_A)
            loss_D_real = criterion_GAN(pred_A, target_real)
            # Fake loss (fakes drawn from the replay buffer)
            fake_A_ = fake_A_buffer.push_and_pop(fake_A)
            pred_fake = netD_A(fake_A_.detach())
            loss_D_fake = criterion_GAN(pred_fake, target_fake)
            # Total loss
            loss_D_A = (loss_D_real + loss_D_fake) * 0.5
            loss_D_A.backward()
            optimizer_D_A.step()
            ###################################
            ###### Discriminator B ######
            optimizer_D_B.zero_grad()
            # Real loss
            pred_B = netD_B(real_B)
            loss_D_real = criterion_GAN(pred_B, target_real)
            # Fake loss
            fake_B_ = fake_B_buffer.push_and_pop(fake_B)
            pred_fake = netD_B(fake_B_.detach())
            loss_D_fake = criterion_GAN(pred_fake, target_fake)
            # Total loss
            loss_D_B = (loss_D_real + loss_D_fake) * 0.5
            loss_D_B.backward()
            optimizer_D_B.step()
            # Progress report (http://localhost:8097)
            logger.log({'loss_G': loss_G,
                        'loss_G_GAN': (loss_GAN_A2B + loss_GAN_B2A),
                        'loss_G_cycle': (loss_cycle_ABA + loss_cycle_BAB),
                        'loss_D': (loss_D_A + loss_D_B)},
                       images={'real_A': real_A, 'real_B': real_B,
                               'fake_A': fake_A, 'fake_B': fake_B})
            # Dump side-by-side real/fake sample images every 500 batches.
            if counter % 500 == 0:
                real_A_sample = real_A.cpu().detach().numpy()[0]
                pred_A_sample = fake_A.cpu().detach().numpy()[0]
                real_B_sample = real_B.cpu().detach().numpy()[0]
                pred_B_sample = fake_B.cpu().detach().numpy()[0]
                combine_sample_1 = np.concatenate([real_A_sample, pred_B_sample], axis=2)
                combine_sample_2 = np.concatenate([real_B_sample, pred_A_sample], axis=2)
                file_1 = "{0}_{1}.jpg".format(epoch, counter)
                output_sample_image(os.path.join(sample_output_dir_1, file_1),
                                    combine_sample_1)
                file_2 = "{0}_{1}.jpg".format(epoch, counter)
                output_sample_image(os.path.join(sample_output_dir_2, file_2),
                                    combine_sample_2)
            counter += 1
        # Update learning rates
        lr_scheduler_G.step()
        lr_scheduler_D_A.step()
        lr_scheduler_D_B.step()
    # Final checkpoint after the last epoch.
    torch.save(netG_A2B.state_dict(),
               os.path.join(weights_output_dir_1,
                            str(self.config.EPOCH).zfill(4) + 'netG_A2B.pth'))
    torch.save(netG_B2A.state_dict(),
               os.path.join(weights_output_dir_2,
                            str(self.config.EPOCH).zfill(4) + 'netG_B2A.pth'))
    torch.save(netD_A.state_dict(),
               os.path.join(weights_output_dir_1,
                            str(self.config.EPOCH).zfill(4) + 'netD_A.pth'))
    torch.save(netD_B.state_dict(),
               os.path.join(weights_output_dir_2,
                            str(self.config.EPOCH).zfill(4) + 'netD_B.pth'))
def train(self, env, expert, render=False):
    """GAIL training loop: collect expert demonstrations, then alternate
    discriminator updates with TRPO-style policy/value updates.

    Returns (exp_rwd_mean, rwd_iter_means): the expert's mean episode
    reward and the policy's mean reward per iteration.
    """
    num_iters = self.train_config["num_iters"]
    num_steps_per_iter = self.train_config["num_steps_per_iter"]
    horizon = self.train_config["horizon"]
    lambda_ = self.train_config["lambda"]
    gae_gamma = self.train_config["gae_gamma"]
    gae_lambda = self.train_config["gae_lambda"]
    eps = self.train_config["epsilon"]
    max_kl = self.train_config["max_kl"]
    cg_damping = self.train_config["cg_damping"]
    normalize_advantage = self.train_config["normalize_advantage"]
    opt_d = torch.optim.Adam(self.d.parameters())
    # ---- Collect expert rollouts (once) ----
    exp_rwd_iter = []
    exp_obs = []
    exp_acts = []
    steps = 0
    while steps < num_steps_per_iter:
        ep_obs = []
        ep_rwds = []
        t = 0
        done = False
        ob = env.reset()
        while not done and steps < num_steps_per_iter:
            act = expert.act(ob)
            ep_obs.append(ob)
            exp_obs.append(ob)
            exp_acts.append(act)
            if render:
                env.render()
            ob, rwd, done, info = env.step(act)
            ep_rwds.append(rwd)
            t += 1
            steps += 1
            if horizon is not None:
                if t >= horizon:
                    break
        # Only completed episodes count towards the reward average.
        if done:
            exp_rwd_iter.append(np.sum(ep_rwds))
        # NOTE(review): these per-episode tensors are never used afterwards —
        # looks like dead code; confirm before removing.
        ep_obs = FloatTensor(ep_obs)
        ep_rwds = FloatTensor(ep_rwds)
    exp_rwd_mean = np.mean(exp_rwd_iter)
    print("Expert Reward Mean: {}".format(exp_rwd_mean))
    exp_obs = FloatTensor(exp_obs)
    exp_acts = FloatTensor(np.array(exp_acts))
    # ---- Main GAIL iterations ----
    rwd_iter_means = []
    for i in range(num_iters):
        rwd_iter = []
        obs = []
        acts = []
        rets = []
        advs = []
        gms = []
        steps = 0
        # Collect on-policy rollouts for this iteration.
        while steps < num_steps_per_iter:
            ep_obs = []
            ep_acts = []
            ep_rwds = []
            ep_costs = []
            ep_disc_costs = []
            ep_gms = []
            ep_lmbs = []
            t = 0
            done = False
            ob = env.reset()
            while not done and steps < num_steps_per_iter:
                act = self.act(ob)
                ep_obs.append(ob)
                obs.append(ob)
                ep_acts.append(act)
                acts.append(act)
                if render:
                    env.render()
                ob, rwd, done, info = env.step(act)
                ep_rwds.append(rwd)
                ep_gms.append(gae_gamma**t)
                ep_lmbs.append(gae_lambda**t)
                t += 1
                steps += 1
                if horizon is not None:
                    if t >= horizon:
                        break
            if done:
                rwd_iter.append(np.sum(ep_rwds))
            ep_obs = FloatTensor(ep_obs)
            # ep_acts = FloatTensor(np.array(ep_acts)).to(torch.device("cuda"))
            ep_acts = FloatTensor(np.array(ep_acts))
            ep_rwds = FloatTensor(ep_rwds)
            # ep_disc_rwds = FloatTensor(ep_disc_rwds)
            ep_gms = FloatTensor(ep_gms)
            ep_lmbs = FloatTensor(ep_lmbs)
            # Surrogate cost from the discriminator: -log D(s, a).
            ep_costs = (-1) * torch.log(self.d(ep_obs, ep_acts))\
                .squeeze().detach()
            ep_disc_costs = ep_gms * ep_costs
            # Discounted cost-to-go, undiscounted back to per-step returns.
            ep_disc_rets = FloatTensor(
                [sum(ep_disc_costs[i:]) for i in range(t)])
            ep_rets = ep_disc_rets / ep_gms
            rets.append(ep_rets)
            # GAE advantages from the value network's TD residuals.
            self.v.eval()
            curr_vals = self.v(ep_obs).detach()
            next_vals = torch.cat(
                (self.v(ep_obs)[1:], FloatTensor([[0.]]))).detach()
            ep_deltas = ep_costs.unsqueeze(-1)\
                + gae_gamma * next_vals\
                - curr_vals
            ep_advs = torch.FloatTensor([
                ((ep_gms * ep_lmbs)[:t - j].unsqueeze(-1) * ep_deltas[j:]).sum()
                for j in range(t)
            ])
            advs.append(ep_advs)
            gms.append(ep_gms)
        rwd_iter_means.append(np.mean(rwd_iter))
        print("Iterations: {}, Reward Mean: {}".format(
            i + 1, np.mean(rwd_iter)))
        obs = FloatTensor(obs)
        # acts = FloatTensor(np.array(acts)).to(torch.device("cuda"))
        acts = FloatTensor(np.array(acts))
        rets = torch.cat(rets)
        advs = torch.cat(advs)
        gms = torch.cat(gms)
        if normalize_advantage:
            advs = (advs - advs.mean()) / advs.std()
        # ---- Discriminator update: expert -> 0 logits side, novice -> 1 ----
        self.d.train()
        exp_scores = self.d.get_logits(exp_obs, exp_acts)
        nov_scores = self.d.get_logits(obs, acts)
        opt_d.zero_grad()
        loss = torch.nn.functional.binary_cross_entropy_with_logits(
            exp_scores, torch.zeros_like(exp_scores)
        ) \
            + torch.nn.functional.binary_cross_entropy_with_logits(
                nov_scores, torch.ones_like(nov_scores)
            )
        loss.backward()
        opt_d.step()
        # ---- Value-network natural-gradient step ----
        self.v.train()
        old_params = get_flat_params(self.v).detach()
        old_v = self.v(obs).detach()

        def constraint():
            # Trust-region constraint: squared change of value predictions.
            return ((old_v - self.v(obs))**2).mean()

        grad_diff = get_flat_grads(constraint(), self.v)

        def Hv(v):
            # Hessian-vector product for conjugate gradient.
            hessian = get_flat_grads(torch.dot(grad_diff, v), self.v)\
                .detach()
            return hessian

        g = get_flat_grads(
            ((-1) * (self.v(obs).squeeze() - rets)**2).mean(), self.v
        ).detach()
        s = conjugate_gradient(Hv, g).detach()
        Hs = Hv(s).detach()
        alpha = torch.sqrt(2 * eps / torch.dot(s, Hs))
        new_params = old_params + alpha * s
        set_params(self.v, new_params)
        # ---- Policy TRPO step on the discriminator-derived advantages ----
        self.pi.train()
        old_params = get_flat_params(self.pi).detach()
        old_distb = self.pi(obs)

        def L():
            # Importance-sampled surrogate objective.
            # NOTE(review): `.to(torch.device("cuda"))` is hard-coded here
            # and will fail on CPU-only runs — confirm intent.
            distb = self.pi(obs)
            return (advs.to(torch.device("cuda")) * torch.exp(
                distb.log_prob(acts)
                - old_distb.log_prob(acts).detach()
            )).mean()

        def kld():
            # KL(old || new) for discrete or diagonal-Gaussian policies.
            distb = self.pi(obs)
            if self.discrete:
                old_p = old_distb.probs.detach()
                p = distb.probs
                return (old_p * (torch.log(old_p) - torch.log(p)))\
                    .sum(-1)\
                    .mean()
            else:
                old_mean = old_distb.mean.detach()
                old_cov = old_distb.covariance_matrix.sum(-1).detach()
                mean = distb.mean
                cov = distb.covariance_matrix.sum(-1)
                return (0.5) * ((old_cov / cov).sum(-1)
                                + (((old_mean - mean)**2) / cov).sum(-1)
                                - self.action_dim
                                + torch.log(cov).sum(-1)
                                - torch.log(old_cov).sum(-1)).mean()

        grad_kld_old_param = get_flat_grads(kld(), self.pi)

        def Hv(v):
            # Damped Fisher-vector product.
            hessian = get_flat_grads(torch.dot(grad_kld_old_param, v),
                                     self.pi).detach()
            return hessian + cg_damping * v

        g = get_flat_grads(L(), self.pi).detach()
        s = conjugate_gradient(Hv, g).detach()
        Hs = Hv(s).detach()
        new_params = rescale_and_linesearch(g, s, Hs, max_kl, L, kld,
                                            old_params, self.pi)
        # Causal-entropy bonus (weight lambda_) added directly in
        # parameter space.
        disc_causal_entropy = ((-1) * gms * self.pi(obs).log_prob(acts))\
            .mean()
        grad_disc_causal_entropy = get_flat_grads(disc_causal_entropy,
                                                  self.pi)
        new_params += lambda_ * grad_disc_causal_entropy
        set_params(self.pi, new_params)
    return exp_rwd_mean, rwd_iter_means
def train(self, env, render=False):
    """TRPO training loop: per iteration, roll out the policy, compute
    GAE advantages, take a natural-gradient value step and a
    KL-constrained (line-searched) policy step.

    Returns the list of mean episode rewards per iteration.
    """
    num_iters = self.train_config["num_iters"]
    num_steps_per_iter = self.train_config["num_steps_per_iter"]
    horizon = self.train_config["horizon"]
    gamma_ = self.train_config["gamma"]
    lambda_ = self.train_config["lambda"]
    eps = self.train_config["epsilon"]
    max_kl = self.train_config["max_kl"]
    cg_damping = self.train_config["cg_damping"]
    normalize_advantage = self.train_config["normalize_advantage"]
    rwd_iter_means = []
    for i in range(num_iters):
        rwd_iter = []
        obs = []
        acts = []
        rets = []
        advs = []
        gms = []
        steps = 0
        # Collect on-policy rollouts for this iteration.
        while steps < num_steps_per_iter:
            ep_obs = []
            ep_rwds = []
            ep_disc_rwds = []
            ep_gms = []
            ep_lmbs = []
            t = 0
            done = False
            ob = env.reset()
            while not done and steps < num_steps_per_iter:
                act = self.act(ob)
                ep_obs.append(ob)
                obs.append(ob)
                acts.append(act)
                if render:
                    env.render()
                ob, rwd, done, info = env.step(act)
                ep_rwds.append(rwd)
                ep_disc_rwds.append(rwd * (gamma_**t))
                ep_gms.append(gamma_**t)
                ep_lmbs.append(lambda_**t)
                t += 1
                steps += 1
                if horizon is not None:
                    if t >= horizon:
                        done = True
                        break
            if done:
                rwd_iter.append(np.sum(ep_rwds))
            ep_obs = FloatTensor(np.array(ep_obs))
            ep_rwds = FloatTensor(ep_rwds)
            ep_disc_rwds = FloatTensor(ep_disc_rwds)
            ep_gms = FloatTensor(ep_gms)
            ep_lmbs = FloatTensor(ep_lmbs)
            # Discounted reward-to-go, undiscounted back to per-step returns.
            ep_disc_rets = FloatTensor(
                [sum(ep_disc_rwds[i:]) for i in range(t)])
            ep_rets = ep_disc_rets / ep_gms
            rets.append(ep_rets)
            # GAE advantages from the value network's TD residuals.
            self.v.eval()
            curr_vals = self.v(ep_obs).detach()
            next_vals = torch.cat(
                (self.v(ep_obs)[1:], FloatTensor([[0.]]))).detach()
            ep_deltas = ep_rwds.unsqueeze(-1)\
                + gamma_ * next_vals\
                - curr_vals
            ep_advs = FloatTensor([
                ((ep_gms * ep_lmbs)[:t - j].unsqueeze(-1) * ep_deltas[j:]).sum()
                for j in range(t)
            ])
            advs.append(ep_advs)
            gms.append(ep_gms)
        rwd_iter_means.append(np.mean(rwd_iter))
        print("Iterations: {}, Reward Mean: {}".format(
            i + 1, np.mean(rwd_iter)))
        obs = FloatTensor(np.array(obs))
        acts = FloatTensor(np.array(acts))
        rets = torch.cat(rets)
        advs = torch.cat(advs)
        gms = torch.cat(gms)
        if normalize_advantage:
            advs = (advs - advs.mean()) / advs.std()
        # ---- Value-network natural-gradient step ----
        self.v.train()
        old_params = get_flat_params(self.v).detach()
        old_v = self.v(obs).detach()

        def constraint():
            # Trust-region constraint: squared change of value predictions.
            return ((old_v - self.v(obs))**2).mean()

        grad_diff = get_flat_grads(constraint(), self.v)

        def Hv(v):
            # Hessian-vector product for conjugate gradient.
            hessian = get_flat_grads(torch.dot(grad_diff, v), self.v)\
                .detach()
            return hessian

        g = get_flat_grads(
            ((-1) * (self.v(obs).squeeze() - rets)**2).mean(), self.v
        ).detach()
        s = conjugate_gradient(Hv, g).detach()
        Hs = Hv(s).detach()
        alpha = torch.sqrt(2 * eps / torch.dot(s, Hs))
        new_params = old_params + alpha * s
        set_params(self.v, new_params)
        # ---- KL-constrained policy step with line search ----
        self.pi.train()
        old_params = get_flat_params(self.pi).detach()
        old_distb = self.pi(obs)

        def L():
            # Importance-sampled surrogate objective.
            distb = self.pi(obs)
            return (advs * torch.exp(
                distb.log_prob(acts)
                - old_distb.log_prob(acts).detach()
            )).mean()

        def kld():
            # KL(old || new) for discrete or diagonal-Gaussian policies.
            distb = self.pi(obs)
            if self.discrete:
                old_p = old_distb.probs.detach()
                p = distb.probs
                return (old_p * (torch.log(old_p) - torch.log(p)))\
                    .sum(-1)\
                    .mean()
            else:
                old_mean = old_distb.mean.detach()
                old_cov = old_distb.covariance_matrix.sum(-1).detach()
                mean = distb.mean
                cov = distb.covariance_matrix.sum(-1)
                return (0.5) * ((old_cov / cov).sum(-1)
                                + (((old_mean - mean)**2) / cov).sum(-1)
                                - self.action_dim
                                + torch.log(cov).sum(-1)
                                - torch.log(old_cov).sum(-1)).mean()

        grad_kld_old_param = get_flat_grads(kld(), self.pi)

        def Hv(v):
            # Damped Fisher-vector product.
            hessian = get_flat_grads(torch.dot(grad_kld_old_param, v),
                                     self.pi).detach()
            return hessian + cg_damping * v

        g = get_flat_grads(L(), self.pi).detach()
        s = conjugate_gradient(Hv, g).detach()
        Hs = Hv(s).detach()
        new_params = rescale_and_linesearch(g, s, Hs, max_kl, L, kld,
                                            old_params, self.pi)
        set_params(self.pi, new_params)
    return rwd_iter_means
def train(self, env, render=False):
    """PPO training loop: per iteration, roll out the policy, compute GAE
    advantages, then run clipped-surrogate minibatch epochs jointly
    updating policy and value networks.

    Returns the list of mean episode rewards per iteration.
    """
    lr = self.train_config["lr"]
    num_iters = self.train_config["num_iters"]
    num_steps_per_iter = self.train_config["num_steps_per_iter"]
    num_epochs = self.train_config["num_epochs"]
    minibatch_size = self.train_config["minibatch_size"]
    horizon = self.train_config["horizon"]
    gamma_ = self.train_config["gamma"]
    lambda_ = self.train_config["lambda"]
    eps = self.train_config["epsilon"]
    c1 = self.train_config["vf_coeff"]
    c2 = self.train_config["entropy_coeff"]
    normalize_advantage = self.train_config["normalize_advantage"]
    opt_pi = torch.optim.Adam(self.pi.parameters(), lr)
    opt_v = torch.optim.Adam(self.v.parameters(), lr)
    rwd_iter_means = []
    for i in range(num_iters):
        rwd_iter = []
        obs = []
        acts = []
        rets = []
        advs = []
        gms = []
        steps = 0
        # Collect on-policy rollouts for this iteration.
        while steps < num_steps_per_iter:
            ep_obs = []
            ep_rwds = []
            ep_disc_rwds = []
            ep_gms = []
            ep_lmbs = []
            t = 0
            done = False
            ob = env.reset()
            while not done and steps < num_steps_per_iter:
                act = self.act(ob)
                ep_obs.append(ob)
                obs.append(ob)
                acts.append(act)
                if render:
                    env.render()
                ob, rwd, done, info = env.step(act)
                ep_rwds.append(rwd)
                ep_disc_rwds.append(rwd * (gamma_**t))
                ep_gms.append(gamma_**t)
                ep_lmbs.append(lambda_**t)
                t += 1
                steps += 1
                if horizon is not None:
                    if t >= horizon:
                        done = True
                        break
            if done:
                rwd_iter.append(np.sum(ep_rwds))
            ep_obs = FloatTensor(np.array(ep_obs))
            ep_rwds = FloatTensor(ep_rwds)
            ep_disc_rwds = FloatTensor(ep_disc_rwds)
            ep_gms = FloatTensor(ep_gms)
            ep_lmbs = FloatTensor(ep_lmbs)
            # Discounted reward-to-go, undiscounted back to per-step returns.
            ep_disc_rets = FloatTensor(
                [sum(ep_disc_rwds[i:]) for i in range(t)])
            ep_rets = ep_disc_rets / ep_gms
            rets.append(ep_rets)
            # GAE advantages from the value network's TD residuals.
            self.v.eval()
            curr_vals = self.v(ep_obs).detach()
            next_vals = torch.cat(
                (self.v(ep_obs)[1:], FloatTensor([[0.]]))).detach()
            ep_deltas = ep_rwds.unsqueeze(-1)\
                + gamma_ * next_vals\
                - curr_vals
            ep_advs = FloatTensor([
                ((ep_gms * ep_lmbs)[:t - j].unsqueeze(-1) * ep_deltas[j:]).sum()
                for j in range(t)
            ])
            advs.append(ep_advs)
            gms.append(ep_gms)
        rwd_iter_means.append(np.mean(rwd_iter))
        print("Iterations: {}, Reward Mean: {}".format(
            i + 1, np.mean(rwd_iter)))
        obs = FloatTensor(np.array(obs))
        acts = FloatTensor(np.array(acts))
        rets = torch.cat(rets)
        advs = torch.cat(advs)
        gms = torch.cat(gms)
        if normalize_advantage:
            advs = (advs - advs.mean()) / advs.std()
        # Snapshot log-probs under the pre-update policy for the ratio.
        self.pi.eval()
        old_log_pi = self.pi(obs).log_prob(acts).detach()
        self.pi.train()
        self.v.train()
        # Minibatch epochs over the collected batch (`steps` transitions).
        max_steps = num_epochs * (num_steps_per_iter // minibatch_size)
        for _ in range(max_steps):
            minibatch_indices = np.random.choice(range(steps),
                                                 minibatch_size, False)
            mb_obs = obs[minibatch_indices]
            mb_acts = acts[minibatch_indices]
            mb_advs = advs[minibatch_indices]
            mb_rets = rets[minibatch_indices]
            mb_distb = self.pi(mb_obs)
            mb_log_pi = mb_distb.log_prob(mb_acts)
            mb_old_log_pi = old_log_pi[minibatch_indices]
            # Probability ratio and PPO clipped surrogate.
            r = torch.exp(mb_log_pi - mb_old_log_pi)
            L_clip = torch.minimum(
                r * mb_advs, torch.clip(r, 1 - eps, 1 + eps) * mb_advs)
            # Value-function error and entropy bonus.
            L_vf = (self.v(mb_obs).squeeze() - mb_rets)**2
            S = mb_distb.entropy()
            opt_pi.zero_grad()
            opt_v.zero_grad()
            # Maximise L_clip - c1*L_vf + c2*S (hence the -1 for Adam).
            loss = (-1) * (L_clip - c1 * L_vf + c2 * S).mean()
            loss.backward()
            opt_pi.step()
            opt_v.step()
    return rwd_iter_means
def main():
    """Train, validate, checkpoint and finally test the modular network.

    Relies on module-level state: `args`, `state_dict`, the train/valid/test
    data lists and batches, `attn_seq2seq`, `training_task`, `optimizer`,
    `loss`, `sigmoid`, `sigmoid_map` and helpers. Each epoch runs a training
    pass and a validation pass, logs losses/accuracies (optionally to
    visdom), and saves periodic checkpoints; a test pass runs at the end.
    """
    print('Beginning to train!\n')
    epoch_loss_lst = []
    valid_loss_lst = []
    num_epochs = args.epochs
    batch_size = state_dict['BATCH_SIZE']
    training_acc_lst = []
    validation_acc_lst = []
    attn_lst = []
    tst_preds = []
    tst_true = []
    for e in range(num_epochs):
        batch_loss = []
        vloss_lst = []
        trn_preds = []
        val_preds = []
        trn_true = []
        val_true = []
        attns = []
        update_dict = {}
        # ---- Training pass ----
        for batch in tqdm(range(len(train_answer_list) // batch_size)):
            batch_range = list(
                range(batch * batch_size, (batch + 1) * batch_size))
            optimizer.zero_grad()
            # One layout per batch (taken from the batch's first example).
            train_seq = train_layout_list[batch * batch_size]
            label = Variable(
                FloatTensor(
                    np.matrix([train_answer_list[br] for br in batch_range])))
            xtxt, attn, txtloss = attn_seq2seq.forward(train_qbatches[batch],
                                                       train_lbatches[batch],
                                                       train_obatches[batch])
            attns.append(attn)
            # Visual features only for VQA-style tasks.
            if isinstance(training_task, VQAModuloTask):
                img = Variable(FloatTensor(train_images[batch_range, :, :, :]))
                xvis = training_task.module_dict['_Img'].forward(img)
                network = training_task.assemble(train_seq, xvis, xtxt)
            else:
                network = training_task.assemble(train_seq, None, xtxt)
            output = loss(network.squeeze(2), label.permute(1, 0)) + txtloss
            output.backward()
            if args.use_gradient_clipping:
                clip_grad_norm(param_list, max_norm=args.max_grad_norm)
            optimizer.step()
            if state_dict['GPU_SUPPORT']:
                label = label.cpu()
                output = output.cpu()
                network = network.cpu()
            trn_true.extend(list(label.permute(1, 0).data.numpy().flatten()))
            trn_preds.extend(
                list(sigmoid(network.squeeze(2)).data.numpy().flatten()))
            batch_loss.append(output.data.numpy()[0])
        epoch_loss_lst.append(np.mean(batch_loss))
        print('EPOCH {}/{} \n\tTRAINING LOSS = {}'.format(
            e + 1, num_epochs, epoch_loss_lst[-1]))
        update_dict['training_loss'] = epoch_loss_lst[-1]
        # ---- Validation pass (no parameter updates) ----
        for vbatch in range(len(valid_answer_list) // batch_size):
            vbatch_range = list(
                range(vbatch * batch_size, (vbatch + 1) * batch_size))
            valid_seq = valid_layout_list[vbatch * batch_size]
            valid_label = Variable(
                FloatTensor(
                    np.matrix([valid_answer_list[br]
                               for br in vbatch_range])))
            vxtxt, _, vtxtloss = attn_seq2seq.forward(valid_qbatches[vbatch],
                                                      valid_lbatches[vbatch],
                                                      valid_obatches[vbatch])
            if isinstance(training_task, VQAModuloTask):
                vimg = Variable(
                    FloatTensor(valid_images[vbatch_range, :, :, :]))
                vx_vis = training_task.module_dict['_Img'].forward(vimg)
                network = training_task.assemble(valid_seq, vx_vis, vxtxt)
            else:
                network = training_task.assemble(valid_seq, None, vxtxt)
            output = loss(network.squeeze(2), valid_label.permute(
                1, 0)) + vtxtloss
            if state_dict['GPU_SUPPORT']:
                valid_label = valid_label.cpu()
                output = output.cpu()
                network = network.cpu()
            val_true.extend(
                list(valid_label.permute(1, 0).data.numpy().flatten()))
            val_preds.extend(
                list(sigmoid(network.squeeze(2)).data.numpy().flatten()))
            vloss_lst.append(output.data.numpy()[0])
        valid_loss_lst.append(np.mean(vloss_lst))
        print('\tVALIDATION LOSS = {}'.format(valid_loss_lst[-1]))
        update_dict['validation_loss'] = valid_loss_lst[-1]
        # Threshold sigmoid outputs and score accuracy.
        trn_preds = sigmoid_map(trn_preds)
        val_preds = sigmoid_map(val_preds)
        training_acc = np.mean(np.array(trn_preds) == np.array(trn_true)) * 100
        validation_acc = np.mean(
            np.array(val_preds) == np.array(val_true)) * 100
        print('\tTRAINING ACCURACY: {}\n\tVALIDATION ACCURACY: {}'.format(
            training_acc, validation_acc))
        training_acc_lst.append(training_acc)
        validation_acc_lst.append(validation_acc)
        attn_lst.append(attns)
        update_dict['training_accuracy'] = training_acc
        update_dict['validation_accuracy'] = validation_acc
        # First attention map of the epoch, for visualisation.
        if state_dict['GPU_SUPPORT']:
            first_attn = torch.stack(attn_lst[-1][0]).permute(
                1, 0, 2)[0].cpu().data.numpy()
        else:
            first_attn = torch.stack(attn_lst[-1][0]).permute(
                1, 0, 2)[0].data.numpy()
        if args.visdom:
            update_visdom(e, update_dict, first_attn)
        if e % args.checkpoint_freq == 0:
            checkpoint_dict = {
                'seq2seq': attn_seq2seq.state_dict(),
                'optimizer': optimizer.state_dict(),
            }
            for mod_name, mod in training_task.module_dict.items():
                checkpoint_dict[mod_name] = mod.state_dict()
            save_checkpoint(checkpoint_dict)
            print('\nSAVED CHECKPOINT\n')
    # ---- Test pass ----
    print('DONE TRAINING, EVALUATING TEST PERFORMANCE...')
    tloss_lst = []
    for tbatch in range(len(test_answer_list) // batch_size):
        tbatch_range = list(
            range(tbatch * batch_size, (tbatch + 1) * batch_size))
        test_seq = test_layout_list[tbatch * batch_size]
        test_label = Variable(
            FloatTensor(
                np.matrix([test_answer_list[br] for br in tbatch_range])))
        txtxt, _, _ = attn_seq2seq.forward(test_qbatches[tbatch],
                                           test_lbatches[tbatch],
                                           test_obatches[tbatch])
        if isinstance(training_task, VQAModuloTask):
            # NOTE(review): this indexes `valid_images` inside the TEST loop —
            # looks like a copy-paste slip (presumably `test_images`); confirm.
            timg = torch.autograd.Variable(
                FloatTensor(valid_images[tbatch_range, :, :, :]))
            tx_vis = training_task.module_dict['_Img'].forward(timg)
            network = training_task.assemble(test_seq, tx_vis, txtxt)
        else:
            network = training_task.assemble(test_seq, None, txtxt)
        output = loss(network.squeeze(2), test_label.permute(1, 0))
        if state_dict['GPU_SUPPORT']:
            test_label = test_label.cpu()
            output = output.cpu()
            network = network.cpu()
        tst_true.extend(list(test_label.permute(1, 0).data.numpy().flatten()))
        tst_preds.extend(
            list(sigmoid(network.squeeze(2)).data.numpy().flatten()))
        tloss_lst.append(output.data.numpy()[0])
    tst_preds = sigmoid_map(tst_preds)
    tst_acc = np.mean(np.array(tst_preds) == np.array(tst_true)) * 100
    print('TESTING LOSS: {}\nTESTING ACCURACY: {}'.format(
        np.mean(tloss_lst), tst_acc))
def forward(self, x, targets=None, img_dim=None):
    """Run one YOLO detection head over a feature map.

    :param x: Raw feature map; the ``view`` below requires shape
        (num_samples, num_anchors * (num_classes + 5), grid_size, grid_size).
    :param targets: Optional ground-truth boxes; when ``None`` only
        predictions are returned (inference mode) with a zero loss.
    :param img_dim: Current input image dimension, stored on ``self`` for
        use by ``compute_grid_offsets``.
    :returns: Tuple ``(output, total_loss)`` where ``output`` has shape
        (num_samples, num_anchors * grid_size**2, num_classes + 5) and
        ``total_loss`` is 0 in inference mode.
    """
    self.img_dim = img_dim
    num_samples = x.size(0)
    grid_size = x.size(2)
    # Reshape to (samples, anchors, grid, grid, box-attributes) so that the
    # last axis holds [x, y, w, h, conf, class scores...].
    prediction = (x.view(
        num_samples,
        self.num_anchors,
        self.num_classes + 5,
        grid_size,
        grid_size,
    ).permute(0, 1, 3, 4, 2).contiguous())
    # Get outputs
    x = torch.sigmoid(prediction[..., 0])  # Center x (cell-relative, in [0,1])
    y = torch.sigmoid(prediction[..., 1])  # Center y (cell-relative, in [0,1])
    w = prediction[..., 2]  # Width (log-space, exponentiated below)
    h = prediction[..., 3]  # Height (log-space, exponentiated below)
    pred_conf = torch.sigmoid(prediction[..., 4])  # Conf
    pred_cls = torch.sigmoid(prediction[..., 5:])  # Cls pred.
    # If grid size does not match current we compute new offsets
    if grid_size != self.grid_size:
        self.compute_grid_offsets(grid_size, x.is_cuda)
    # Add offset and scale with anchors
    # NOTE(review): unconditional .cuda(self.device) assumes a GPU is always
    # available here — confirm against how this layer is deployed.
    pred_boxes = FloatTensor(prediction[..., :4].shape).cuda(self.device)
    pred_boxes[..., 0] = x.data + self.grid_x
    pred_boxes[..., 1] = y.data + self.grid_y
    pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
    pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h
    # Flatten per-anchor grids and scale boxes back to input-image pixels.
    output = torch.cat(
        (
            pred_boxes.view(num_samples, -1, 4) * self.stride,
            pred_conf.view(num_samples, -1, 1),
            pred_cls.view(num_samples, -1, self.num_classes),
        ),
        -1,
    )
    if targets is None:
        # Inference mode: no targets, so no loss to report.
        return output, 0
    else:
        # Match predictions to ground truth and build the per-cell masks
        # and regression targets used by the loss terms below.
        (
            iou_scores,
            class_mask,
            obj_mask,
            noobj_mask,
            tx,
            ty,
            tw,
            th,
            tcls,
            tconf,
        ) = utils.build_targets(
            pred_boxes=pred_boxes,
            pred_cls=pred_cls,
            target=targets,
            anchors=self.scaled_anchors,
            ignore_thres=self.ignore_thres,
            device=self.device,
        )
        # Loss : Mask outputs to ignore non-existing objects (except with conf. loss)
        loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
        loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
        loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
        loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
        loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
        loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
        # Objectness loss weights object/no-object cells differently.
        loss_conf = (self.obj_scale * loss_conf_obj +
                     self.noobj_scale * loss_conf_noobj)
        loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
        total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
        # Metrics
        cls_acc = 100 * class_mask[obj_mask].mean()
        conf_obj = pred_conf[obj_mask].mean()
        conf_noobj = pred_conf[noobj_mask].mean()
        conf50 = (pred_conf > 0.5).float()
        iou50 = (iou_scores > 0.5).float()
        iou75 = (iou_scores > 0.75).float()
        # A detection counts when conf > 0.5, the class is right, and the
        # cell actually contains an object.
        detected_mask = conf50 * class_mask * tconf
        # 1e-16 guards against division by zero when nothing is detected.
        precision = torch.sum(
            iou50 * detected_mask) / (conf50.sum() + 1e-16)
        recall50 = torch.sum(
            iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
        recall75 = torch.sum(
            iou75 * detected_mask) / (obj_mask.sum() + 1e-16)
        self.metrics = {
            "loss": utils.to_cpu(total_loss).item(),
            "x": utils.to_cpu(loss_x).item(),
            "y": utils.to_cpu(loss_y).item(),
            "w": utils.to_cpu(loss_w).item(),
            "h": utils.to_cpu(loss_h).item(),
            "conf": utils.to_cpu(loss_conf).item(),
            "cls": utils.to_cpu(loss_cls).item(),
            "cls_acc": utils.to_cpu(cls_acc).item(),
            "recall50": utils.to_cpu(recall50).item(),
            "recall75": utils.to_cpu(recall75).item(),
            "precision": utils.to_cpu(precision).item(),
            "conf_obj": utils.to_cpu(conf_obj).item(),
            "conf_noobj": utils.to_cpu(conf_noobj).item(),
            "grid_size": grid_size,
        }
        return output, total_loss
def train(self, env, render=False):
    """Train policy ``self.pi`` and value function ``self.v`` on ``env``.

    Runs episodes, counts environment steps, and treats every
    ``num_steps_per_iter`` steps as one "iteration" for reporting.
    After each episode it takes one value-function step and one
    policy-gradient step using the collected trajectory.

    :param env: Gym-style environment (``reset``/``step``/``render``).
    :param render: When True, render the environment each step.
    :returns: List of per-iteration mean episode rewards.
    """
    lr = self.train_config["lr"]
    num_iters = self.train_config["num_iters"]
    num_steps_per_iter = self.train_config["num_steps_per_iter"]
    horizon = self.train_config["horizon"]
    discount = self.train_config["discount"]
    normalize_advantage = self.train_config["normalize_advantage"]
    opt_pi = torch.optim.Adam(self.pi.parameters(), lr)
    opt_v = torch.optim.Adam(self.v.parameters(), lr)
    rwd_iter_means = []
    rwd_iter = []
    i = 0
    steps = 0
    while i < num_iters:
        obs = []
        acts = []
        rwds = []
        disc_rwds = []
        disc = []
        t = 0
        done = False
        ob = env.reset()
        while not done:
            act = self.act(ob)
            obs.append(ob)
            acts.append(act)
            if render:
                env.render()
            ob, rwd, done, info = env.step(act)
            rwds.append(rwd)
            disc_rwds.append(rwd * (discount**t))
            disc.append(discount**t)
            t += 1
            steps += 1
            # An "iteration" is a fixed step budget, not an episode; it can
            # roll over mid-episode, in which case reporting resets here
            # while the episode itself keeps running.
            if steps == num_steps_per_iter:
                rwd_iter_means.append(np.mean(rwd_iter))
                print("Iterations: {}, Reward Mean: {}".format(
                    i + 1, np.mean(rwd_iter)))
                i += 1
                steps = 0
                rwd_iter = []
            # Truncate the episode at the horizon, if one is configured.
            if horizon is not None:
                if t >= horizon:
                    done = True
                    break
        rwd_iter.append(np.sum(rwds))
        obs = FloatTensor(np.array(obs))
        acts = FloatTensor(np.array(acts))
        rwds = FloatTensor(rwds)
        disc = FloatTensor(disc)
        ###
        # Discounted return-to-go for each timestep; dividing by the
        # discount factors converts to returns as seen from each step.
        disc_rets = FloatTensor(
            [sum(disc_rwds[i:]) for i in range(len(disc_rwds))])
        rets = disc_rets / disc
        ###
        self.v.eval()
        curr_vals = self.v(obs)
        # Bootstrap: next-state values are the value sequence shifted by
        # one, with 0 appended for the terminal state.
        next_vals = torch.cat((self.v(obs)[1:], FloatTensor([[0.]])))
        # One-step TD advantage estimate, detached from the value graph.
        advantage = (rwds.unsqueeze(-1) + discount * next_vals -
                     curr_vals).detach()
        if normalize_advantage:
            advantage = (advantage - advantage.mean()) / advantage.std()
        # print(advantage.shape, obs.shape, disc.shape)
        # Monte-Carlo residual used as the value-function learning signal.
        delta = (rets - self.v(obs).squeeze()).detach()
        self.v.train()
        opt_v.zero_grad()
        # loss = (0.5) * (
        #     rwds.unsqueeze(-1)
        #     + discount * next_vals.detach()
        #     - self.v(obs)
        # ) ** 2
        loss = (-1) * disc * delta * self.v(obs).squeeze()
        # loss = (0.5) * ((rets - self.v(obs).squeeze()) ** 2)
        # loss = (-1) * disc.unsqueeze(-1) * advantage * self.v(obs)
        # print(loss.shape)
        loss.mean().backward()
        opt_v.step()
    self.pi.train()
    distb = self.pi(obs)
    opt_pi.zero_grad()
    # Discount-weighted policy-gradient loss on the TD advantage.
    loss = (-1) * disc.unsqueeze(-1) * advantage * distb.log_prob(acts)
    loss.mean().backward()
    opt_pi.step()
    return rwd_iter_means
def train(self, env, render=False):
    """Train the policy with TRPO (trust-region natural gradient).

    Each iteration collects up to ``num_steps_per_iter`` environment
    steps across one or more episodes, optionally fits a value baseline,
    then takes a KL-constrained policy step via conjugate gradient and
    line search.

    :param env: Gym-style environment (``reset``/``step``/``render``).
    :param render: When True, render the environment each step.
    :returns: List of per-iteration mean episode rewards.
    """
    lr = self.train_config["lr"]
    num_iters = self.train_config["num_iters"]
    num_steps_per_iter = self.train_config["num_steps_per_iter"]
    horizon = self.train_config["horizon"]
    discount = self.train_config["discount"]
    max_kl = self.train_config["max_kl"]
    cg_damping = self.train_config["cg_damping"]
    normalize_return = self.train_config["normalize_return"]
    use_baseline = self.train_config["use_baseline"]
    if use_baseline:
        opt_v = torch.optim.Adam(self.v.parameters(), lr)
    rwd_iter_means = []
    for i in range(num_iters):
        rwd_iter = []
        obs = []
        acts = []
        rets = []
        disc = []
        steps = 0
        while steps < num_steps_per_iter:
            ep_rwds = []
            ep_disc_rwds = []
            ep_disc = []
            t = 0
            done = False
            ob = env.reset()
            # Stop mid-episode when the per-iteration step budget runs out.
            while not done and steps < num_steps_per_iter:
                act = self.act(ob)
                obs.append(ob)
                acts.append(act)
                if render:
                    env.render()
                ob, rwd, done, info = env.step(act)
                ep_rwds.append(rwd)
                ep_disc_rwds.append(rwd * (discount ** t))
                ep_disc.append(discount ** t)
                t += 1
                steps += 1
                # Truncate the episode at the horizon, if configured.
                if horizon is not None:
                    if t >= horizon:
                        done = True
                        break
            ep_disc = FloatTensor(ep_disc)
            # Discounted return-to-go per step; dividing by the discount
            # factors converts to the return seen from each timestep.
            ep_disc_rets = FloatTensor(
                [sum(ep_disc_rwds[i:]) for i in range(t)]
            )
            ep_rets = ep_disc_rets / ep_disc
            rets.append(ep_rets)
            disc.append(ep_disc)
            # Only completed episodes contribute to the reward statistics;
            # budget-truncated partial episodes are excluded.
            if done:
                rwd_iter.append(np.sum(ep_rwds))
        rwd_iter_means.append(np.mean(rwd_iter))
        print(
            "Iterations: {}, Reward Mean: {}"
            .format(i + 1, np.mean(rwd_iter))
        )
        obs = FloatTensor(np.array(obs))
        acts = FloatTensor(np.array(acts))
        rets = torch.cat(rets)
        disc = torch.cat(disc)
        if normalize_return:
            rets = (rets - rets.mean()) / rets.std()
        if use_baseline:
            # One value-function update; `delta` (the detached residual)
            # is also captured by the surrogate closure L() below.
            self.v.eval()
            delta = (rets - self.v(obs).squeeze()).detach()
            self.v.train()
            opt_v.zero_grad()
            loss = (-1) * disc * delta * self.v(obs).squeeze()
            loss.mean().backward()
            opt_v.step()
        self.pi.train()
        old_params = get_flat_params(self.pi).detach()
        old_distb = self.pi(obs)

        def L():
            # TRPO surrogate objective: importance-weighted advantage
            # (or raw return when no baseline is used).
            distb = self.pi(obs)
            if use_baseline:
                return (disc * delta * torch.exp(
                            distb.log_prob(acts)
                            - old_distb.log_prob(acts).detach()
                        )).mean()
            else:
                return (disc * rets * torch.exp(
                            distb.log_prob(acts)
                            - old_distb.log_prob(acts).detach()
                        )).mean()

        def kld():
            # KL(old || new) for the trust-region constraint; closed form
            # for categorical and (diagonalized) Gaussian policies.
            distb = self.pi(obs)
            if self.discrete:
                old_p = old_distb.probs.detach()
                p = distb.probs
                return (old_p * (torch.log(old_p) - torch.log(p)))\
                    .sum(-1)\
                    .mean()
            else:
                old_mean = old_distb.mean.detach()
                old_cov = old_distb.covariance_matrix.sum(-1).detach()
                mean = distb.mean
                cov = distb.covariance_matrix.sum(-1)
                return (0.5) * (
                        (old_cov / cov).sum(-1)
                        + (((old_mean - mean) ** 2) / cov).sum(-1)
                        - self.action_dim
                        + torch.log(cov).sum(-1)
                        - torch.log(old_cov).sum(-1)
                    ).mean()

        grad_kld_old_param = get_flat_grads(kld(), self.pi)

        def Hv(v):
            # Fisher-vector product via double backprop on the KL, with
            # damping for conjugate-gradient stability.
            hessian = get_flat_grads(
                torch.dot(grad_kld_old_param, v), self.pi
            ).detach()
            return hessian + cg_damping * v

        g = get_flat_grads(L(), self.pi).detach()
        # Solve H s = g approximately, then line-search within the KL bound.
        s = conjugate_gradient(Hv, g).detach()
        Hs = Hv(s).detach()
        new_params = rescale_and_linesearch(
            g, s, Hs, max_kl, L, kld, old_params, self.pi
        )
        set_params(self.pi, new_params)
    return rwd_iter_means
def __init__(self): super(VGG, self).__init__() vgg = vgg19(pretrained=True) self.vgg_mean = FloatTensor([[[[0.485]], [[0.456]], [[0.406]]]]) self.vgg_std = FloatTensor([[[[0.229]], [[0.224]], [[0.225]]]]) self.vgg_relu4_4 = vgg.features[:27]
def train(self, env, render=False): lr = self.train_config["lr"] num_iters = self.train_config["num_iters"] num_steps_per_iter = self.train_config["num_steps_per_iter"] horizon = self.train_config["horizon"] discount = self.train_config["discount"] normalize_return = self.train_config["normalize_return"] use_baseline = self.train_config["use_baseline"] opt_pi = torch.optim.Adam(self.pi.parameters(), lr) if use_baseline: opt_v = torch.optim.Adam(self.v.parameters(), lr) rwd_iter_means = [] rwd_iter = [] i = 0 steps = 0 while i < num_iters: obs = [] acts = [] rwds = [] disc_rwds = [] disc = [] t = 0 done = False ob = env.reset() while not done: act = self.act(ob) obs.append(ob) acts.append(act) if render: env.render() ob, rwd, done, info = env.step(act) rwds.append(rwd) disc_rwds.append(rwd * (discount**t)) disc.append(discount**t) t += 1 steps += 1 if steps == num_steps_per_iter: rwd_iter_means.append(np.mean(rwd_iter)) print("Iterations: {}, Reward Mean: {}".format( i + 1, np.mean(rwd_iter))) i += 1 steps = 0 rwd_iter = [] if horizon is not None: if t >= horizon: done = True break rwd_iter.append(np.sum(rwds)) obs = FloatTensor(np.array(obs)) acts = FloatTensor(np.array(acts)) disc = FloatTensor(disc) disc_rets = FloatTensor( [sum(disc_rwds[i:]) for i in range(len(disc_rwds))]) rets = disc_rets / disc if normalize_return: rets = (rets - rets.mean()) / rets.std() if use_baseline: self.v.eval() delta = (rets - self.v(obs).squeeze()).detach() self.v.train() opt_v.zero_grad() loss = (-1) * disc * delta * self.v(obs).squeeze() loss.mean().backward() opt_v.step() self.pi.train() distb = self.pi(obs) opt_pi.zero_grad() if use_baseline: loss = (-1) * disc * delta * distb.log_prob(acts) else: loss = (-1) * disc * distb.log_prob(acts) * rets loss.mean().backward() opt_pi.step() return rwd_iter_means
def forward(self, input): vgg_mean = FloatTensor([[[[0.485]], [[0.456]], [[0.406]]]]) vgg_std = FloatTensor([[[[0.229]], [[0.224]], [[0.225]]]]) return self.vgg_relu4_4((input - vgg_mean) / vgg_std)
def train(self, input: ByteTensor, target_class: int):
    """Train the machine with a single example.

    :param input: Input vector. Shape (feature_count, )
    :param target_class: Correct class for input.
    """
    clause_outputs = self.evaluate_clauses(input)
    class_sum = self.sum_up_class_votes(clause_outputs)

    #####################################
    ### Calculate Feedback to Clauses ###
    #####################################
    pos_feedback = ByteTensor(*self.clause_shape).zero_()
    neg_feedback = ByteTensor(*self.clause_shape).zero_()

    # Process negative targets
    # Feedback probability grows with the class vote sum, capped by the
    # voting threshold.
    threshold = (1.0 / (self.threshold * 2)) * \
        (self.threshold + class_sum.float())
    threshold = threshold.view(1, self.class_count, 1, 1)
    threshold = threshold.expand(*self.clause_shape)
    feedback_rand = FloatTensor(2, self.class_count,
                                self.clauses_per_class // 2, 1).uniform_()
    feedback_threshold = feedback_rand <= threshold
    neg_feedback[0] = feedback_threshold[0]
    pos_feedback[1] = feedback_threshold[1]

    # Process target
    # For the correct class the feedback probability shrinks as its vote
    # sum approaches the threshold.
    feedback_rand = FloatTensor(2, self.clauses_per_class // 2, 1).uniform_()
    feedback_threshold = (
        feedback_rand <= (1.0 / (self.threshold * 2)) *
        (self.threshold - class_sum[target_class].float()))
    pos_feedback[0, target_class] = feedback_threshold[0]
    neg_feedback[1, target_class] = feedback_threshold[1]
    # The target class must not receive the negative-target feedback
    # assigned in the block above.
    neg_feedback[0, target_class] = 0
    pos_feedback[1, target_class] = 0

    #################################
    ### Train Individual Automata ###
    #################################
    # Stochastic masks: low_prob fires with probability 1/s, high_prob
    # with probability (s-1)/s.
    low_prob = FloatTensor(*self.action.shape).uniform_() <= 1 / self.s
    high_prob = FloatTensor(
        *self.action.shape).uniform_() <= (self.s - 1) / self.s
    pos_feedback = pos_feedback.expand_as(low_prob)
    neg_feedback = neg_feedback.expand_as(low_prob)
    clauses = clause_outputs.expand_as(low_prob)
    not_clauses = clauses ^ 1
    X = input.expand_as(low_prob)

    #---------------------- Start CUDA
    if use_cuda:
        increment, decrement, inv_increment, inv_decrement = \
            learn(clauses, X, low_prob, high_prob, pos_feedback,
                  neg_feedback, self.action, self.inv_action)
    else:
        inv_X = (input ^ 1).expand_as(low_prob)
        notclause_low = not_clauses & low_prob & pos_feedback
        clause_x_high = clauses & X & high_prob & pos_feedback
        clause_notx_low = clauses & inv_X & low_prob & pos_feedback
        clause_notx_high = clauses & inv_X & high_prob & pos_feedback
        clause_x_low = clauses & X & low_prob & pos_feedback
        clause_notx_notaction = clauses & inv_X & (self.action ^ 1) & neg_feedback
        clause_x_noninvaction = clauses & X & (self.inv_action ^ 1) & neg_feedback

        # The learning algorithm will increment, decrement, or leave untouched
        # every automata. You can see the exclusiveness in the following logic.
        increment = clause_x_high | clause_notx_notaction
        decrement = notclause_low | clause_notx_low
        inv_increment = clause_x_noninvaction | clause_notx_high
        inv_decrement = clause_x_low | notclause_low
    #----------------------- End CUDA

    delta = increment.int() - decrement.int()
    inv_delta = inv_increment.int() - inv_decrement.int()
    self.automata += delta
    self.inv_automata += inv_delta

    # Keep automata in bounds [1, 2 * states].
    # BUGFIX: clamp() is not in-place (its result was discarded), so the
    # bounds were never actually enforced; clamp_() modifies in place.
    self.automata.clamp_(1, 2 * self.states)
    self.inv_automata.clamp_(1, 2 * self.states)
    self.update_action()
""" =========================================== Compatible with torch and tensorflow =========================================== """ # Author: Chaojie Wang <*****@*****.**>; Jiawen Wu <*****@*****.**> # Jiawen Wu <*****@*****.**>; Chaojie Wang <*****@*****.**> import warnings try: import tensorflow as tf gpus = tf.config.experimental.list_physical_devices(device_type='GPU') for gpu in gpus: tf.config.experimental.set_memory_growth(gpu, True) tf.Variable(1) except: try: from torch.cuda import FloatTensor x = FloatTensor(1) except: warnings.warn( "not find torch or tensorflow packages,DSG may be error after running a torch or tensorflow code" )