Пример #1
0
    def generate(self, batch_size, noises=None):
        """Generate a batch of fake samples from the generator.

        Args:
            batch_size: number of samples to draw when `noises` is None.
            noises: optional latent vectors (anything FloatTensor accepts).
                When None, sampled i.i.d. from N(0, 1) with shape
                (batch_size, self.latent_depth).

        Returns:
            The generator's output for the latent batch.
        """
        # `is None`, not `== None`: identity test, immune to __eq__ overloads
        # (tensors in particular overload == elementwise).
        if noises is None:
            noises = FloatTensor(
                np.random.normal(size=[batch_size, self.latent_depth])
            )
        else:
            noises = FloatTensor(noises)

        # Inference mode so BatchNorm/Dropout layers behave deterministically.
        self.generator.eval()
        faked_samples = self.generator(noises)

        return faked_samples
Пример #2
0
    def collate_fn(input_batch):
        """Collate a list of example dicts into one padded, length-sorted batch."""
        order_id = [ex['order_id'] for ex in input_batch]
        product_history = [ex['product_history'] for ex in input_batch]
        product_history_lengths = [len(seq) for seq in product_history]
        next_product = [ex['next_product'] for ex in input_batch]
        target = [ex['target'] for ex in input_batch]

        # Longest history first, as required by packed-sequence style RNN input.
        descending = np.argsort(product_history_lengths)[::-1]

        def reorder(values):
            return [values[i] for i in descending]

        order_id = reorder(order_id)
        product_history = reorder(product_history)
        product_history_lengths = reorder(product_history_lengths)
        next_product = reorder(next_product)
        target = reorder(target)

        # Right-pad every history with zeros up to the longest one.
        max_len = product_history_lengths[0]

        def pad(seq):
            return np.pad(seq, pad_width=(0, max_len - len(seq)),
                          mode='constant', constant_values=(0, 0))

        padded = np.array([pad(seq) for seq in product_history])
        product_history = to_var(torch.from_numpy(padded))

        return {
            'order_id': order_id,
            'product_history': product_history,
            'product_history_lengths': product_history_lengths,
            'next_product': to_var(LongTensor(next_product)),
            'target': to_var(FloatTensor(target)),
        }
Пример #3
0
def compute_pairwise_loss_in_euclidean(f1, f2, threshold):
    """Masked, exponentially-weighted mean pairwise squared-distance loss.

    Args:
        f1: feature batch of shape (B1, D).
        f2: feature batch of shape (B2, D).
        threshold: pairs with mean squared distance above this are masked out;
            also scales the exponential down-weighting.

    Returns:
        Scalar tensor: sum over kept pairs of l2 * exp(-l2 / threshold),
        normalized by the number of kept pairs.
    """
    # (B1, B2, D) differences via broadcasting, then mean over feature dim.
    pairwise_diff = f1.unsqueeze(1) - f2.unsqueeze(0)
    pairwise_l2 = torch.mean(pairwise_diff ** 2, dim=2)
    # 1.0 where the pair is within `threshold`, 0.0 elsewhere. Deriving the
    # mask from the comparison keeps it on the same device/dtype as the
    # inputs; the old Variable(FloatTensor(...).fill_(...)) construction is
    # deprecated and always allocated on the default device.
    pairwise_l2_mask = (pairwise_l2 <= threshold).to(pairwise_l2.dtype)

    # Close pairs contribute more via the exponential weight.
    pairwise_l2_exp = torch.exp(-pairwise_l2 / float(threshold))
    return torch.sum(pairwise_l2 * pairwise_l2_mask *
                     pairwise_l2_exp) / torch.sum(pairwise_l2_mask)
Пример #4
0
    def act(self, state):
        """Sample an action from the current policy for the given state."""
        # Evaluation mode so dropout/batch-norm layers are deterministic.
        self.pi.eval()

        distb = self.pi(FloatTensor(state))
        sampled = distb.sample()
        return sampled.detach().cpu().numpy()
Пример #5
0
def build_targets(pred_boxes, pred_cls, target, anchors, ignore_thres, device):
    """Build YOLO-style training targets for one batch.

    Args:
        pred_boxes: predicted boxes indexed as (batch, anchor, grid_y, grid_x, 4).
        pred_cls: predicted class scores; the last dim is the class count.
        target: ground-truth rows; cols 0-1 are (batch_index, label) and
            cols 2-5 are box coordinates normalized to [0, 1] (scaled to
            grid units below).
        anchors: anchor (w, h) pairs in grid units.
        ignore_thres: IoU above which a non-best anchor is NOT penalized as
            background.
        device: CUDA device index used for every output tensor.

    Returns:
        (iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th,
         tcls, tconf), all shaped (n_b, n_a, n_g, n_g) except tcls which
        carries an extra class dim.
    """

    n_b = pred_boxes.size(0)
    n_a = pred_boxes.size(1)
    n_c = pred_cls.size(-1)
    n_g = pred_boxes.size(2)

    # Output tensors
    # NOTE: all masks index as [batch, anchor, y, x]; gj is the row (y)
    # and gi the column (x).
    obj_mask = BoolTensor(n_b, n_a, n_g, n_g).cuda(device).fill_(0)
    noobj_mask = BoolTensor(n_b, n_a, n_g, n_g).cuda(device).fill_(1)
    class_mask = FloatTensor(n_b, n_a, n_g, n_g).cuda(device).fill_(0)
    iou_scores = FloatTensor(n_b, n_a, n_g, n_g).cuda(device).fill_(0)
    tx = FloatTensor(n_b, n_a, n_g, n_g).cuda(device).fill_(0)
    ty = FloatTensor(n_b, n_a, n_g, n_g).cuda(device).fill_(0)
    tw = FloatTensor(n_b, n_a, n_g, n_g).cuda(device).fill_(0)
    th = FloatTensor(n_b, n_a, n_g, n_g).cuda(device).fill_(0)
    tcls = FloatTensor(n_b, n_a, n_g, n_g, n_c).cuda(device).fill_(0)

    # Convert to position relative to box
    target_boxes = target[:, 2:6] * n_g
    gxy = target_boxes[:, :2]
    gwh = target_boxes[:, 2:]
    # Get anchors with best iou
    ious = torch.stack([bbox_wh_iou(anchor, gwh) for anchor in anchors])
    _, best_n = ious.max(0)
    # Separate target values
    b, target_labels = target[:, :2].long().t()
    gx, gy = gxy.t()
    gw, gh = gwh.t()
    # Integer cell indices the box center falls into.
    gi, gj = gxy.long().t()
    # Set masks
    obj_mask[b, best_n, gj, gi] = 1
    noobj_mask[b, best_n, gj, gi] = 0

    # Set noobj mask to zero where iou exceeds ignore threshold
    for i, anchor_ious in enumerate(ious.t()):
        noobj_mask[b[i], anchor_ious > ignore_thres, gj[i], gi[i]] = 0

    # Coordinates
    # Targets are the fractional offset of the center within its cell.
    tx[b, best_n, gj, gi] = gx - gx.floor()
    ty[b, best_n, gj, gi] = gy - gy.floor()
    # Width and height
    # log-space w/h relative to the matched anchor; 1e-16 guards log(0).
    tw[b, best_n, gj, gi] = torch.log(gw / anchors[best_n][:, 0] + 1e-16)
    th[b, best_n, gj, gi] = torch.log(gh / anchors[best_n][:, 1] + 1e-16)
    # One-hot encoding of label
    tcls[b, best_n, gj, gi, target_labels] = 1
    # Compute label correctness and iou at best anchor
    class_mask[b, best_n, gj,
               gi] = (pred_cls[b, best_n, gj,
                               gi].argmax(-1) == target_labels).float()
    iou_scores[b, best_n, gj, gi] = bbox_iou(pred_boxes[b, best_n, gj, gi],
                                             target_boxes,
                                             x1y1x2y2=False)

    # Objectness target is simply the (float) object mask.
    tconf = obj_mask.float()
    return iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf
Пример #6
0
    def train_one_step(self, real_samples):
        """Run one GAN update: a generator step, then a discriminator step.

        Returns:
            (generator_loss, discriminator_loss) as Python floats.
        """
        bce_logits = torch.nn.functional.binary_cross_entropy_with_logits

        batch_size = real_samples.shape[0]
        real_samples = FloatTensor(real_samples)
        latent = FloatTensor(
            np.random.normal(size=[batch_size, self.latent_depth]))

        self.generator.train()
        self.discriminator.train()

        # --- Generator step: push fakes toward the "real" label.
        fake_batch = self.generator(latent)
        fake_scores = self.discriminator(fake_batch)

        generator_loss = bce_logits(
            input=fake_scores, target=torch.ones_like(fake_scores)
        )

        self.generator_opt.zero_grad()
        generator_loss.backward()
        self.generator_opt.step()

        # --- Discriminator step: real -> 1, freshly regenerated fakes -> 0.
        real_scores = self.discriminator(real_samples)

        fake_batch = self.generator(latent)
        fake_scores = self.discriminator(fake_batch)

        discriminator_loss = (
            bce_logits(input=real_scores, target=torch.ones_like(real_scores))
            + bce_logits(input=fake_scores,
                         target=torch.zeros_like(fake_scores))
        )

        self.discriminator_opt.zero_grad()
        discriminator_loss.backward()
        self.discriminator_opt.step()

        return generator_loss.item(), discriminator_loss.item()
Пример #7
0
def knn_indices_func_gpu(
        seed: cuda.FloatTensor,  # (B,C,npoint)
        pts: cuda.FloatTensor,  # (B,C,N)
        k: int) -> cuda.LongTensor:  # (B,npoint,k)
    """Brute-force k-nearest-neighbour indices.

    Args:
        seed (cuda.FloatTensor): clustering seeds, shape (B, C, npoint).
        pts  (cuda.FloatTensor): point cloud to search, shape (B, C, N).
        k    (int): number of neighbours to return per seed.

    Returns:
        cuda.LongTensor: neighbour indices of shape (B, npoint, k); the
        single closest match (assumed to be the seed itself) is dropped.
    """
    num_seeds = seed.shape[-1]
    num_points = pts.shape[-1]
    # Broadcast to (B, C, npoint, N) and reduce over channels to get the
    # squared Euclidean distance of every seed/point pair: (B, npoint, N).
    diff = pts.unsqueeze(-2).expand(-1, -1, num_seeds, -1) \
        - seed.unsqueeze(-1).expand(-1, -1, -1, num_points)
    sq_dist = (diff ** 2).sum(dim=1)
    # Take the k+1 smallest distances and drop the first (distance-0 self
    # match), keeping the k true neighbours.
    _, idx = torch.topk(sq_dist, k=k + 1, largest=False)

    return idx[:, :, 1:]
Пример #8
0
    def forward(self, x):
        """Three conv stages of three layers each, with pool/dropout/noise
        between the first two stage boundaries, then average pooling and
        flattening."""
        if self.config['instance_norm']:
            x = self.inst_norm(x)

        def run_stage(lo, hi, t):
            # linear -> (optional BN) -> leaky-relu for each layer in [lo, hi).
            for j in range(lo, hi):
                t = self.linears[j](t)
                if self.config['is_bn']:
                    t = self.bns[j](t)
                t = self.lr(t)
            return t

        def pool_drop_noise(t):
            # 2x2 max-pool, dropout, then additive Gaussian noise while
            # training (skipped when std is negligibly small).
            t = self.mp2_2(t)
            t = self.drop(t)
            if self.training and self.config['std'] > 10**-4:
                t += Variable(FloatTensor(t.size()).normal_())*self.config['std']
            return t

        x = run_stage(0, 3, x)
        x = pool_drop_noise(x)
        x = run_stage(3, 6, x)
        x = pool_drop_noise(x)
        x = run_stage(6, self.nb_layer, x)

        x = self.avg_6(x)
        x = x.view(x.size(0), -1)

        return x
Пример #9
0
 def compute_grid_offsets(self, grid_size, cuda):
     """Precompute per-cell x/y offsets and stride-scaled anchors.

     Caches everything on `self` for reuse by the forward pass. The
     `cuda` argument is unused here — placement is driven by self.device.
     """
     self.grid_size = grid_size
     g = self.grid_size
     # Stride: how many image pixels one grid cell covers.
     self.stride = self.img_dim / self.grid_size
     # Calculate offsets for each grid
     # grid_x varies along the last axis (columns), grid_y along rows.
     self.grid_x = (torch.arange(g).repeat(g, 1).view(
         [1, 1, g, g]).type(FloatTensor).cuda(self.device))
     self.grid_y = (torch.arange(g).repeat(g, 1).t().view(
         [1, 1, g, g]).type(FloatTensor).cuda(self.device))
     # Anchors converted from pixels to grid units.
     self.scaled_anchors = FloatTensor([
         (a_w / self.stride, a_h / self.stride) for a_w, a_h in self.anchors
     ]).cuda(self.device)
     self.anchor_w = self.scaled_anchors[:, 0:1].view(
         (1, self.num_anchors, 1, 1))
     self.anchor_h = self.scaled_anchors[:, 1:2].view(
         (1, self.num_anchors, 1, 1))
    def filter_batch(self, batch, percentile):
        """Keep only "elite" episodes whose reward reaches the percentile cut.

        Args:
            batch: sequence of episodes, each exposing `.reward` and `.steps`
                (steps expose `.state` and `.action`).
            percentile: reward percentile (0-100) used as the elite cut-off.

        Returns:
            (observations FloatTensor, actions LongTensor,
             reward_bound, reward_mean)
        """
        # Comprehensions instead of map(lambda ...) — same values, clearer.
        rewards = [episode.reward for episode in batch]
        reward_bound = np.percentile(rewards, percentile)
        reward_mean = float(np.mean(rewards))

        train_observations = []
        train_actions = []

        for episode in batch:
            # Below-bound episodes contribute no training data.
            if episode.reward < reward_bound:
                continue

            train_observations.extend(step.state for step in episode.steps)
            train_actions.extend(step.action for step in episode.steps)

        return FloatTensor(train_observations), LongTensor(
            train_actions), reward_bound, reward_mean  # TODO use .to(device)
Пример #11
0
    def forward(self, x):
        """Dropout/noise, one stage of three layers, optional average
        pooling, flatten, then a final linear (+ optional BN) head."""
        # NOTE(review): the return value of self.drop(x) is discarded, so if
        # `drop` is nn.Dropout this line is a no-op — likely meant
        # `x = self.drop(x)`. Left unchanged to preserve behavior; confirm
        # with the author before fixing.
        self.drop(x)
        # Additive Gaussian noise during training (skipped for tiny std).
        if self.training and self.config['std'] > 10**-4:
            x += Variable(FloatTensor(x.size()).normal_())*self.config['std']

        for i in range(3):
            x = self.linears[i](x)
            if self.config['is_bn']:
                x = self.bns[i](x)
            x = self.lr(x)

        # do not call average pooling for image with length 28:
        if self.is_avg_pool:
            x = self.avg_6(x)
        x = x.view(x.size(0),-1)

        # Final projection head.
        x = self.linears[3](x)
        if self.config['is_bn']:
            x = self.bns[3](x)
        return x
Пример #12
0
 def interpolation(self, uvm, image, index):
     """Warp `image` with the (u, v) flow stored at slot `index` of `uvm`.

     Channels 3*index and 3*index+1 of `uvm` are read as x/y displacement
     maps; channel 3*index+2 (+0.5) is returned as a blend weight.

     Returns:
         (warped image, weight map, u, v)
     """
     # Pull the u/v channels for this flow and reshape to (N, H, W, 1) each.
     u, v = torch.index_select(uvm, dim=1, index=LongTensor([0+3*index,
                               1+3*index])).permute(0, 2, 3, 1).split(1, dim=3)
     row_num = FloatTensor()
     col_num = FloatTensor()
     im_size = image.shape[2:4]
     # Fill the (initially empty) tensors with 0..H-1 / 0..W-1 coordinates.
     torch.arange(im_size[0], out=row_num)
     torch.arange(im_size[1], out=col_num)
     row_num = row_num.view(1, im_size[0], 1, 1)
     col_num = col_num.view(1, 1, im_size[1], 1)
     # Absolute sampling positions (displacement + pixel index) normalized
     # to [-1, 1] as grid_sample expects, clamped to stay in-frame.
     x_norm = 2*(u+col_num)/(im_size[1]-1)-1
     y_norm = 2*(v+row_num)/(im_size[0]-1)-1
     xy_norm = torch.clamp(torch.cat((x_norm, y_norm), dim=3), -1, 1)
     interp = nn.functional.grid_sample(image, xy_norm)
     # Blend weight channel; +0.5 presumably recenters a [-0.5, 0.5] network
     # output to [0, 1] — TODO confirm against the producing network.
     w = torch.index_select(uvm, dim=1, index=LongTensor([3*index+2]))+0.5
     return interp, w, u, v
 def action_probabilities(self, state):
     """Return the policy's action-probability vector for one state."""
     batch = FloatTensor([state])  # TODO use .to(device)
     logits = self.net(batch)
     return self.softmax(logits).cpu().data.numpy()[0]
Пример #14
0
 def random_z(self, batch_size):
     """Sample a (batch_size, latent_dim) batch of N(0, 1) latent vectors."""
     sample = np.random.normal(0, 1, (batch_size, self.latent_dim))
     return Variable(FloatTensor(sample))
Пример #15
0
 def get_target(_, batch_size):
     """Draw soft target labels ~ N(0.05, 0.05), shape (batch_size, 1)."""
     labels = np.random.normal(0.05, 0.05, (batch_size, 1))
     return Variable(FloatTensor(labels))
Пример #16
0
use_gpu=True
if use_gpu:
    # GPU build: these constructor names allocate directly on the CUDA device.
    from torch.cuda import FloatTensor, LongTensor, ByteTensor
    def to_gpu(x):
        # Move (or copy) a tensor onto the current CUDA device.
        return x.cuda()
else:
    # CPU fallback: same constructor names, host memory.
    from torch import FloatTensor, LongTensor, ByteTensor
    def to_gpu(x):
        # NOTE: despite the name, in this build the tensor stays on the CPU.
        return x.cpu()

# Empty probe tensors — presumably constructed to force/verify tensor-backend
# initialization at import time; TODO confirm they are used downstream.
x1 = FloatTensor()
x2 = ByteTensor()
# the below function is from the Pytorch forums
# https://discuss.pytorch.org/t/access-gpu-memory-usage-in-pytorch/3192/3
import subprocess
def get_gpu_memory_map():
    """Get the current gpu usage.
    Returns
    -------
    usage: dict
        Keys are device ids as integers.
        Values are memory usage as integers in MB.
    """
    # NOTE(review): this function is truncated as captured — the `try:` below
    # has no matching `except`/`finally` and nothing is returned, so the
    # block is not syntactically valid. The missing tail (building the
    # id -> MB dict and the error handler) must be restored from upstream.
    try:
        # One line of "used MB" per GPU, in device-id order.
        result = subprocess.check_output(
            [
                'nvidia-smi', '--query-gpu=memory.used',
                '--format=csv,nounits,noheader'
            ], encoding='utf-8')
        # Convert lines into a dictionary
        gpu_memory = [int(x) for x in result.strip().split('\n')]
Пример #17
0
    def train(self):
        """Full CycleGAN training loop.

        Builds the two generators and two discriminators, their Adam
        optimizers and linear-decay LR schedulers, then alternates
        generator / discriminator-A / discriminator-B updates per batch.
        Periodically checkpoints weights and writes sample image pairs
        under a timestamped results directory.
        """
        num_channels = self.config.NUM_CHANNELS
        use_cuda = self.config.USE_CUDA
        lr = self.config.LEARNING_RATE

        # Networks
        netG_A2B = Generator(num_channels)
        netG_B2A = Generator(num_channels)
        netD_A = Discriminator(num_channels)
        netD_B = Discriminator(num_channels)

        #netG_A2B = Generator_BN(num_channels)
        #netG_B2A = Generator_BN(num_channels)
        #netD_A = Discriminator_BN(num_channels)
        #netD_B = Discriminator_BN(num_channels)

        if use_cuda:
            netG_A2B.cuda()
            netG_B2A.cuda()
            netD_A.cuda()
            netD_B.cuda()

        netG_A2B.apply(weights_init_normal)
        netG_B2A.apply(weights_init_normal)
        netD_A.apply(weights_init_normal)
        netD_B.apply(weights_init_normal)

        criterion_GAN = torch.nn.BCELoss()
        criterion_cycle = torch.nn.L1Loss()
        criterion_identity = torch.nn.L1Loss()

        # Both generators share one optimizer; discriminators get their own.
        optimizer_G = torch.optim.Adam(itertools.chain(netG_A2B.parameters(), netG_B2A.parameters()),
                                       lr=lr, betas=(0.5, 0.999))
        optimizer_D_A = torch.optim.Adam(netD_A.parameters(), lr=lr, betas=(0.5, 0.999))
        optimizer_D_B = torch.optim.Adam(netD_B.parameters(), lr=lr, betas=(0.5, 0.999))

        # Linear LR decay starting at the half-way epoch.
        lr_scheduler_G = torch.optim.lr_scheduler.LambdaLR(optimizer_G, lr_lambda=LambdaLR(self.config.EPOCH, 0,
                                                                                           self.config.EPOCH//2).step)
        lr_scheduler_D_A = torch.optim.lr_scheduler.LambdaLR(optimizer_D_A, lr_lambda=LambdaLR(self.config.EPOCH, 0,
                                                                                           self.config.EPOCH//2).step)
        lr_scheduler_D_B = torch.optim.lr_scheduler.LambdaLR(optimizer_D_B, lr_lambda=LambdaLR(self.config.EPOCH, 0,
                                                                                           self.config.EPOCH//2).step)

        # Inputs & targets memory allocation
        #Tensor = LongTensor if use_cuda else torch.Tensor
        batch_size = self.config.BATCH_SIZE
        height, width, channels = self.config.INPUT_SHAPE

        # Pre-allocated input buffers reused (via copy_) every iteration.
        input_A = FloatTensor(batch_size, channels, height, width)
        input_B = FloatTensor(batch_size, channels, height, width)
        target_real = Variable(FloatTensor(batch_size).fill_(1.0), requires_grad=False)
        target_fake = Variable(FloatTensor(batch_size).fill_(0.0), requires_grad=False)

        # Replay buffers feed the discriminators a history of generated images.
        fake_A_buffer = ReplayBuffer()
        fake_B_buffer = ReplayBuffer()

        transforms_ = [transforms.RandomCrop((height, width)),
                       transforms.RandomHorizontalFlip(),
                       transforms.ToTensor(),
                       transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]

        dataloader = DataLoader(ImageDataset(self.config.DATA_DIR, self.config.DATASET_A, self.config.DATASET_B,
                                             transforms_=transforms_, unaligned=True),
                                             batch_size=batch_size, shuffle=True, num_workers=2, drop_last=True)
        # Loss plot
        logger = Logger(self.config.EPOCH, len(dataloader))

        now = datetime.datetime.now()
        datetime_sequence = "{0}{1:02d}{2:02d}_{3:02}{4:02d}".format(str(now.year)[-2:], now.month, now.day ,
                                                                    now.hour, now.minute)

        output_name_1 = self.config.DATASET_A + "2" + self.config.DATASET_B
        output_name_2 = self.config.DATASET_B + "2" + self.config.DATASET_A

        experiment_dir = os.path.join(self.config.RESULT_DIR, datetime_sequence)

        sample_output_dir_1 = os.path.join(experiment_dir, "sample", output_name_1)
        sample_output_dir_2 = os.path.join(experiment_dir, "sample", output_name_2)
        weights_output_dir_1 = os.path.join(experiment_dir, "weights", output_name_1)
        weights_output_dir_2 = os.path.join(experiment_dir, "weights", output_name_2)
        weights_output_dir_resume = os.path.join(experiment_dir, "weights", "resume")

        os.makedirs(sample_output_dir_1, exist_ok=True)
        os.makedirs(sample_output_dir_2, exist_ok=True)
        os.makedirs(weights_output_dir_1, exist_ok=True)
        os.makedirs(weights_output_dir_2, exist_ok=True)
        os.makedirs(weights_output_dir_resume, exist_ok=True)

        counter = 0

        for epoch in range(self.config.EPOCH):
            """
            logger.loss_df.to_csv(os.path.join(experiment_dir,
                                 self.config.DATASET_A + "_"
                                 + self.config.DATASET_B + ".csv"),
                    index=False)
            """
            # Checkpoint every 100 epochs (including epoch 0).
            if epoch % 100 == 0:
                torch.save(netG_A2B.state_dict(), os.path.join(weights_output_dir_1, str(epoch).zfill(4) + 'netG_A2B.pth'))
                torch.save(netG_B2A.state_dict(), os.path.join(weights_output_dir_2, str(epoch).zfill(4) + 'netG_B2A.pth'))
                torch.save(netD_A.state_dict(), os.path.join(weights_output_dir_1, str(epoch).zfill(4) + 'netD_A.pth'))
                torch.save(netD_B.state_dict(), os.path.join(weights_output_dir_2, str(epoch).zfill(4) + 'netD_B.pth'))

            for i, batch in enumerate(dataloader):
                # Set model input
                real_A = Variable(input_A.copy_(batch['A']))
                real_B = Variable(input_B.copy_(batch['B']))

                ###### Generators A2B and B2A ######
                optimizer_G.zero_grad()

                # GAN loss
                fake_B = netG_A2B(real_A)
                pred_fake_B = netD_B(fake_B)
                loss_GAN_A2B = criterion_GAN(pred_fake_B, target_real)

                fake_A = netG_B2A(real_B)
                pred_fake_A = netD_A(fake_A)
                loss_GAN_B2A = criterion_GAN(pred_fake_A, target_real)

                # Cycle loss
                recovered_A = netG_B2A(fake_B)
                loss_cycle_ABA = criterion_cycle(recovered_A, real_A) * 10.0

                recovered_B = netG_A2B(fake_A)
                loss_cycle_BAB = criterion_cycle(recovered_B, real_B) * 10.0

                # Total loss
                loss_G = loss_GAN_A2B + loss_GAN_B2A + loss_cycle_ABA + loss_cycle_BAB
                loss_G.backward()

                optimizer_G.step()
                ###################################

                ###### Discriminator A ######
                optimizer_D_A.zero_grad()

                # Real loss
                pred_A = netD_A(real_A)
                loss_D_real = criterion_GAN(pred_A, target_real)

                # Fake loss
                # Use a replayed (possibly older) fake; detach so no gradient
                # flows back into the generator.
                fake_A_ = fake_A_buffer.push_and_pop(fake_A)
                pred_fake = netD_A(fake_A_.detach())
                loss_D_fake = criterion_GAN(pred_fake, target_fake)

                # Total loss
                loss_D_A = (loss_D_real + loss_D_fake) * 0.5
                loss_D_A.backward()

                optimizer_D_A.step()
                ###################################

                ###### Discriminator B ######
                optimizer_D_B.zero_grad()

                # Real loss
                pred_B = netD_B(real_B)
                loss_D_real = criterion_GAN(pred_B, target_real)

                # Fake loss
                fake_B_ = fake_B_buffer.push_and_pop(fake_B)
                pred_fake = netD_B(fake_B_.detach())
                loss_D_fake = criterion_GAN(pred_fake, target_fake)

                # Total loss
                loss_D_B = (loss_D_real + loss_D_fake) * 0.5
                loss_D_B.backward()

                optimizer_D_B.step()

                # Progress report (http://localhost:8097)
                logger.log({'loss_G': loss_G,
                            'loss_G_GAN': (loss_GAN_A2B + loss_GAN_B2A),
                            'loss_G_cycle': (loss_cycle_ABA + loss_cycle_BAB), 'loss_D': (loss_D_A + loss_D_B)},
                           images={'real_A': real_A, 'real_B': real_B, 'fake_A': fake_A, 'fake_B': fake_B})

                # Dump a side-by-side (real, translated) sample every 500 steps.
                if counter % 500 == 0:
                    real_A_sample = real_A.cpu().detach().numpy()[0]
                    pred_A_sample = fake_A.cpu().detach().numpy()[0]
                    real_B_sample = real_B.cpu().detach().numpy()[0]
                    pred_B_sample = fake_B.cpu().detach().numpy()[0]
                    combine_sample_1 = np.concatenate([real_A_sample, pred_B_sample], axis=2)
                    combine_sample_2 = np.concatenate([real_B_sample, pred_A_sample], axis=2)

                    file_1 = "{0}_{1}.jpg".format(epoch, counter)
                    output_sample_image(os.path.join(sample_output_dir_1, file_1), combine_sample_1)
                    file_2 = "{0}_{1}.jpg".format(epoch, counter)
                    output_sample_image(os.path.join(sample_output_dir_2, file_2), combine_sample_2)

                counter += 1


            # Update learning rates
            lr_scheduler_G.step()
            lr_scheduler_D_A.step()
            lr_scheduler_D_B.step()

        # Final checkpoint after the last epoch.
        torch.save(netG_A2B.state_dict(), os.path.join(weights_output_dir_1, str(self.config.EPOCH).zfill(4) + 'netG_A2B.pth'))
        torch.save(netG_B2A.state_dict(), os.path.join(weights_output_dir_2, str(self.config.EPOCH).zfill(4) + 'netG_B2A.pth'))
        torch.save(netD_A.state_dict(), os.path.join(weights_output_dir_1, str(self.config.EPOCH).zfill(4) + 'netD_A.pth'))
        torch.save(netD_B.state_dict(), os.path.join(weights_output_dir_2, str(self.config.EPOCH).zfill(4) + 'netD_B.pth'))
Пример #18
0
    def train(self, env, expert, render=False):
        """GAIL training loop.

        Phase 1 rolls out the expert to collect (obs, act) demonstrations.
        Phase 2 iterates: roll out the current policy, update the
        discriminator to separate expert from policy transitions, fit the
        value function with a trust-region step, then update the policy with
        TRPO (conjugate gradient + line search) plus a causal-entropy bonus.

        Returns:
            (expert reward mean, list of per-iteration policy reward means)
        """
        num_iters = self.train_config["num_iters"]
        num_steps_per_iter = self.train_config["num_steps_per_iter"]
        horizon = self.train_config["horizon"]
        lambda_ = self.train_config["lambda"]
        gae_gamma = self.train_config["gae_gamma"]
        gae_lambda = self.train_config["gae_lambda"]
        eps = self.train_config["epsilon"]
        max_kl = self.train_config["max_kl"]
        cg_damping = self.train_config["cg_damping"]
        normalize_advantage = self.train_config["normalize_advantage"]

        opt_d = torch.optim.Adam(self.d.parameters())

        exp_rwd_iter = []

        exp_obs = []
        exp_acts = []

        # --- Phase 1: collect expert demonstrations. -----------------------
        steps = 0
        while steps < num_steps_per_iter:
            ep_obs = []
            ep_rwds = []

            t = 0
            done = False

            ob = env.reset()

            while not done and steps < num_steps_per_iter:
                act = expert.act(ob)

                ep_obs.append(ob)
                exp_obs.append(ob)
                exp_acts.append(act)

                if render:
                    env.render()
                ob, rwd, done, info = env.step(act)

                ep_rwds.append(rwd)

                t += 1
                steps += 1

                if horizon is not None:
                    if t >= horizon:
                        break

            # Only completed episodes count toward the reported reward.
            if done:
                exp_rwd_iter.append(np.sum(ep_rwds))

            ep_obs = FloatTensor(ep_obs)
            ep_rwds = FloatTensor(ep_rwds)

        exp_rwd_mean = np.mean(exp_rwd_iter)
        print("Expert Reward Mean: {}".format(exp_rwd_mean))

        exp_obs = FloatTensor(exp_obs)
        exp_acts = FloatTensor(np.array(exp_acts))

        # --- Phase 2: alternate rollouts and updates. ----------------------
        rwd_iter_means = []
        for i in range(num_iters):
            rwd_iter = []

            obs = []
            acts = []
            rets = []
            advs = []
            gms = []

            steps = 0
            while steps < num_steps_per_iter:
                ep_obs = []
                ep_acts = []
                ep_rwds = []
                ep_costs = []
                ep_disc_costs = []
                ep_gms = []
                ep_lmbs = []

                t = 0
                done = False

                ob = env.reset()

                while not done and steps < num_steps_per_iter:
                    act = self.act(ob)

                    ep_obs.append(ob)
                    obs.append(ob)

                    ep_acts.append(act)
                    acts.append(act)

                    if render:
                        env.render()
                    ob, rwd, done, info = env.step(act)

                    ep_rwds.append(rwd)
                    # Discount and GAE-lambda factors for step t.
                    ep_gms.append(gae_gamma**t)
                    ep_lmbs.append(gae_lambda**t)

                    t += 1
                    steps += 1

                    if horizon is not None:
                        if t >= horizon:
                            break

                if done:
                    rwd_iter.append(np.sum(ep_rwds))

                ep_obs = FloatTensor(ep_obs)
                # ep_acts = FloatTensor(np.array(ep_acts)).to(torch.device("cuda"))
                ep_acts = FloatTensor(np.array(ep_acts))
                ep_rwds = FloatTensor(ep_rwds)
                # ep_disc_rwds = FloatTensor(ep_disc_rwds)
                ep_gms = FloatTensor(ep_gms)
                ep_lmbs = FloatTensor(ep_lmbs)

                # GAIL surrogate cost: -log D(s, a) under the discriminator.
                ep_costs = (-1) * torch.log(self.d(ep_obs, ep_acts))\
                    .squeeze().detach()
                ep_disc_costs = ep_gms * ep_costs

                # Discounted returns-to-go, then undiscounted to time t.
                ep_disc_rets = FloatTensor(
                    [sum(ep_disc_costs[i:]) for i in range(t)])
                ep_rets = ep_disc_rets / ep_gms

                rets.append(ep_rets)

                self.v.eval()
                # TD residuals (deltas) for GAE; terminal value is zero.
                curr_vals = self.v(ep_obs).detach()
                next_vals = torch.cat(
                    (self.v(ep_obs)[1:], FloatTensor([[0.]]))).detach()
                ep_deltas = ep_costs.unsqueeze(-1)\
                    + gae_gamma * next_vals\
                    - curr_vals

                # NOTE(review): torch.FloatTensor here is always CPU, while the
                # bare FloatTensor above may be the CUDA alias — see the
                # .to("cuda") in L() below. Verify device consistency.
                ep_advs = torch.FloatTensor([
                    ((ep_gms * ep_lmbs)[:t - j].unsqueeze(-1) *
                     ep_deltas[j:]).sum() for j in range(t)
                ])
                advs.append(ep_advs)

                gms.append(ep_gms)

            rwd_iter_means.append(np.mean(rwd_iter))
            print("Iterations: {},   Reward Mean: {}".format(
                i + 1, np.mean(rwd_iter)))

            obs = FloatTensor(obs)
            # acts = FloatTensor(np.array(acts)).to(torch.device("cuda"))
            acts = FloatTensor(np.array(acts))
            rets = torch.cat(rets)
            advs = torch.cat(advs)
            gms = torch.cat(gms)

            if normalize_advantage:
                advs = (advs - advs.mean()) / advs.std()

            # --- Discriminator update: expert -> 0, policy -> 1 logits. ----
            self.d.train()
            exp_scores = self.d.get_logits(exp_obs, exp_acts)
            nov_scores = self.d.get_logits(obs, acts)

            opt_d.zero_grad()
            loss = torch.nn.functional.binary_cross_entropy_with_logits(
                exp_scores, torch.zeros_like(exp_scores)
            ) \
                + torch.nn.functional.binary_cross_entropy_with_logits(
                    nov_scores, torch.ones_like(nov_scores)
                )
            loss.backward()
            opt_d.step()

            # --- Value-function update via a constrained natural-gradient step.
            self.v.train()
            old_params = get_flat_params(self.v).detach()
            old_v = self.v(obs).detach()

            def constraint():
                return ((old_v - self.v(obs))**2).mean()

            grad_diff = get_flat_grads(constraint(), self.v)

            def Hv(v):
                # Hessian-vector product of the constraint (Fisher-style).
                hessian = get_flat_grads(torch.dot(grad_diff, v), self.v)\
                    .detach()

                return hessian

            g = get_flat_grads(
                ((-1) * (self.v(obs).squeeze() - rets)**2).mean(),
                self.v).detach()
            s = conjugate_gradient(Hv, g).detach()

            Hs = Hv(s).detach()
            alpha = torch.sqrt(2 * eps / torch.dot(s, Hs))

            new_params = old_params + alpha * s

            set_params(self.v, new_params)

            # --- Policy update via TRPO. -----------------------------------
            self.pi.train()
            old_params = get_flat_params(self.pi).detach()
            old_distb = self.pi(obs)

            def L():
                # Importance-sampled surrogate objective.
                # NOTE(review): hard-coded .to("cuda") — breaks CPU-only runs.
                distb = self.pi(obs)

                return (advs.to(torch.device("cuda")) * torch.exp(
                    distb.log_prob(acts) - old_distb.log_prob(acts).detach())
                        ).mean()

            def kld():
                # KL(old || new), discrete or diagonal-Gaussian closed form.
                distb = self.pi(obs)

                if self.discrete:
                    old_p = old_distb.probs.detach()
                    p = distb.probs

                    return (old_p * (torch.log(old_p) - torch.log(p)))\
                        .sum(-1)\
                        .mean()

                else:
                    old_mean = old_distb.mean.detach()
                    old_cov = old_distb.covariance_matrix.sum(-1).detach()
                    mean = distb.mean
                    cov = distb.covariance_matrix.sum(-1)

                    return (0.5) * ((old_cov / cov).sum(-1) +
                                    (((old_mean - mean)**2) / cov).sum(-1) -
                                    self.action_dim + torch.log(cov).sum(-1) -
                                    torch.log(old_cov).sum(-1)).mean()

            grad_kld_old_param = get_flat_grads(kld(), self.pi)

            def Hv(v):
                # Damped Fisher-vector product for conjugate gradient.
                hessian = get_flat_grads(torch.dot(grad_kld_old_param, v),
                                         self.pi).detach()

                return hessian + cg_damping * v

            g = get_flat_grads(L(), self.pi).detach()

            s = conjugate_gradient(Hv, g).detach()
            Hs = Hv(s).detach()

            new_params = rescale_and_linesearch(g, s, Hs, max_kl, L, kld,
                                                old_params, self.pi)

            # Causal-entropy regularizer (weight lambda_) added to the step.
            disc_causal_entropy = ((-1) * gms * self.pi(obs).log_prob(acts))\
                .mean()
            grad_disc_causal_entropy = get_flat_grads(disc_causal_entropy,
                                                      self.pi)
            new_params += lambda_ * grad_disc_causal_entropy

            set_params(self.pi, new_params)

        return exp_rwd_mean, rwd_iter_means
Пример #19
0
    def train(self, env, render=False):
        """Train the policy ``self.pi`` and value net ``self.v`` with a
        TRPO-style update using GAE advantages.

        Per iteration: roll out episodes until ``num_steps_per_iter`` env
        steps are collected, fit the value function with one natural-gradient
        step, then update the policy with conjugate gradient + line search.

        :param env: Gym-style environment (``reset`` / ``step`` / ``render``).
        :param render: If True, render the environment at every step.
        :return: List of per-iteration mean episode rewards.
        """
        num_iters = self.train_config["num_iters"]
        num_steps_per_iter = self.train_config["num_steps_per_iter"]
        horizon = self.train_config["horizon"]          # optional per-episode step cap
        gamma_ = self.train_config["gamma"]             # discount factor
        lambda_ = self.train_config["lambda"]           # GAE lambda
        eps = self.train_config["epsilon"]              # trust-region size for the value step
        max_kl = self.train_config["max_kl"]            # KL bound for the policy step
        cg_damping = self.train_config["cg_damping"]
        normalize_advantage = self.train_config["normalize_advantage"]

        rwd_iter_means = []
        for i in range(num_iters):
            rwd_iter = []

            # Flat (cross-episode) buffers for this iteration.
            obs = []
            acts = []
            rets = []
            advs = []
            gms = []

            steps = 0
            while steps < num_steps_per_iter:
                # Per-episode buffers.
                ep_obs = []
                ep_rwds = []
                ep_disc_rwds = []
                ep_gms = []     # gamma^t per step
                ep_lmbs = []    # lambda^t per step

                t = 0
                done = False

                ob = env.reset()

                while not done and steps < num_steps_per_iter:
                    act = self.act(ob)

                    ep_obs.append(ob)
                    obs.append(ob)
                    acts.append(act)

                    if render:
                        env.render()
                    ob, rwd, done, info = env.step(act)

                    ep_rwds.append(rwd)
                    ep_disc_rwds.append(rwd * (gamma_**t))
                    ep_gms.append(gamma_**t)
                    ep_lmbs.append(lambda_**t)

                    t += 1
                    steps += 1

                    if horizon is not None:
                        if t >= horizon:
                            done = True
                            break

                # Only completed episodes count toward the reward statistics.
                if done:
                    rwd_iter.append(np.sum(ep_rwds))

                ep_obs = FloatTensor(np.array(ep_obs))
                ep_rwds = FloatTensor(ep_rwds)
                ep_disc_rwds = FloatTensor(ep_disc_rwds)
                ep_gms = FloatTensor(ep_gms)
                ep_lmbs = FloatTensor(ep_lmbs)

                # Discounted return-to-go at each step; dividing by gamma^t
                # converts it to a return measured from step t.
                ep_disc_rets = FloatTensor(
                    [sum(ep_disc_rwds[i:]) for i in range(t)])
                ep_rets = ep_disc_rets / ep_gms

                rets.append(ep_rets)

                # TD residuals; the terminal next-value is bootstrapped as 0.
                self.v.eval()
                curr_vals = self.v(ep_obs).detach()
                next_vals = torch.cat(
                    (self.v(ep_obs)[1:], FloatTensor([[0.]]))).detach()
                ep_deltas = ep_rwds.unsqueeze(-1)\
                    + gamma_ * next_vals\
                    - curr_vals

                # GAE: advantage at step j = sum_k (gamma*lambda)^k * delta_{j+k}.
                ep_advs = FloatTensor([
                    ((ep_gms * ep_lmbs)[:t - j].unsqueeze(-1) *
                     ep_deltas[j:]).sum() for j in range(t)
                ])
                advs.append(ep_advs)

                gms.append(ep_gms)

            rwd_iter_means.append(np.mean(rwd_iter))
            print("Iterations: {},   Reward Mean: {}".format(
                i + 1, np.mean(rwd_iter)))

            obs = FloatTensor(np.array(obs))
            acts = FloatTensor(np.array(acts))
            rets = torch.cat(rets)
            advs = torch.cat(advs)
            gms = torch.cat(gms)

            if normalize_advantage:
                advs = (advs - advs.mean()) / advs.std()

            # --- Value-function update: one natural-gradient step that
            # maximizes the negated squared error subject to a quadratic
            # constraint of size eps.
            self.v.train()
            old_params = get_flat_params(self.v).detach()
            old_v = self.v(obs).detach()

            def constraint():
                # Quadratic deviation of current value outputs from old ones.
                return ((old_v - self.v(obs))**2).mean()

            grad_diff = get_flat_grads(constraint(), self.v)

            def Hv(v):
                # Hessian-vector product of the constraint via double backprop.
                hessian = get_flat_grads(torch.dot(grad_diff, v), self.v)\
                    .detach()

                return hessian

            g = get_flat_grads(
                ((-1) * (self.v(obs).squeeze() - rets)**2).mean(),
                self.v).detach()
            s = conjugate_gradient(Hv, g).detach()

            Hs = Hv(s).detach()
            # Step size so that s^T H s stays within 2*eps.
            alpha = torch.sqrt(2 * eps / torch.dot(s, Hs))

            new_params = old_params + alpha * s

            set_params(self.v, new_params)

            # --- Policy update (TRPO): surrogate objective L with KL trust
            # region, solved by CG + backtracking line search.
            self.pi.train()
            old_params = get_flat_params(self.pi).detach()
            old_distb = self.pi(obs)

            def L():
                # Importance-weighted surrogate advantage.
                distb = self.pi(obs)

                return (advs * torch.exp(
                    distb.log_prob(acts) - old_distb.log_prob(acts).detach())
                        ).mean()

            def kld():
                # KL(old || new); discrete case is the categorical KL, the
                # continuous case is the diagonal-Gaussian closed form
                # (covariance reduced with .sum(-1)).
                distb = self.pi(obs)

                if self.discrete:
                    old_p = old_distb.probs.detach()
                    p = distb.probs

                    return (old_p * (torch.log(old_p) - torch.log(p)))\
                        .sum(-1)\
                        .mean()

                else:
                    old_mean = old_distb.mean.detach()
                    old_cov = old_distb.covariance_matrix.sum(-1).detach()
                    mean = distb.mean
                    cov = distb.covariance_matrix.sum(-1)

                    return (0.5) * ((old_cov / cov).sum(-1) +
                                    (((old_mean - mean)**2) / cov).sum(-1) -
                                    self.action_dim + torch.log(cov).sum(-1) -
                                    torch.log(old_cov).sum(-1)).mean()

            grad_kld_old_param = get_flat_grads(kld(), self.pi)

            def Hv(v):
                # Damped Fisher-vector product for conjugate gradient.
                hessian = get_flat_grads(torch.dot(grad_kld_old_param, v),
                                         self.pi).detach()

                return hessian + cg_damping * v

            g = get_flat_grads(L(), self.pi).detach()

            s = conjugate_gradient(Hv, g).detach()
            Hs = Hv(s).detach()

            new_params = rescale_and_linesearch(g, s, Hs, max_kl, L, kld,
                                                old_params, self.pi)

            set_params(self.pi, new_params)

        return rwd_iter_means
Пример #20
0
    def train(self, env, render=False):
        """Train policy ``self.pi`` and value net ``self.v`` with PPO-clip.

        Per iteration: collect ``num_steps_per_iter`` env steps with GAE
        advantages, then run several epochs of minibatch SGD on the combined
        clipped-surrogate / value / entropy objective.

        :param env: Gym-style environment (``reset`` / ``step`` / ``render``).
        :param render: If True, render the environment at every step.
        :return: List of per-iteration mean episode rewards.
        """
        lr = self.train_config["lr"]
        num_iters = self.train_config["num_iters"]
        num_steps_per_iter = self.train_config["num_steps_per_iter"]
        num_epochs = self.train_config["num_epochs"]
        minibatch_size = self.train_config["minibatch_size"]
        horizon = self.train_config["horizon"]          # optional per-episode step cap
        gamma_ = self.train_config["gamma"]             # discount factor
        lambda_ = self.train_config["lambda"]           # GAE lambda
        eps = self.train_config["epsilon"]              # PPO clip range
        c1 = self.train_config["vf_coeff"]              # value-loss weight
        c2 = self.train_config["entropy_coeff"]         # entropy-bonus weight
        normalize_advantage = self.train_config["normalize_advantage"]

        opt_pi = torch.optim.Adam(self.pi.parameters(), lr)
        opt_v = torch.optim.Adam(self.v.parameters(), lr)

        rwd_iter_means = []
        for i in range(num_iters):
            rwd_iter = []

            # Flat (cross-episode) buffers for this iteration.
            obs = []
            acts = []
            rets = []
            advs = []
            gms = []

            steps = 0
            while steps < num_steps_per_iter:
                # Per-episode buffers.
                ep_obs = []
                ep_rwds = []
                ep_disc_rwds = []
                ep_gms = []     # gamma^t per step
                ep_lmbs = []    # lambda^t per step

                t = 0
                done = False

                ob = env.reset()

                while not done and steps < num_steps_per_iter:
                    act = self.act(ob)

                    ep_obs.append(ob)
                    obs.append(ob)
                    acts.append(act)

                    if render:
                        env.render()
                    ob, rwd, done, info = env.step(act)

                    ep_rwds.append(rwd)
                    ep_disc_rwds.append(rwd * (gamma_**t))
                    ep_gms.append(gamma_**t)
                    ep_lmbs.append(lambda_**t)

                    t += 1
                    steps += 1

                    if horizon is not None:
                        if t >= horizon:
                            done = True
                            break

                # Only completed episodes count toward the reward statistics.
                if done:
                    rwd_iter.append(np.sum(ep_rwds))

                ep_obs = FloatTensor(np.array(ep_obs))
                ep_rwds = FloatTensor(ep_rwds)
                ep_disc_rwds = FloatTensor(ep_disc_rwds)
                ep_gms = FloatTensor(ep_gms)
                ep_lmbs = FloatTensor(ep_lmbs)

                # Discounted return-to-go; dividing by gamma^t re-bases the
                # return to the step where it starts.
                ep_disc_rets = FloatTensor(
                    [sum(ep_disc_rwds[i:]) for i in range(t)])
                ep_rets = ep_disc_rets / ep_gms

                rets.append(ep_rets)

                # TD residuals; terminal next-value is bootstrapped as 0.
                self.v.eval()
                curr_vals = self.v(ep_obs).detach()
                next_vals = torch.cat(
                    (self.v(ep_obs)[1:], FloatTensor([[0.]]))).detach()
                ep_deltas = ep_rwds.unsqueeze(-1)\
                    + gamma_ * next_vals\
                    - curr_vals

                # GAE: advantage at step j = sum_k (gamma*lambda)^k * delta_{j+k}.
                ep_advs = FloatTensor([
                    ((ep_gms * ep_lmbs)[:t - j].unsqueeze(-1) *
                     ep_deltas[j:]).sum() for j in range(t)
                ])
                advs.append(ep_advs)

                gms.append(ep_gms)

            rwd_iter_means.append(np.mean(rwd_iter))
            print("Iterations: {},   Reward Mean: {}".format(
                i + 1, np.mean(rwd_iter)))

            obs = FloatTensor(np.array(obs))
            acts = FloatTensor(np.array(acts))
            rets = torch.cat(rets)
            advs = torch.cat(advs)
            gms = torch.cat(gms)

            if normalize_advantage:
                advs = (advs - advs.mean()) / advs.std()

            # Snapshot the behavior policy's log-probs for the ratio term.
            self.pi.eval()
            old_log_pi = self.pi(obs).log_prob(acts).detach()

            self.pi.train()
            self.v.train()

            max_steps = num_epochs * (num_steps_per_iter // minibatch_size)

            for _ in range(max_steps):
                # Sample a minibatch without replacement from the batch.
                minibatch_indices = np.random.choice(range(steps),
                                                     minibatch_size, False)
                mb_obs = obs[minibatch_indices]
                mb_acts = acts[minibatch_indices]
                mb_advs = advs[minibatch_indices]
                mb_rets = rets[minibatch_indices]

                mb_distb = self.pi(mb_obs)
                mb_log_pi = mb_distb.log_prob(mb_acts)
                mb_old_log_pi = old_log_pi[minibatch_indices]

                # Probability ratio pi_new / pi_old.
                r = torch.exp(mb_log_pi - mb_old_log_pi)

                # PPO clipped surrogate objective.
                L_clip = torch.minimum(
                    r * mb_advs,
                    torch.clip(r, 1 - eps, 1 + eps) * mb_advs)

                L_vf = (self.v(mb_obs).squeeze() - mb_rets)**2

                S = mb_distb.entropy()

                # Joint loss: maximize L_clip + c2*S, minimize c1*L_vf.
                opt_pi.zero_grad()
                opt_v.zero_grad()
                loss = (-1) * (L_clip - c1 * L_vf + c2 * S).mean()
                loss.backward()
                opt_pi.step()
                opt_v.step()

        return rwd_iter_means
Пример #21
0
def main():
    """Run the full train / validate / test loop for the modular VQA model.

    Relies on module-level globals defined elsewhere in this file:
    ``args``, ``state_dict``, the ``train_*`` / ``valid_*`` / ``test_*``
    data lists and batches, ``attn_seq2seq``, ``training_task``,
    ``optimizer``, ``loss``, ``sigmoid``, ``sigmoid_map``, ``param_list``,
    ``update_visdom`` and ``save_checkpoint``.

    Prints per-epoch losses/accuracies, optionally pushes them to visdom,
    saves periodic checkpoints, and finally reports test loss/accuracy.
    """
    print('Beginning to train!\n')
    epoch_loss_lst = []
    valid_loss_lst = []

    num_epochs = args.epochs
    batch_size = state_dict['BATCH_SIZE']

    training_acc_lst = []
    validation_acc_lst = []

    attn_lst = []
    tst_preds = []
    tst_true = []

    for e in range(num_epochs):
        batch_loss = []
        vloss_lst = []
        trn_preds = []
        val_preds = []
        trn_true = []
        val_true = []
        attns = []
        update_dict = {}        # metrics forwarded to visdom for this epoch

        # ---- Training pass ----
        for batch in tqdm(range(len(train_answer_list) // batch_size)):
            batch_range = list(
                range(batch * batch_size, (batch + 1) * batch_size))
            optimizer.zero_grad()
            # NOTE(review): only the first layout of the batch is used as the
            # module layout for the whole batch — confirm this is intended.
            train_seq = train_layout_list[batch * batch_size]
            label = Variable(
                FloatTensor(
                    np.matrix([train_answer_list[br] for br in batch_range])))
            xtxt, attn, txtloss = attn_seq2seq.forward(train_qbatches[batch],
                                                       train_lbatches[batch],
                                                       train_obatches[batch])
            attns.append(attn)
            # Visual features are only assembled for VQA-style tasks.
            if isinstance(training_task, VQAModuloTask):
                img = Variable(FloatTensor(train_images[batch_range, :, :, :]))
                xvis = training_task.module_dict['_Img'].forward(img)
                network = training_task.assemble(train_seq, xvis, xtxt)
            else:
                network = training_task.assemble(train_seq, None, xtxt)

            output = loss(network.squeeze(2), label.permute(1, 0)) + txtloss
            output.backward()

            if args.use_gradient_clipping:
                clip_grad_norm(param_list, max_norm=args.max_grad_norm)

            optimizer.step()

            # Move everything to CPU before converting to numpy.
            if state_dict['GPU_SUPPORT']:
                label = label.cpu()
                output = output.cpu()
                network = network.cpu()

            trn_true.extend(list(label.permute(1, 0).data.numpy().flatten()))
            trn_preds.extend(
                list(sigmoid(network.squeeze(2)).data.numpy().flatten()))
            batch_loss.append(output.data.numpy()[0])

        epoch_loss_lst.append(np.mean(batch_loss))
        print('EPOCH {}/{} \n\tTRAINING LOSS = {}'.format(
            e + 1, num_epochs, epoch_loss_lst[-1]))
        update_dict['training_loss'] = epoch_loss_lst[-1]

        # ---- Validation pass (no gradient updates) ----
        for vbatch in range(len(valid_answer_list) // batch_size):
            vbatch_range = list(
                range(vbatch * batch_size, (vbatch + 1) * batch_size))
            valid_seq = valid_layout_list[vbatch * batch_size]
            valid_label = Variable(
                FloatTensor(
                    np.matrix([valid_answer_list[br] for br in vbatch_range])))
            vxtxt, _, vtxtloss = attn_seq2seq.forward(valid_qbatches[vbatch],
                                                      valid_lbatches[vbatch],
                                                      valid_obatches[vbatch])
            if isinstance(training_task, VQAModuloTask):
                vimg = Variable(
                    FloatTensor(valid_images[vbatch_range, :, :, :]))
                vx_vis = training_task.module_dict['_Img'].forward(vimg)
                network = training_task.assemble(valid_seq, vx_vis, vxtxt)
            else:
                network = training_task.assemble(valid_seq, None, vxtxt)

            output = loss(network.squeeze(2), valid_label.permute(
                1, 0)) + vtxtloss

            if state_dict['GPU_SUPPORT']:
                valid_label = valid_label.cpu()
                output = output.cpu()
                network = network.cpu()
            val_true.extend(
                list(valid_label.permute(1, 0).data.numpy().flatten()))
            val_preds.extend(
                list(sigmoid(network.squeeze(2)).data.numpy().flatten()))
            vloss_lst.append(output.data.numpy()[0])
        valid_loss_lst.append(np.mean(vloss_lst))

        print('\tVALIDATION LOSS = {}'.format(valid_loss_lst[-1]))
        update_dict['validation_loss'] = valid_loss_lst[-1]

        # Threshold the sigmoid scores into hard predictions, then compare.
        trn_preds = sigmoid_map(trn_preds)
        val_preds = sigmoid_map(val_preds)
        training_acc = np.mean(np.array(trn_preds) == np.array(trn_true)) * 100
        validation_acc = np.mean(
            np.array(val_preds) == np.array(val_true)) * 100
        print('\tTRAINING ACCURACY: {}\n\tVALIDATION ACCURACY: {}'.format(
            training_acc, validation_acc))

        training_acc_lst.append(training_acc)
        validation_acc_lst.append(validation_acc)
        attn_lst.append(attns)
        update_dict['training_accuracy'] = training_acc
        update_dict['validation_accuracy'] = validation_acc

        # Attention map of the first example, for visualization.
        if state_dict['GPU_SUPPORT']:
            first_attn = torch.stack(attn_lst[-1][0]).permute(
                1, 0, 2)[0].cpu().data.numpy()
        else:
            first_attn = torch.stack(attn_lst[-1][0]).permute(
                1, 0, 2)[0].data.numpy()

        if args.visdom:
            update_visdom(e, update_dict, first_attn)

        # Periodic checkpoint of the seq2seq, optimizer and all task modules.
        if e % args.checkpoint_freq == 0:
            checkpoint_dict = {
                'seq2seq': attn_seq2seq.state_dict(),
                'optimizer': optimizer.state_dict(),
            }

            for mod_name, mod in training_task.module_dict.items():
                checkpoint_dict[mod_name] = mod.state_dict()

            save_checkpoint(checkpoint_dict)
            print('\nSAVED CHECKPOINT\n')
    print('DONE TRAINING, EVALUATING TEST PERFORMANCE...')
    tloss_lst = []

    # ---- Test pass ----
    for tbatch in range(len(test_answer_list) // batch_size):
        tbatch_range = list(
            range(tbatch * batch_size, (tbatch + 1) * batch_size))
        test_seq = test_layout_list[tbatch * batch_size]
        test_label = Variable(
            FloatTensor(
                np.matrix([test_answer_list[br] for br in tbatch_range])))
        txtxt, _, _ = attn_seq2seq.forward(test_qbatches[tbatch],
                                           test_lbatches[tbatch],
                                           test_obatches[tbatch])
        if isinstance(training_task, VQAModuloTask):
            # NOTE(review): indexes valid_images with test-batch indices —
            # looks like it should be test_images; confirm upstream.
            timg = torch.autograd.Variable(
                FloatTensor(valid_images[tbatch_range, :, :, :]))
            tx_vis = training_task.module_dict['_Img'].forward(timg)
            network = training_task.assemble(test_seq, tx_vis, txtxt)
        else:
            network = training_task.assemble(test_seq, None, txtxt)

        output = loss(network.squeeze(2), test_label.permute(1, 0))

        if state_dict['GPU_SUPPORT']:
            test_label = test_label.cpu()
            output = output.cpu()
            network = network.cpu()
        tst_true.extend(list(test_label.permute(1, 0).data.numpy().flatten()))
        tst_preds.extend(
            list(sigmoid(network.squeeze(2)).data.numpy().flatten()))
        tloss_lst.append(output.data.numpy()[0])
    tst_preds = sigmoid_map(tst_preds)
    tst_acc = np.mean(np.array(tst_preds) == np.array(tst_true)) * 100

    print('TESTING LOSS: {}\nTESTING ACCURACY: {}'.format(
        np.mean(tloss_lst), tst_acc))
Пример #22
0
    def forward(self, x, targets=None, img_dim=None):
        """YOLO detection-layer forward pass.

        Decodes the raw feature map ``x`` into bounding boxes; when
        ``targets`` is given, also computes the detection loss and fills
        ``self.metrics``.

        :param x: Feature map of shape
            (num_samples, num_anchors * (num_classes + 5), grid, grid).
        :param targets: Optional ground-truth targets (format expected by
            ``utils.build_targets``).
        :param img_dim: Input image size, stored on ``self`` for offset math.
        :return: ``(output, 0)`` at inference, ``(output, total_loss)``
            when targets are provided.
        """
        self.img_dim = img_dim
        num_samples = x.size(0)
        grid_size = x.size(2)

        # Reshape to (samples, anchors, grid, grid, classes + 5).
        prediction = (x.view(
            num_samples,
            self.num_anchors,
            self.num_classes + 5,
            grid_size,
            grid_size,
        ).permute(0, 1, 3, 4, 2).contiguous())

        # Get outputs
        x = torch.sigmoid(prediction[..., 0])  # Center x
        y = torch.sigmoid(prediction[..., 1])  # Center y
        w = prediction[..., 2]  # Width
        h = prediction[..., 3]  # Height
        pred_conf = torch.sigmoid(prediction[..., 4])  # Conf
        pred_cls = torch.sigmoid(prediction[..., 5:])  # Cls pred.

        # If grid size does not match current we compute new offsets
        if grid_size != self.grid_size:
            self.compute_grid_offsets(grid_size, x.is_cuda)

        # Add offset and scale with anchors
        # NOTE(review): FloatTensor(shape) allocates uninitialized memory
        # (all four channels are overwritten below), and the hard-coded
        # .cuda(...) makes this path require CUDA — confirm CPU execution
        # is never expected here.
        pred_boxes = FloatTensor(prediction[..., :4].shape).cuda(self.device)
        pred_boxes[..., 0] = x.data + self.grid_x
        pred_boxes[..., 1] = y.data + self.grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h

        # Flatten every anchor/cell into one detection row:
        # (x, y, w, h scaled to image space, objectness, class scores).
        output = torch.cat(
            (
                pred_boxes.view(num_samples, -1, 4) * self.stride,
                pred_conf.view(num_samples, -1, 1),
                pred_cls.view(num_samples, -1, self.num_classes),
            ),
            -1,
        )

        if targets is None:
            return output, 0
        else:
            (
                iou_scores,
                class_mask,
                obj_mask,
                noobj_mask,
                tx,
                ty,
                tw,
                th,
                tcls,
                tconf,
            ) = utils.build_targets(
                pred_boxes=pred_boxes,
                pred_cls=pred_cls,
                target=targets,
                anchors=self.scaled_anchors,
                ignore_thres=self.ignore_thres,
                device=self.device,
            )

            # Loss : Mask outputs to ignore non-existing objects (except with conf. loss)
            loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
            loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
            loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
            loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
            loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
            loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask],
                                            tconf[noobj_mask])
            # Objectness loss is weighted separately for cells with/without objects.
            loss_conf = (self.obj_scale * loss_conf_obj +
                         self.noobj_scale * loss_conf_noobj)
            loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
            total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

            # Metrics
            cls_acc = 100 * class_mask[obj_mask].mean()
            conf_obj = pred_conf[obj_mask].mean()
            conf_noobj = pred_conf[noobj_mask].mean()
            conf50 = (pred_conf > 0.5).float()
            iou50 = (iou_scores > 0.5).float()
            iou75 = (iou_scores > 0.75).float()
            detected_mask = conf50 * class_mask * tconf
            # Epsilon guards against division by zero when nothing is detected.
            precision = torch.sum(
                iou50 * detected_mask) / (conf50.sum() + 1e-16)
            recall50 = torch.sum(
                iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
            recall75 = torch.sum(
                iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

            self.metrics = {
                "loss": utils.to_cpu(total_loss).item(),
                "x": utils.to_cpu(loss_x).item(),
                "y": utils.to_cpu(loss_y).item(),
                "w": utils.to_cpu(loss_w).item(),
                "h": utils.to_cpu(loss_h).item(),
                "conf": utils.to_cpu(loss_conf).item(),
                "cls": utils.to_cpu(loss_cls).item(),
                "cls_acc": utils.to_cpu(cls_acc).item(),
                "recall50": utils.to_cpu(recall50).item(),
                "recall75": utils.to_cpu(recall75).item(),
                "precision": utils.to_cpu(precision).item(),
                "conf_obj": utils.to_cpu(conf_obj).item(),
                "conf_noobj": utils.to_cpu(conf_noobj).item(),
                "grid_size": grid_size,
            }

            return output, total_loss
Пример #23
0
    def train(self, env, render=False):
        """Actor-critic policy-gradient training loop.

        Unlike the sibling ``train`` variants, iterations are counted by
        accumulated environment steps: every ``num_steps_per_iter`` steps
        closes one iteration, and episodes may span iteration boundaries.

        :param env: Gym-style environment (``reset`` / ``step`` / ``render``).
        :param render: If True, render the environment at every step.
        :return: List of per-iteration mean episode rewards.
        """
        lr = self.train_config["lr"]
        num_iters = self.train_config["num_iters"]
        num_steps_per_iter = self.train_config["num_steps_per_iter"]
        horizon = self.train_config["horizon"]          # optional per-episode step cap
        discount = self.train_config["discount"]
        normalize_advantage = self.train_config["normalize_advantage"]

        opt_pi = torch.optim.Adam(self.pi.parameters(), lr)
        opt_v = torch.optim.Adam(self.v.parameters(), lr)

        rwd_iter_means = []
        rwd_iter = []

        i = 0
        steps = 0
        while i < num_iters:
            # Per-episode buffers; one policy/value update per episode.
            obs = []
            acts = []
            rwds = []
            disc_rwds = []
            disc = []       # discount^t per step

            t = 0
            done = False

            ob = env.reset()

            while not done:
                act = self.act(ob)

                obs.append(ob)
                acts.append(act)

                if render:
                    env.render()
                ob, rwd, done, info = env.step(act)

                rwds.append(rwd)
                disc_rwds.append(rwd * (discount**t))
                disc.append(discount**t)

                t += 1
                steps += 1
                # An iteration ends after num_steps_per_iter total steps,
                # even mid-episode.
                if steps == num_steps_per_iter:
                    rwd_iter_means.append(np.mean(rwd_iter))
                    print("Iterations: {},   Reward Mean: {}".format(
                        i + 1, np.mean(rwd_iter)))

                    i += 1
                    steps = 0
                    rwd_iter = []

                if horizon is not None:
                    if t >= horizon:
                        done = True
                        break

            rwd_iter.append(np.sum(rwds))

            obs = FloatTensor(np.array(obs))
            acts = FloatTensor(np.array(acts))
            rwds = FloatTensor(rwds)

            disc = FloatTensor(disc)

            ###
            # Discounted return-to-go, re-based to each step t by dividing
            # out discount^t.
            disc_rets = FloatTensor(
                [sum(disc_rwds[i:]) for i in range(len(disc_rwds))])
            rets = disc_rets / disc
            ###

            # One-step TD advantage for the policy update; terminal
            # next-value is bootstrapped as 0.
            self.v.eval()
            curr_vals = self.v(obs)
            next_vals = torch.cat((self.v(obs)[1:], FloatTensor([[0.]])))
            advantage = (rwds.unsqueeze(-1) + discount * next_vals -
                         curr_vals).detach()
            if normalize_advantage:
                advantage = (advantage - advantage.mean()) / advantage.std()
            # print(advantage.shape, obs.shape, disc.shape)
            # Monte-Carlo residual used as the value-net learning signal.
            delta = (rets - self.v(obs).squeeze()).detach()

            self.v.train()

            opt_v.zero_grad()
            # loss = (0.5) * (
            #     rwds.unsqueeze(-1)
            #     + discount * next_vals.detach()
            #     - self.v(obs)
            # ) ** 2
            loss = (-1) * disc * delta * self.v(obs).squeeze()
            # loss = (0.5) * ((rets - self.v(obs).squeeze()) ** 2)
            # loss = (-1) * disc.unsqueeze(-1) * advantage * self.v(obs)
            # print(loss.shape)
            loss.mean().backward()
            opt_v.step()

            # Policy gradient: discounted log-prob weighted by advantage.
            self.pi.train()
            distb = self.pi(obs)

            opt_pi.zero_grad()
            loss = (-1) * disc.unsqueeze(-1) * advantage * distb.log_prob(acts)
            loss.mean().backward()
            opt_pi.step()

        return rwd_iter_means
Пример #24
0
    def train(self, env, render=False):
        """Train the policy with a TRPO-style natural-gradient step on the
        Monte-Carlo policy-gradient objective, optionally using the value
        net ``self.v`` as a baseline.

        :param env: Gym-style environment (``reset`` / ``step`` / ``render``).
        :param render: If True, render the environment at every step.
        :return: List of per-iteration mean episode rewards.
        """
        lr = self.train_config["lr"]
        num_iters = self.train_config["num_iters"]
        num_steps_per_iter = self.train_config["num_steps_per_iter"]
        horizon = self.train_config["horizon"]          # optional per-episode step cap
        discount = self.train_config["discount"]
        max_kl = self.train_config["max_kl"]            # KL bound for the policy step
        cg_damping = self.train_config["cg_damping"]
        normalize_return = self.train_config["normalize_return"]
        use_baseline = self.train_config["use_baseline"]

        if use_baseline:
            opt_v = torch.optim.Adam(self.v.parameters(), lr)

        rwd_iter_means = []
        for i in range(num_iters):
            rwd_iter = []

            # Flat (cross-episode) buffers for this iteration.
            obs = []
            acts = []
            rets = []
            disc = []

            steps = 0
            while steps < num_steps_per_iter:
                # Per-episode buffers.
                ep_rwds = []
                ep_disc_rwds = []
                ep_disc = []    # discount^t per step

                t = 0
                done = False

                ob = env.reset()

                while not done and steps < num_steps_per_iter:
                    act = self.act(ob)

                    obs.append(ob)
                    acts.append(act)

                    if render:
                        env.render()
                    ob, rwd, done, info = env.step(act)

                    ep_rwds.append(rwd)
                    ep_disc_rwds.append(rwd * (discount ** t))
                    ep_disc.append(discount ** t)

                    t += 1
                    steps += 1

                    if horizon is not None:
                        if t >= horizon:
                            done = True
                            break

                ep_disc = FloatTensor(ep_disc)

                # Discounted return-to-go, re-based to each step by dividing
                # out discount^t.
                ep_disc_rets = FloatTensor(
                    [sum(ep_disc_rwds[i:]) for i in range(t)]
                )
                ep_rets = ep_disc_rets / ep_disc

                rets.append(ep_rets)
                disc.append(ep_disc)

                # Only completed episodes count toward the reward statistics.
                if done:
                    rwd_iter.append(np.sum(ep_rwds))

            rwd_iter_means.append(np.mean(rwd_iter))
            print(
                "Iterations: {},   Reward Mean: {}"
                .format(i + 1, np.mean(rwd_iter))
            )

            obs = FloatTensor(np.array(obs))
            acts = FloatTensor(np.array(acts))
            rets = torch.cat(rets)
            disc = torch.cat(disc)

            if normalize_return:
                rets = (rets - rets.mean()) / rets.std()

            if use_baseline:
                # Residual return vs. value prediction; also the advantage
                # signal used in L() below.
                self.v.eval()
                delta = (rets - self.v(obs).squeeze()).detach()

                self.v.train()

                opt_v.zero_grad()
                loss = (-1) * disc * delta * self.v(obs).squeeze()
                loss.mean().backward()
                opt_v.step()

            # --- Policy update (TRPO): surrogate objective + KL trust region.
            self.pi.train()
            old_params = get_flat_params(self.pi).detach()
            old_distb = self.pi(obs)

            def L():
                # Importance-weighted surrogate, weighted by baseline-corrected
                # returns (delta) when a baseline is in use, raw returns otherwise.
                distb = self.pi(obs)

                if use_baseline:
                    return (disc * delta * torch.exp(
                                distb.log_prob(acts)
                                - old_distb.log_prob(acts).detach()
                            )).mean()
                else:
                    return (disc * rets * torch.exp(
                                distb.log_prob(acts)
                                - old_distb.log_prob(acts).detach()
                            )).mean()

            def kld():
                # KL(old || new); categorical KL for discrete policies,
                # diagonal-Gaussian closed form otherwise.
                distb = self.pi(obs)

                if self.discrete:
                    old_p = old_distb.probs.detach()
                    p = distb.probs

                    return (old_p * (torch.log(old_p) - torch.log(p)))\
                        .sum(-1)\
                        .mean()

                else:
                    old_mean = old_distb.mean.detach()
                    old_cov = old_distb.covariance_matrix.sum(-1).detach()
                    mean = distb.mean
                    cov = distb.covariance_matrix.sum(-1)

                    return (0.5) * (
                            (old_cov / cov).sum(-1)
                            + (((old_mean - mean) ** 2) / cov).sum(-1)
                            - self.action_dim
                            + torch.log(cov).sum(-1)
                            - torch.log(old_cov).sum(-1)
                        ).mean()

            grad_kld_old_param = get_flat_grads(kld(), self.pi)

            def Hv(v):
                # Damped Fisher-vector product for conjugate gradient.
                hessian = get_flat_grads(
                    torch.dot(grad_kld_old_param, v),
                    self.pi
                ).detach()

                return hessian + cg_damping * v

            g = get_flat_grads(L(), self.pi).detach()

            s = conjugate_gradient(Hv, g).detach()
            Hs = Hv(s).detach()

            new_params = rescale_and_linesearch(
                g, s, Hs, max_kl, L, kld, old_params, self.pi
            )

            set_params(self.pi, new_params)

        return rwd_iter_means
Пример #25
0
 def __init__(self):
     """Wrap a pretrained VGG-19, exposing its features up to relu4_4.

     Stores ImageNet per-channel mean/std as (1, 3, 1, 1) tensors so they
     broadcast over NCHW input batches.
     """
     super(VGG, self).__init__()
     # ImageNet normalization statistics.
     self.vgg_mean = FloatTensor([[[[0.485]], [[0.456]], [[0.406]]]])
     self.vgg_std = FloatTensor([[[[0.229]], [[0.224]], [[0.225]]]])
     # Keep only the convolutional prefix up to (and including) relu4_4.
     backbone = vgg19(pretrained=True)
     self.vgg_relu4_4 = backbone.features[:27]
Пример #26
0
    def train(self, env, render: bool = False):
        """Train the policy on `env` with a REINFORCE-style policy gradient.

        Runs episodes until `num_iters` iterations of `num_steps_per_iter`
        environment steps each have been consumed.  Optionally subtracts a
        learned state-value baseline from the returns.

        :param env: Environment to interact with.  NOTE(review): assumed to
            follow the classic (pre-0.26) Gym API — `reset()` returns an
            observation and `step()` returns `(ob, reward, done, info)`;
            confirm against the caller.
        :param render: If True, call `env.render()` every step.
        :return: List of per-iteration mean episode rewards.
        """
        # Hyperparameters come from the externally supplied config dict.
        lr = self.train_config["lr"]
        num_iters = self.train_config["num_iters"]
        num_steps_per_iter = self.train_config["num_steps_per_iter"]
        horizon = self.train_config["horizon"]
        discount = self.train_config["discount"]
        normalize_return = self.train_config["normalize_return"]
        use_baseline = self.train_config["use_baseline"]

        opt_pi = torch.optim.Adam(self.pi.parameters(), lr)
        if use_baseline:
            # Separate optimizer for the value-function baseline.
            opt_v = torch.optim.Adam(self.v.parameters(), lr)

        rwd_iter_means = []  # one mean episode reward per iteration (returned)
        rwd_iter = []        # total rewards of episodes in the current iteration

        i = 0      # iteration counter
        steps = 0  # env steps taken in the current iteration
        while i < num_iters:
            # Per-episode rollout buffers.
            obs = []
            acts = []
            rwds = []       # raw rewards r_t
            disc_rwds = []  # discounted rewards gamma^t * r_t
            disc = []       # discount factors gamma^t

            t = 0
            done = False

            ob = env.reset()

            while not done:
                act = self.act(ob)

                obs.append(ob)
                acts.append(act)

                if render:
                    env.render()
                ob, rwd, done, info = env.step(act)

                rwds.append(rwd)
                disc_rwds.append(rwd * (discount**t))
                disc.append(discount**t)

                t += 1
                steps += 1
                # Iteration boundary is step-based and can fire mid-episode;
                # the current episode's reward is then credited to the NEXT
                # iteration's `rwd_iter` (appended after this loop ends).
                # NOTE(review): if the boundary fires before any episode has
                # finished, `np.mean(rwd_iter)` is taken over an empty list
                # and yields NaN with a RuntimeWarning — confirm intended.
                if steps == num_steps_per_iter:
                    rwd_iter_means.append(np.mean(rwd_iter))
                    print("Iterations: {},   Reward Mean: {}".format(
                        i + 1, np.mean(rwd_iter)))

                    i += 1
                    steps = 0
                    rwd_iter = []

                # Truncate the episode at `horizon` steps when one is set.
                if horizon is not None:
                    if t >= horizon:
                        done = True
                        break

            rwd_iter.append(np.sum(rwds))

            obs = FloatTensor(np.array(obs))
            acts = FloatTensor(np.array(acts))

            disc = FloatTensor(disc)

            # Discounted return-to-go G_t = sum_{k>=t} gamma^k r_k; dividing
            # by gamma^t converts it to a return measured from time t.
            # (The comprehension's `i` is scoped to the comprehension and does
            # not clobber the outer iteration counter.)
            disc_rets = FloatTensor(
                [sum(disc_rwds[i:]) for i in range(len(disc_rwds))])
            rets = disc_rets / disc

            if normalize_return:
                # NOTE(review): rets.std() can be 0 (or NaN for a length-1
                # episode), which would produce non-finite returns — confirm.
                rets = (rets - rets.mean()) / rets.std()

            if use_baseline:
                # Advantage estimate; detached so the policy update does not
                # backprop through the value network.
                self.v.eval()
                delta = (rets - self.v(obs).squeeze()).detach()

                self.v.train()

                # NOTE(review): the baseline is trained with a pseudo-gradient
                # objective (-disc * delta * V) rather than an MSE regression
                # onto the returns — confirm this is the intended update rule.
                opt_v.zero_grad()
                loss = (-1) * disc * delta * self.v(obs).squeeze()
                loss.mean().backward()
                opt_v.step()

            self.pi.train()
            distb = self.pi(obs)

            # Policy-gradient loss: minimize -gamma^t * (advantage or return)
            # * log pi(a_t | s_t).
            opt_pi.zero_grad()
            if use_baseline:
                loss = (-1) * disc * delta * distb.log_prob(acts)
            else:
                loss = (-1) * disc * distb.log_prob(acts) * rets
            loss.mean().backward()
            opt_pi.step()

        return rwd_iter_means
Пример #27
0
 def forward(self, input):
     """Normalize `input` with ImageNet statistics and extract VGG features.

     :param input: Image batch; channel dimension must hold 3 (RGB) channels
         so the (1, 3, 1, 1) statistics broadcast over it.
     :return: Activations of the stored VGG sub-network (up to relu4_4).
     """
     # Per-channel ImageNet mean / std, shaped (1, 3, 1, 1) for broadcasting.
     mean = FloatTensor([[[[0.485]], [[0.456]], [[0.406]]]])
     std = FloatTensor([[[[0.229]], [[0.224]], [[0.225]]]])
     normalized = (input - mean) / std
     return self.vgg_relu4_4(normalized)
Пример #28
0
    def train(self, input: ByteTensor, target_class: int):
        """Train the machine with a single example.

        Evaluates all clauses on `input`, draws stochastic Type I / Type II
        feedback per clause, and updates every Tsetlin automaton state
        in-place.

        :param input: Input vector. Shape (feature_count, )
        :param target_class: Correct class for input.
        """
        clause_outputs = self.evaluate_clauses(input)
        class_sum = self.sum_up_class_votes(clause_outputs)

        #####################################
        ### Calculate Feedback to Clauses ###
        #####################################

        pos_feedback = ByteTensor(*self.clause_shape).zero_()
        neg_feedback = ByteTensor(*self.clause_shape).zero_()

        # Process negative targets: clauses of every class receive feedback
        # with probability proportional to (T + class_sum) / 2T.
        threshold = (1.0 / (self.threshold * 2)) * \
                    (self.threshold + class_sum.float())
        threshold = threshold.view(1, self.class_count, 1, 1)
        threshold = threshold.expand(*self.clause_shape)
        feedback_rand = FloatTensor(2, self.class_count,
                                    self.clauses_per_class // 2, 1).uniform_()
        feedback_threshold = feedback_rand <= threshold
        neg_feedback[0] = feedback_threshold[0]
        pos_feedback[1] = feedback_threshold[1]

        # Process target class: feedback probability (T - class_sum) / 2T,
        # with polarity opposite to the non-target case.
        feedback_rand = FloatTensor(2, self.clauses_per_class // 2,
                                    1).uniform_()
        feedback_threshold = (
            feedback_rand <= (1.0 / (self.threshold * 2)) *
            (self.threshold - class_sum[target_class].float()))

        pos_feedback[0, target_class] = feedback_threshold[0]
        neg_feedback[1, target_class] = feedback_threshold[1]
        # The target class must not also receive the negative-target feedback
        # assigned in the previous step — zero it out.
        neg_feedback[0, target_class] = 0
        pos_feedback[1, target_class] = 0

        #################################
        ### Train Individual Automata ###
        #################################

        # Bernoulli masks: Type I feedback uses probability 1/s for "forget"
        # moves and (s-1)/s for "memorize" moves.
        low_prob = FloatTensor(*self.action.shape).uniform_() <= 1 / self.s
        high_prob = FloatTensor(
            *self.action.shape).uniform_() <= (self.s - 1) / self.s

        pos_feedback = pos_feedback.expand_as(low_prob)
        neg_feedback = neg_feedback.expand_as(low_prob)
        clauses = clause_outputs.expand_as(low_prob)
        not_clauses = clauses ^ 1  # logical NOT on 0/1 byte tensors
        X = input.expand_as(low_prob)

        #---------------------- Start CUDA
        if use_cuda:
            increment, decrement, inv_increment, inv_decrement = \
                learn(clauses, X, low_prob, high_prob, pos_feedback,
                      neg_feedback, self.action, self.inv_action)
        else:

            inv_X = (input ^ 1).expand_as(low_prob)
            notclause_low = not_clauses & low_prob & pos_feedback
            clause_x_high = clauses & X & high_prob & pos_feedback
            clause_notx_low = clauses & inv_X & low_prob & pos_feedback
            clause_notx_high = clauses & inv_X & high_prob & pos_feedback
            clause_x_low = clauses & X & low_prob & pos_feedback

            clause_notx_notaction = clauses & inv_X & (self.action
                                                       ^ 1) & neg_feedback
            clause_x_noninvaction = clauses & X & (self.inv_action
                                                   ^ 1) & neg_feedback

            # The learning algorithm will increment, decrement, or leave untouched
            # every automata. You can see the exclusiveness in the following logic.

            increment = clause_x_high | clause_notx_notaction
            decrement = notclause_low | clause_notx_low

            inv_increment = clause_x_noninvaction | clause_notx_high
            inv_decrement = clause_x_low | notclause_low

        #----------------------- End CUDA
        delta = increment.int() - decrement.int()
        inv_delta = inv_increment.int() - inv_decrement.int()
        self.automata += delta
        self.inv_automata += inv_delta

        # Keep automata in bounds [1, 2 * states].
        # BUG FIX: Tensor.clamp() is out-of-place and its return value was
        # being discarded, so the automata states were never actually bounded
        # and could drift arbitrarily far. Use the in-place clamp_() instead.
        self.automata.clamp_(1, 2 * self.states)
        self.inv_automata.clamp_(1, 2 * self.states)

        self.update_action()
Пример #29
0
"""
===========================================
Compatible with torch and tensorflow
===========================================

"""

# Author: Chaojie Wang <*****@*****.**>; Jiawen Wu <*****@*****.**>
# Jiawen Wu <*****@*****.**>; Chaojie Wang <*****@*****.**>

import warnings

# Best-effort backend detection: prefer TensorFlow, fall back to torch,
# and only warn (never crash) when neither usable backend is found.
try:
    import tensorflow as tf

    # Enable memory growth so TF does not grab all GPU memory up front.
    gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    tf.Variable(1)  # force backend initialization to prove TF really works

# BUG FIX: the original bare `except:` clauses also swallowed
# KeyboardInterrupt and SystemExit; `except Exception` keeps the same
# best-effort fallback (any TF/torch failure, not just ImportError,
# means "backend unavailable") without trapping interpreter-exit signals.
except Exception:
    try:
        from torch.cuda import FloatTensor
        x = FloatTensor(1)  # fails when torch is absent or CUDA is unusable
    except Exception:
        warnings.warn(
            "not find torch or tensorflow packages,DSG may be error after running a torch or tensorflow code"
        )