Example #1
def graclus_out(pos, x, cluster):
    uniques = cluster.unique()
    new_nr = uniques.size(0)

    # pool positions and features into one slot per cluster
    new_pos = torch.zeros(new_nr, 3)
    new_x = torch.zeros(new_nr, x.size(1), x.size(2))
    for i in range(cluster.size(0)):  # iterate over all nodes, not just the clusters
        cluster_id = cluster[i]
        new_pos[cluster_id] = pos[i]  # the last node written to a cluster wins
        new_x[cluster_id] = x[i]

    return new_pos, new_x
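
A hypothetical call, to make the expected shapes concrete (all tensor values below are made up for illustration):

import torch

pos = torch.randn(5, 3)                  # 5 node positions
x = torch.randn(5, 4, 2)                 # 5 node feature maps
cluster = torch.tensor([0, 0, 1, 2, 2])  # cluster id per node
new_pos, new_x = graclus_out(pos, x, cluster)  # shapes: (3, 3) and (3, 4, 2)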
Example #2
def JointEmbeddingLoss(fea_txt, fea_img, labels):
    batchsize = fea_img.size(0)
    num_class = fea_txt.size(1)
    score = torch.zeros(batchsize, num_class)

    loss = 0
    acc_batch = 0
    for i in range(batchsize):

        for j in range(num_class):
            score[i][j] = torch.dot(fea_img[i], fea_txt[:, j])

        label_score = score[i, labels[i]]
        for j in range(num_class):
            if j != labels[i]:
                cur_score = score[i][j]
                thresh = cur_score - label_score + 1  # margin of 1
                if thresh > 0:
                    loss += thresh
                    # kept from the original code; unused in this snippet
                    txt_diff = fea_txt[:, j] - fea_txt[:, labels[i]]

        # max over the class dimension returns (values, indices)
        max_score, max_ix = score[i].max(0)
        if max_ix.item() == labels[i]:
            acc_batch += 1

    #acc_batch = 100 * (acc_batch / batchsize)
    denom = batchsize * num_class

    return loss / denom
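
A hypothetical invocation showing the shapes the loss assumes (fea_img: (batchsize, d), fea_txt: (d, num_class); values made up):

import torch

fea_img = torch.randn(4, 16)         # image embeddings
fea_txt = torch.randn(16, 3)         # per-class text embeddings
labels = torch.tensor([0, 2, 1, 0])  # ground-truth class per image
print(JointEmbeddingLoss(fea_txt, fea_img, labels))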
Example #3
def calculate_l2(index, pair_list, output, labels, CUDA=True):
    # mean L2 distance from output[index] to the same-label and
    # different-label embeddings listed in pair_list
    same_label_loss = 0
    same_label_count = 0
    diff_label_loss = 0
    diff_label_count = 0

    for i in pair_list:
        if i == index:
            continue
        if labels[i] == labels[index]:
            same_label_loss += torch.dist(output[i], output[index], 2)
            same_label_count += 1
        else:
            diff_label_loss += torch.dist(output[i], output[index], 2)
            diff_label_count += 1
    if same_label_count == 0:
        if CUDA:
            return torch.Tensor([0]).cuda(0), diff_label_loss / diff_label_count
        else:
            return torch.Tensor([0]), diff_label_loss / diff_label_count
    elif diff_label_count == 0:
        if CUDA:
            return same_label_loss / same_label_count, torch.Tensor([0]).cuda(0)
        else:
            return same_label_loss / same_label_count, torch.Tensor([0])
    else:
        return same_label_loss / same_label_count, diff_label_loss / diff_label_count
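
A hypothetical CPU-only call, to illustrate the expected inputs (names and values made up):

import torch

output = torch.randn(5, 8)              # 5 embeddings of dimension 8
labels = torch.tensor([0, 0, 1, 1, 0])
same, diff = calculate_l2(0, range(5), output, labels, CUDA=False)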
Example #4
    def __getitem__(self, index):
        img_basename = self.list_sample[index]

        path_img = os.path.join(self.root_img, img_basename)
        path_seg = os.path.join(self.root_seg,
                                img_basename.replace('.jpg', '.png'))

        assert os.path.exists(path_img), '[{}] does not exist'.format(path_img)
        assert os.path.exists(path_seg), '[{}] does not exist'.format(path_seg)

        # load image and label
        try:
            img = imread(path_img, mode='RGB')
            seg = imread(path_seg)
            assert (img.ndim == 3)
            assert (seg.ndim == 2)
            assert (img.shape[0] == seg.shape[0])
            assert (img.shape[1] == seg.shape[1])

            # random scale, crop, flip
            if self.imgSize > 0:
                img, seg = self._scale_and_crop(img, seg, self.imgSize,
                                                self.is_train)

                if random.choice([-1, 1]) > 0:
                    img, seg = self._flip(img, seg)

            # image to float
            img = img.astype(np.float32) / 255
            img = img.transpose(2, 0, 1)

            # label to int from -1 to 149, 151 classes in total
            seg = seg.astype(np.int64) - 1

            # to torch tensor
            image = torch.from_numpy(img)
            segmentation = torch.from_numpy(seg)

        except Exception as e:
            print('Failed loading image/segmentation [{}]: {}'.format(
                path_img, e))

            # dummy data
            image = torch.zeros(3, self.imgSize, self.imgSize)
            segmentation = -1 * torch.ones(self.segSize, self.segSize).long()

            return image, segmentation, img_basename

        # subtracted by mean and divided by std
        image = self.img_transform(image)

        return image, segmentation, img_basename
Example #5
def matrix2angle(matrix):
    """
    ref: https://github.com/matthew-brett/transforms3d/blob/master/transforms3d/euler.py
    input size: ... * 3 * 3
    output size: ... * 3
    """
    i = 0
    j = 1
    k = 2
    dims = [dim for dim in matrix.shape]
    M = matrix.contiguous().view(-1, 3, 3)

    cy = torch.sqrt(M[:, i, i] * M[:, i, i] + M[:, j, i] * M[:, j, i])

    if torch.max(cy).item() > 1e-15 * 4:
        ax = torch.atan2(M[:, k, j], M[:, k, k])
        ay = torch.atan2(-M[:, k, i], cy)
        az = torch.atan2(M[:, j, i], M[:, i, i])
    else:
        # gimbal lock: cos(y) ~ 0, so the z rotation cannot be recovered
        ax = torch.atan2(-M[:, j, k], M[:, j, j])
        ay = torch.atan2(-M[:, k, i], cy)
        az = torch.zeros_like(ax)
    return torch.cat([torch.unsqueeze(ax, -1), torch.unsqueeze(ay, -1), torch.unsqueeze(az, -1)], -1).view(dims[:-1])
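
A quick sanity check: the identity rotation should decompose to zero Euler angles.

import torch

R = torch.eye(3).unsqueeze(0)  # a batch of one 3x3 rotation matrix
print(matrix2angle(R))         # tensor([[0., 0., 0.]])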
Example #6
    def forward(self, rnn_inputs):
        # TODO careful here, check
        num_steps = rnn_inputs.shape[1]
        # TODO: beware, rnn_inputs needs to be a vector of
        # shape (seq_len, batch_input_size)
        output, hidden = self.rnn(rnn_inputs, (self.h0, self.c0))
        # add softmax layer
        # TODO check in corresponding tf code following line
        # also possible: self.softmax = nn.Softmax(dim=0);
        # out = self.softmax(output)
        out = F.softmax(output, dim=0)

        if not D_DIFF and G_DIFF:  # depend on D_DIFF
            W = torch.randn((self.state_size, 1))
            b = torch.zeros([1])
            # logits_t = torch.matmul(output, W) + b
            logits_t = torch.mm(output, W) + b
            logits_t = F.elu(logits_t) + 1
            logits_t = torch.cumsum(logits_t, dim=1)
            out = logits_t

        if MARK:
            W = torch.randn((self.state_size, 1))
            b = torch.zeros([1])
            logits_t = torch.mm(output, W) + b
            # redeclare W, b
            W = torch.randn((self.state_size, DIM_SIZE))
            b = torch.zeros([DIM_SIZE])
            logits_prob = torch.mm(output, W) + b
            logits_prob = F.softmax(logits_prob, dim=1)
            logits = torch.cat([logits_t, logits_prob], dim=1)
            logits = logits.reshape(self.batch_size, num_steps, DIM_SIZE + 1)
            out = logits
        else:
            out = out.reshape(self.batch_size, num_steps, 1)
        return out, hidden
Example #7
    def __init__(self, d_model, max_len=512):
        """
        d_model: a scalar; the model dimension (512 by default in the paper).
        max_len: a scalar; the maximum length of a text sequence.
        The positional encoding has the same dimension (512) as the token
        embedding so that the two can be summed.
        Each dimension of the Transformer positional encoding follows a
        sinusoid; the wavelengths form a geometric progression from 2*pi
        to 10000*2*pi.
        Note: BERT/GPT use learned absolute position embeddings, while the
        original Transformer uses these fixed sinusoidal encodings.
        """

        super().__init__()

        pe = torch.zeros(max_len, d_model).float()
        pe.requires_grad = False

        position = torch.arange(0, max_len).float().unsqueeze(1)
        div_term = (torch.arange(0, d_model, 2).float() *
                    -(math.log(10000.0) / d_model)).exp()

        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)

        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)
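
A standalone sketch of the same computation, useful for confirming the buffer shape outside the module (d_model and max_len chosen to match the defaults above):

import math
import torch

max_len, d_model = 512, 512
pe = torch.zeros(max_len, d_model)
position = torch.arange(0, max_len).float().unsqueeze(1)
div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()
pe[:, 0::2] = torch.sin(position * div_term)
pe[:, 1::2] = torch.cos(position * div_term)
print(pe.unsqueeze(0).shape)  # torch.Size([1, 512, 512])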
Example #8
    def extract(self, root_dir: str, vocabs):
        path = "{}/output_images_features".format(root_dir)
        if not os.path.exists(path):
            os.makedirs(path)
        for i in range(len(vocabs)):
            vocab = vocabs[i]
            temp = torch.zeros(1, 2048)
            vocab_images_path = "{}/output_vocab_images".format(root_dir)
            vocab_features = "{}/{}".format(path, vocab)
            vocab_images = [
                "{}/{}".format(vocab_images_path, p)
                for p in os.listdir(vocab_images_path)
            ]
            for j in range(len(vocab_images)):
                vocab_image = vocab_images[j]
                image = cv2.imread(vocab_image)
                image = cv2.resize(image, (224, 224))  # resnet101 input size
                image = np.swapaxes(image, 0, 2)
                image = np.swapaxes(image, 1, 2)
                # resnet101 input
                image = Variable(torch.from_numpy(image).cuda().unsqueeze(0))
                temp += model(image).squeeze(0).view(1, 2048)
            temp /= len(vocab_images)  # average the features over all images
            torch.save(temp, vocab_features)
Example #9
    def create_new_state(self, batch_size: int):
        return torch.zeros(batch_size, self.N)
Example #10
    def forward(self, loc, conf, dbox_list):
        """
        Run the forward computation.
        input:
            loc: offset predictions
            conf: detection confidences
            dbox_list: list of default boxes [8732, 4]
        output:
            torch.Size([batch_num, 21, top_k, 5])  (BBox info for the top_k detections per class)
        """

        # get the sizes
        num_batch = loc.size(0)  # batch size
        num_dbox = loc.size(1)  # number of DBoxes (8732)
        num_classes = loc.size(2)  # number of classes

        # normalize conf with softmax
        conf = self.softmax(conf)

        # allocate the output tensor [batch_num, 21, top_k, BBox info]
        output = torch.zeros(num_batch, num_classes, self.top_k, 5)

        # reorder conf to [batch_num, num_classes, num_dbox]
        conf_pred = conf.transpose(2, 1)

        # loop over the mini-batch
        for i in range(num_batch):
            # decode loc and the DBox info into BBoxes
            decoded_boxes = decode(loc[i], dbox_list)

            # copy the conf scores
            conf_score = conf_pred[i].clone()

            # loop over classes (start at 1: index 0 is the background class)
            for cl in range(1, num_classes):
                # conf_score: [21, 8732] => one score per DBox for each class
                cs = conf_score[cl]
                c_mask = cs.gt(self.conf_threshold
                               )  # 1 where the confidence exceeds the threshold, else 0 (gt: greater than)
                scores = cs[c_mask]  # only the scores above the threshold remain

                if scores.nelement(
                ) == 0:  # nelement counts elements => nothing to do if no score passed
                    continue

                # resize c_mask ([8732]) so it also applies to decoded_boxes
                l_mask = c_mask.unsqueeze(1).expand_as(
                    decoded_boxes)  # tensor([8732, 4])

                # decoded_boxes[l_mask] is flattened to 1-D, so view it back to tensor([num_above_threshold, 4])
                boxes = decoded_boxes[l_mask].view(-1, 4)

                # run non-maximum suppression to drop overlapping BBoxes
                # ids: indices of the BBoxes that survived NMS, in descending conf order
                # count: number of BBoxes that survived NMS
                ids, count = non_maximum_suppression(boxes, scores,
                                                     self.nms_threshold,
                                                     self.top_k)

                # store the BBox results in output
                output[i, cl, :count] = torch.cat(
                    (scores[ids[:count]].unsqueeze(1), boxes[ids[:count]]), 1)
        return output  # tensor([1, 21, 200, 5]) => per-image BBox info (after NMS)
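
forward relies on a decode helper that is not defined in this snippet. For reference, a common SSD-style decode looks like the sketch below (variances 0.1/0.2 assumed, as in typical SSD implementations; the actual helper used here may differ):

import torch

def decode(loc, dbox_list):
    # offsets (dcx, dcy, dw, dh) + default boxes (cx, cy, w, h) -> (xmin, ymin, xmax, ymax)
    boxes = torch.cat((
        dbox_list[:, :2] + loc[:, :2] * 0.1 * dbox_list[:, 2:],
        dbox_list[:, 2:] * torch.exp(loc[:, 2:] * 0.2)), dim=1)
    boxes[:, :2] -= boxes[:, 2:] / 2  # center -> top-left corner
    boxes[:, 2:] += boxes[:, :2]      # width/height -> bottom-right corner
    return boxes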
Example #11
    def __getitem__(self, index):
        """__getitem__

        :param index:
            
        rstrip() : 删除字符串末尾的指定字符(默认为空格).
        os.path.basename(): 返回path最后的文件名. 如果path以/或\结尾,则返回空值.
        """
        img_path = self.files[self.split][index].rstrip()
        label_path = os.path.join(self.annotations_base, os.path.basename(img_path)[:-4] + '.png')
        #print(img_path)
        #print(label_path)
        
        # assert that the paths exist
        assert os.path.exists(img_path), '[{}] does not exist'.format(img_path)
        assert os.path.exists(label_path), '[{}] does not exist'.format(label_path)
        
        # load image and label
        try:
            img = m.imread(img_path, mode='RGB')
            #img = np.array(img, dtype=np.uint8)
            #img = img[:, :, ::-1]  # RGB --> BGR !!!
            #print(img.shape)  #  eg. (512, 512, 3)
            label = m.imread(label_path)
            #label = np.array(label, dtype=np.uint8)
            #print(label.shape)  #  eg. (512, 512)
            
            assert(img.ndim == 3)
            assert(label.ndim == 2)
            assert(img.shape[0] == label.shape[0])
            assert(img.shape[1] == label.shape[1])
            
            # image to float
            # img = img[:, :, ::-1]  # RGB --> BGR !!!
            #img = img.astype(np.float32)[:, :, ::-1]  # 64
            #label = label.astype(np.float32)
            
            # classes = np.unique(label)  
            # scale and crop
            # flip
            if self.split == "training":
                random_flip = np.random.choice([-1, 0, 1, 2])
                # print(random_flip)
                if random_flip == 1:
                    #img, label = self._flip(img, label)
                    img = cv2.flip(img, 1)
                    label = cv2.flip(label, 1) 
                elif random_flip == 0:
                    img = cv2.flip(img, 0)
                    label = cv2.flip(label, 0)
                else:
                    img = img.copy()
                    label = label.copy()
                    
                """
                elif random_flip == -1:
                    img = cv2.flip(img, -1)
                    label = cv2.flip(label, -1)
                """
                
            if self.img_size[0] > 0 and self.img_size[1] > 0:
                img, label = self._scale_and_crop(img, label, self.img_size, self.split)
                
            # order defaults to 1 (bilinear); after resize the image range is back in [0, 1]
            # img = transform.resize(img, (self.img_size[0], self.img_size[1]), order=1)

            # img = m.imresize(img, (self.img_size[0], self.img_size[1]), interp='bilinear') # uint8 with RGB mode
            # img = cv2.resize(img.copy(), (self.img_size[0], self.img_size[1]))

            # image to float
            # image_ori = img.copy()  # original image
            # img = img.astype(np.float32)  # 64

            img = img.astype(np.float32)[:, :, ::-1]    # RGB --> BGR !!!
            # label = label.astype(np.float32)
            
            if self.img_norm:
                # Resize scales images from 0 to 255, thus we need to divide by 255.0
                img = img.astype(np.float32) / 255.0
            img = img.transpose((2, 0, 1))  # NHWC --> NCHW
            
            # classes = np.unique(label)    # np.unique(): for a 1-D array or list, removes duplicates and returns a new sorted array
            # label = label.astype(np.float64)
            # label = m.imresize(label, (self.img_size[0], self.img_size[1]), interp='nearest', mode='F')
            # print(classes)
            # print('label', np.unique(label))

            # label = m.imresize(label, (self.img_size[0], self.img_size[1]), interp='nearest')
            
            # label to int from 0/-1 to 150/149, 151 classes in total
            label = label.astype(np.int64) - 1
            # label = label.astype(np.int)
            
            """
            if not np.all(classes == np.unique(label)):
                print("WARN: resizing labels yielded fewer classes")
            """
            if not np.all(np.unique(label) < self.n_classes):
                raise ValueError("Segmentation map contained invalid class values")
            
            if self.augmentations is not None:
                img, label = self.augmentations(img, label)
            
            if self.is_transform:
                img, label = self.transform(img, label)
            
            # to torch tensor
            image = torch.from_numpy(img)
            # segmentation = torch.from_numpy(label)
            segmentation = torch.from_numpy(label).long()
            
            # segmentation = segmentation - 1
            
            # subtracted by mean and divided by std
            # image = self.img_transform(image)
            
        except Exception as e:
            print('Failed loading image/label [{}]: {}'.format(img_path, e))
            
            # dummy data

            image = torch.zeros(3, self.img_size[0], self.img_size[1])  # (C, H, W)
            segmentation = -1 * torch.ones(self.img_size[0], self.img_size[1]).long()

            return image, segmentation, img_path
        
        # subtracted by mean and divided by std
        image = self.img_transform(image)

        return image, segmentation, img_path
Example #12
    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)
Example #13
    def __getitem__(self, index):

        img_path = self.files[self.split][index].rstrip()
        label_path = os.path.join(self.annotations_base,
                                  os.path.basename(img_path)[:-4] + '.png')

        # assert that the paths exist
        assert os.path.exists(img_path), '[{}] does not exist'.format(img_path)
        assert os.path.exists(label_path), '[{}] does not exist'.format(
            label_path)

        # load image and label
        try:
            img = m.imread(img_path, mode='RGB')
            #img = np.array(img, dtype=np.uint8)
            img = img[:, :, ::-1]  # RGB --> BGR !!!
            #print(img.shape)  #  eg. (512, 512, 3)
            label = m.imread(label_path)
            #label = np.array(label, dtype=np.uint8)
            #print(label.shape)  #  eg. (512, 512)

            assert (img.ndim == 3)
            assert (label.ndim == 2)
            assert (img.shape[0] == label.shape[0])
            assert (img.shape[1] == label.shape[1])
            """
            # random scale , crop, flip
            if self.img_size[0] > 0 and self.img_size[1] > 0:
                img, seg = self._scale_and_corp(img, label, self.img_size, self.split)
            """
            # flip
            if self.split == "training":
                random_flip = np.random.choice([0, 1])
                if random_flip == 1:
                    img, label = self._flip(img, label)
                    #img = cv2.flip(img, 1)
                    #label = cv2.flip(label, 1)

            img = m.imresize(img, (self.img_size[0], self.img_size[1]),
                             interp='bilinear')  # uint8 with RGB mode
            #img = cv2.resize(img.copy(), (self.img_size[0], self.img_size[1]))

            # image to float
            # Resize scales images from 0 to 255, thus we need to divide by 255.0
            img = img.astype(np.float32) / 255.0
            img = img.transpose((2, 0, 1))  # NHWC --> NCHW

            label = m.imresize(label, (self.img_size[0], self.img_size[1]),
                               interp='nearest',
                               mode='F')
            # label to int from 0/-1 to 150/149, 151 classes in total
            label = label.astype(np.int64) - 1

            # to torch tensor
            image = torch.from_numpy(img)
            segmentation = torch.from_numpy(label).long()

        except Exception as e:
            print('Failed loading image/label [{}]: {}'.format(img_path, e))

            # dummy data

            image = torch.zeros(3, self.img_size[0],
                                self.img_size[1])  # (C, H, W)
            segmentation = -1 * torch.ones(self.img_size[0],
                                           self.img_size[1]).long()

            return image, segmentation, img_path

        # subtracted by mean and divided by std
        image = self.img_transform(image)

        return image, segmentation, img_path
Example #14
    def update_recency_map(self, nn_indices):
        # one vote per nearest-neighbor hit; flattened to match nn_indices.view(-1)
        mask = Variable(torch.ones(nn_indices.numel()).cuda())
        self.recency_map.scatter_add_(0, nn_indices.view(-1), mask)
        self.recency_map.add_(-1).clamp_(0, 100)
        _, self.stale_ind = self.recency_map.min(0)
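
For reference, a small CPU-only sketch of the scatter_add_ pattern used above (shapes hypothetical):

import torch

recency_map = torch.zeros(6)
nn_indices = torch.tensor([1, 3, 3])
recency_map.scatter_add_(0, nn_indices, torch.ones(3))
print(recency_map)  # tensor([0., 1., 0., 2., 0., 0.])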