def get_flow(t, theta, map_size):
    """
    Rotates the map by theta and translates the rotated map by t.

    Assume that the robot rotates by an angle theta and then moves forward by
    translation t. This function returns the flow field. For every pixel in
    the new image, it tells us which pixel in the original image it came from:
    NewI(x, y) = OldI(flow_x(x,y), flow_y(x,y)).

    Assume there is a point p in the original image. The robot rotates by R and
    moves forward by t: p1 = Rt*p; p2 = p1 - t (the world moves in the opposite
    direction). So p2 = Rt*p - t, and thus p2 came from R*(p2 + t), which is what
    this function calculates.

      t:      ... x 2 (translation for B batches of N motions each).
      theta:  ... x 1 (rotation for B batches of N motions each).

      Output: ... x map_size x map_size x 2
    """
    B = t.view(-1, 2).size()[0]
    tx, ty = torch.unbind(t.view(-1, 1, 1, 1, 2), dim=4)  # Bx1x1x1
    theta = theta.view(-1, 1, 1, 1)
    # c = tf.constant((map_size - 1.) / 2., dtype=tf.float32)
    # The map is assumed to be square, so a single center offset is used.
    c = Variable(torch.Tensor([(map_size[0] - 1.) / 2.]).double())
    x, y = np.meshgrid(np.arange(map_size[0]), np.arange(map_size[1]))
    x = Variable(torch.from_numpy(x).double()).view(1, map_size[0],
                                                    map_size[1], 1)
    y = Variable(torch.from_numpy(y).double()).view(1, map_size[0],
                                                    map_size[1], 1)
    # x = tf.constant(x[np.newaxis, :, :, np.newaxis], dtype=tf.float32, name='x',
    #                 shape=[1, map_size, map_size, 1])
    # y = tf.constant(y[np.newaxis, :, :, np.newaxis], dtype=tf.float32, name='y',
    #                 shape=[1, map_size, map_size, 1])

    tx = tx - c.expand(tx.size())
    x = x.expand(B, *x.size()[1:])
    x = x + tx.expand(x.size())
    ty = ty - c.expand(ty.size())
    y = y.expand(B, *y.size()[1:])
    y = y + ty.expand(y.size())  # BxHxWx1
    # x = x - (-tx + c.expand(tx.size())) #1xHxWx1
    # y = y - (-ty + c.expand(ty.size()))

    sin_theta = torch.sin(theta)  #Bx1x1x1
    cos_theta = torch.cos(theta)
    xr = x * cos_theta.expand(x.size()) - y * sin_theta.expand(y.size())
    yr = x * sin_theta.expand(x.size()) + y * cos_theta.expand(
        y.size())  # BxHxWx1
    # xr = cos_theta * x - sin_theta * y
    # yr = sin_theta * x + cos_theta * y

    xr = xr + c.expand(xr.size())
    yr = yr + c.expand(yr.size())

    flow = torch.stack([xr, yr], dim=-1)
    sh = list(t.size()[:-1]) + [map_size[0], map_size[1], 2]
    # sh = tf.unstack(tf.shape(t), axis=0)
    # sh = tf.stack(sh[:-1] + [tf.constant(_, dtype=tf.int32) for _ in [map_size, map_size, 2]])
    flow = flow.view(*sh)
    return flow
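A quick smoke test for get_flow (a hypothetical sketch, not from the original project). It assumes map_size is a square (H, W) pair and that t and theta follow the flattened batch-of-motions layout described in the docstring:

import torch
from torch.autograd import Variable

# Zero rotation and zero translation should give an identity flow field.
t = Variable(torch.zeros(4, 1, 2).double())      # 4 batches x 1 motion x (tx, ty)
theta = Variable(torch.zeros(4, 1, 1).double())  # 4 batches x 1 motion x rotation
flow = get_flow(t, theta, (32, 32))
print(flow.size())  # expected: (4, 1, 32, 32, 2)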
Example #2
    def get_representation(self, encoded):
        if 1 - self.opt.dropout_rate_probs < 1e-6:
            encoded = self.dropout_embedding(encoded)

        representation = []
        for one_type in self.opt.pooling_type.split(','):
            if one_type == 'max':
                probs = torch.max(encoded, dim=1)[0]
            elif one_type == 'average':
                probs = torch.mean(encoded, dim=1)
            elif one_type == 'none':
                probs = encoded.view(encoded.size(0), -1).contiguous()
            elif one_type == 'max_col':
                probs = torch.max(torch.transpose(encoded, 1, 2), dim=1)[0]
            elif one_type == 'average_col':
                probs = torch.mean(torch.transpose(encoded, 1, 2), dim=1)
            else:
                print(
                    'Wrong input pooling type -- the default flatten layer is used.'
                )
                probs = encoded.view(encoded.size(0), -1).contiguous()
            representation.append(probs)

        if len(representation) > 1:
            representation = torch.cat(representation, dim=-1)
        else:
            representation = representation[0]
        return representation
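For reference, a standalone sketch (independent of the class above) of what each pooling option does to a (batch, seq_len, dim) tensor; note that torch.max returns a (values, indices) pair, so only the values are kept:

import torch

encoded = torch.randn(8, 12, 300)                         # (batch, seq_len, dim)
max_pool = torch.max(encoded, dim=1)[0]                   # (8, 300)
avg_pool = torch.mean(encoded, dim=1)                     # (8, 300)
flat = encoded.view(encoded.size(0), -1)                  # (8, 12 * 300)
max_col = torch.max(encoded.transpose(1, 2), dim=1)[0]    # (8, 12)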
    def forward(self, x):
        """ input is the image activations following a convolutional layer.
            dimensions: N x C x H x W

            The co-occurrence layer computes a vector of length C ** 2
        """

        x = F.relu(self.conv1(x))

        x = F.pad(x, (2, 2, 2, 2), mode='reflect')
        x = self.gaussian(x)

        N, C, H, W = x.size()

        # list of length H*W of (N, C, H, W) tensors containing each offset
        x_offsets = [
            self.roll(self.roll(x, i, 2), j, 3) for i in range(H)
            for j in range(W)
        ]
        x_offsets = torch.cat(x_offsets, 1).to(DEVICE)  # (N, C*H*W, H, W)
        x_offsets = x_offsets.view(N, C * H * W,
                                   H * W).permute(0, 2, 1)  # (N, H*W, C*H*W)

        x_base = x.view(N, C, H * W)  # (N, C, H*W)
        corrs = torch.bmm(x_base, x_offsets)  # (N, C, C*H*W)
        corrs = corrs.view(N, C * C, H * W).permute(0, 2, 1)
        c_ij, best_offset = torch.max(corrs, 1)  # (N, C*C)

        return c_ij
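The offset enumeration above is built from circular shifts. A small sketch of the same idea for a single offset, using torch.roll as a stand-in for self.roll (the shapes are hypothetical): correlating the original and the shifted maps gives one C x C slice of the co-occurrence tensor.

import torch

x = torch.randn(2, 4, 5, 5)                              # (N, C, H, W)
shifted = torch.roll(x, shifts=(1, 2), dims=(2, 3))      # one spatial offset
N, C, H, W = x.size()
corr = torch.bmm(x.view(N, C, H * W),
                 shifted.view(N, C, H * W).transpose(1, 2))  # (N, C, C)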
Example #4
    def forward(self, text, z):
        """ Given a caption embedding and latent variable z(noise), generate an image

        Arguments
        ---------
        text : torch.FloatTensor
            Output of the skipthought embedding model for the caption
            text.size() = (batch_size, text_embed_dim)

        z : torch.FloatTensor
            Latent variable or noise
            z.size() = (batch_size, z_dim)

        --------
        Returns
        --------
        output : An image of shape (64, 64, 3)

        """
        reduced_text = self.reduced_text_dim(
            text)  # (batch_size, reduced_text_dim)
        concat = torch.cat((reduced_text, z),
                           1)  # (batch_size, reduced_text_dim + z_dim)
        concat = self.concat(concat)  # (batch_size, 64*8*4*4)
        concat = concat.view(-1, 4, 4, 64 * 8)  # (batch_size, 4, 4, 64*8)
        d_net_out = self.d_net(concat)  # (batch_size, 64, 64, 3)
        output = d_net_out / 2. + 0.5  # (batch_size, 64, 64, 3)

        return output
    def forward(self, x):
        if self.resnet == False:
            conv1 = self.lrelu(self.conv1(x))
            conv2 = self.lrelu(self.conv2(conv1))
            conv3 = self.lrelu(self.conv3(conv2))
            conv4 = self.lrelu(self.conv4(conv3))
            conv4 = conv4.view(conv4.size(0) * self.num_rotation, -1)
            gan_logits = self.fully_connect_gan1(conv4)
            if self.ssup:
                rot_logits = self.fully_connect_rot1(conv4)
                rot_prob = self.softmax(rot_logits)
        else:
            re1 = self.re1(x)
            re2 = self.re2(re1)
            re3 = self.re3(re2)
            re4 = self.re4(re3)
            re4 = self.relu(re4)
            re4 = torch.sum(re4, dim=(2, 3))
            gan_logits = self.fully_connect_gan2(re4)
            if self.ssup:
                rot_logits = self.fully_connect_rot2(re4)
                rot_prob = self.softmax(rot_logits)

        if self.ssup:
            return self.sigmoid(gan_logits), gan_logits, rot_logits, rot_prob
        else:
            return self.sigmoid(gan_logits), gan_logits
Example #6
def sent2vec(sent=''):
    """
    Parameters
    ----------
    sent : string
        sentence
    Returns
    -------
    sentence_vector : torch.FloatTensor
        sentence vector built from the word vectors, as a torch tensor
    Example
    -------
    I love you.
    =>
        tensor([
         [1.0765e-01, -3.6939e+00,  1.2139e+00, -1.0561e+00, -2.0084e+00,       # "I" vector
          -1.4055e+00, -9.0298e-01, -2.3618e-01,  1.5151e+00, -1.2158e-01,
          2.3321e+00, -5.7944e-01, -2.2252e-01, ...],
         [-6.3879e-01, -1.7294e+00,  1.1637e-01, -1.0025e+00, -6.6298e-01,      # "love" vector
          -1.6146e+00, -1.1563e+00, -1.4284e+00,  1.1772e+00, -1.4051e+00,
          -5.2077e-01, -4.0171e-01, -1.9743e-01, ...],
         [4.7850e-01, -1.4013e+00, -7.7003e-01, -9.6428e-01, -6.0314e-01,       # "you" vector
          1.7834e-01,  6.1909e-02, -2.0041e-01,  4.4003e-01,  5.2138e-01,
          -2.2191e-01, -2.6324e-02, -1.1932e+00, ...]
        ])
    =>
        torch.Size([3,250]) #[keywords num, word2vec dim]
    """
    # Placeholder: three 250-d word vectors, one per keyword ("I", "love", "you").
    inputs = torch.randn(3 * 250)
    inputs = torch.cat([inputs])  # torch.cat merges the word vectors
    inputs = inputs.view(-1, 250)  # torch.view groups the values into the given shape
    return inputs
Example #7
 def backward(ctx, grad_output):
     gate = ctx.saved_tensors[0]
     if gate.item() == 0:
         beta = torch.cuda.FloatTensor(grad_output.size(0)).uniform_(0, 1)
         beta = beta.view(beta.size(0), 1, 1, 1).expand_as(grad_output)
         beta = Variable(beta)
         return beta * grad_output, None, None, None
     else:
         return grad_output, None, None, None
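A minimal sketch of the broadcasting trick used above: a per-sample scalar is reshaped to (N, 1, 1, 1) and expanded so it rescales every element of an (N, C, H, W) gradient (CPU tensors here, purely for illustration):

import torch

grad_output = torch.randn(8, 3, 32, 32)
beta = torch.empty(grad_output.size(0)).uniform_(0, 1)   # one random scalar per sample
scaled = beta.view(-1, 1, 1, 1).expand_as(grad_output) * grad_output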
Example #8
    def imageModel(self, img):

        img = torch.FloatTensor(img)
        f_I = self.model(
            img
        )  # It should return feature maps of shape = (batch_size, 14, 14, 512)
        f_I = f_I.view(self.batch_size, 14 * 14, 512)
        v_I = F.tanh(self.W_I(f_I))  # (batch_size, 196, hidden_size)

        return v_I
Example #9
 def forward(self, x):
     out = self.upsample(x)
     out = self.relu(self.conv1(out))
     out = self.relu(self.conv2(out))
     out = self.relu(self.conv3(out))
     out = self.relu(self.conv4(out))
     out = self.relu(self.conv5(out))
     out = self.relu(self.conv6(out))
     out = self.conv7(out)
     out = out.view(-1, self.n_actions)
     acc = self.sigmoid(out[:, 0])
     steer = self.tanh(out[:, 1])
     bools = out[:, 2:]
     return acc, steer, bools
Example #10
    def forward(self, features, captions, concepts, lengths):
        """
        :param features: encoded picture features, batch_size * 196 * 152
        :param captions: batch_size * time_step
        :param concepts: concepts of picture[sparse matrix], batch_size * concepts_size
        :param lengths: valid lengths for each padded caption.
        :return: predicts of each time step.
        """

        batch_size, time_step = captions.data.shape
        predicts = torch.zeros(batch_size, time_step, self.vocab_size)

        # we can initialize as mean of features or view it as 196 * 152 1d feature vector
        h0, c0 = self.get_start_states(batch_size)
        word_embeddings = self.E_voc(
            captions)  # batch_size * time_steps * embed_size
        concepts_embeddings = self.E_concept(
            concepts)  # batch_size * num_concepts * con_embed_size

        for t in range(time_step):
            batch_size = sum(i >= t for i in lengths)
            words_input = word_embeddings[:batch_size, t, :]
            if t == 0:
                xt = self.feature_ly(
                    features.view(batch_size, -1))  # batch * input_size
            else:
                alpha, _ = self.att_in(concepts_embeddings, words_input)
                alpha = alpha.unsqueeze(2).expand(-1, -1,
                                                  self.embed_size_concept)
                weighted_sum = torch.sum(alpha * concepts_embeddings,
                                         1).squeeze(1)
                weighted_sum = self.concept_dim_ly(weighted_sum)
                xt = self.att_in_out_ly(weighted_sum + words_input)

            h0, c0 = self.lstm_cell(xt,
                                    (h0[:batch_size, :], c0[:batch_size, :]))
            beta = self.att_out(h0, concepts_embeddings)
            # batch size, hidden_size, #concepts
            weighted_sum_out = torch.sum(beta * F.relu(concepts_embeddings),
                                         1).squeeze(1)
            weighted_sum_out = self.linear_w(weighted_sum_out)
            outputs = self.att_out_out(weighted_sum_out)
            predicts[:batch_size, t, :] = outputs
        return predicts
Example #11
 def pca(self, X, k):  # k is the number of components you want
     # mean of each feature
     mean = torch.mean(X)
     # normalization
     norm_X = X - mean
     norm_X = norm_X.view(1, len(X))
     # scatter matrix
     scatter_matrix = torch.mm(torch.transpose(norm_X, 0, 1), norm_X)
     # Calculate the eigenvectors and eigenvalues
     eig_val, eig_vec = np.linalg.eig(scatter_matrix.numpy())
     eig_pairs = [(np.abs(eig_val[i]), eig_vec[:, i]) for i in range(len(X))]
     # sort eig_vec based on eig_val from highest to lowest
     eig_pairs.sort(reverse=True)
     # select the top k eig_vec
     feature = np.array([ele[1] for ele in eig_pairs[:k]])
     # get new data
     data = np.dot(norm_X.numpy(), np.transpose(feature))
     data = torch.tensor(data)
     return data
Example #12
    def loss(self, y_true, y_pred, from_logits=False, label_smoothing=0):
        """
        Calculate the loss
          (The test process will use this function)
        TODO
            you should provide this function whether or not you use it in training,
            because the test process will call it
        :return: loss (float)
        """
        y_true = y_true.view(-1)

        # calculate the padding mask
        mask = (y_true != 0).to(y_pred.dtype)

        # calculate the loss
        loss_ = self.compile_params['loss'](y_true, y_pred)

        # remove the padding part's loss by multiplying by the mask
        loss_ *= mask

        # calculate the mean loss
        return torch.mean(loss_)
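A self-contained sketch of the same padding-mask idea in plain PyTorch (the compiled loss above is replaced with a hypothetical token-level cross entropy): per-token losses are zeroed at padding positions and averaged over the real tokens only.

import torch
import torch.nn.functional as F

y_true = torch.tensor([3, 7, 0, 0])              # 0 marks padding
y_pred = torch.randn(4, 10)                      # per-token logits
per_token = F.cross_entropy(y_pred, y_true, reduction='none')
mask = (y_true != 0).to(per_token.dtype)         # 1 for real tokens, 0 for padding
loss = (per_token * mask).sum() / mask.sum()     # mean over real tokens only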
Example #13
    def forward(self, image, text):
        """ Given the image and its caption embedding, predict whether the image
        is real or fake.

        Arguments
        ---------
        image : torch.FloatTensor
            image.size() = (batch_size, 64, 64, 3)

        text : torch.FloatTensor
            Output of the skipthought embedding model for the caption
            text.size() = (batch_size, text_embed_dim)

        --------
        Returns
        --------
        output : Probability for the image being real/fake
        logit : Final score of the discriminator

        """

        d_net_out = self.d_net(image)  # (batch_size, 4, 4, 512)
        text_reduced = self.text_reduced_dim(text)  # (batch_size, text_reduced_dim)
        text_reduced = text_reduced.unsqueeze(1)  # (batch_size, 1, text_reduced_dim)
        text_reduced = text_reduced.unsqueeze(2)  # (batch_size, 1, 1, text_reduced_dim)
        text_reduced = text_reduced.expand(-1, 4, 4, self.text_reduced_dim)

        concat_out = torch.cat((d_net_out, text_reduced), 3)  # (batch_size, 4, 4, 512 + text_reduced_dim)

        logit = self.cat_net(concat_out)
        concat_out = concat_out.view(-1, concat_out.size()[1] * concat_out.size()[2] * concat_out.size()[3])
        concat_out = self.linear(concat_out)

        output = F.sigmoid(logit)

        return output, logit
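The two unsqueeze calls plus expand implement the usual text-conditioning trick: the reduced caption embedding is tiled over the 4 x 4 spatial grid before being concatenated with the image features. A minimal sketch with hypothetical sizes (channel-last layout, as in the snippet):

import torch

text_reduced = torch.randn(16, 128)               # (batch, text_reduced_dim)
tiled = text_reduced.unsqueeze(1).unsqueeze(2)    # (batch, 1, 1, 128)
tiled = tiled.expand(-1, 4, 4, 128)               # (batch, 4, 4, 128)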
Example #14
    def forward(self, sound, target):
        # print(sound.shape, target.shape)
        enc_mask = get_enc_padding_mask(sound).to(self.device)
        sound, enc_mask = self.sound_embed(sound, enc_mask)
        new_feat_len = torch.tensor([len(enc_mask[0]) - enc_mask[i].sum() for i in range(enc_mask.shape[0])]).to(self.device)
        sound[enc_mask] = 0
        target = self.text_embed(target)
        target = self.pos_encoder(target)
        trg_mask = generate_square_subsequent_mask(target.size(1)).to(self.device) # for asr

        # out = self.transformer(sound.permute(1,0,2), target.permute(1, 0, 2),
        #                        tgt_mask=trg_mask, src_key_padding_mask=enc_mask)


        enc = self.transformer.encoder(sound.permute(1, 0, 2), src_key_padding_mask=enc_mask)
        # out_ctc = self.lin_ctc(enc).permute(1, 0, 2) # for asr
        # out = self.transformer.decoder(target.permute(1, 0, 2), enc, tgt_mask=trg_mask) # for asr
        out = self.transformer.decoder(target.permute(1, 0, 2), enc) # for classifier
        out = out.permute(1, 0, 2) # (batch, seq, embed) for classifier
        # out = out.max(dim=0, keepdim=True)[0]
        out = self.out_lin_class(out[:, 1, :]) # for classifier
        # out = self.out_lin(out.permute(1, 0, 2)) # for asr
        # return out, out_ctc, new_feat_len # for asr
        return out
Example #15
		return x

	def num_flat_features(self,x):
		size=x.size()[1:]
		num_features=1
		for s in size:
			num_features*=s

		return num_features

net=Net()
print(net)

params=list(net.parameters())
print(len(params))
print(params[0].size())

input=torch.randn(1,1,32,32)
out=net(input)
print(out)

net.zero_grad()
out.backward(torch.randn(1,10))

output=net(input)
target=torch.randn(10)
target=target.view(1,-1)
criterion=nn.MSELoss()

loss=criterion(output,target)
print(loss)
Example #16
    def forward(self,
                roi_feat,
                position_embedding,
                nongt_dim,
                fc_dim,
                feat_dim,
                dim=(1024, 1024, 1024),
                group=16,
                index=1):
        """ Attetion module with vectorized version

                Args:
                    roi_feat: [num_rois, feat_dim]
                    position_embedding: [num_rois, nongt_dim, emb_dim]
                    nongt_dim:
                    fc_dim: should be same as group
                    feat_dim: dimension of roi_feat, should be same as dim[2]
                    dim: a 3-tuple of (query, key, output)
                    group:
                    index:

                Returns:
                    output: [num_rois, ovr_feat_dim, output_dim]
                """
        # With the default dim=(1024, 1024, 1024) and group=16, dim_group is (64, 64, 64).
        dim_group = (dim[0] // group, dim[1] // group, dim[2] // group)
        # Take the first nongt_dim rows of roi_feat; nongt_roi_feat has shape [nongt_dim, feat_dim].
        nongt_roi_feat = roi_feat[:nongt_dim, :]

        # Reshape position_embedding from [num_rois, nongt_dim, emb_dim]
        emb_shape = position_embedding.size()
        # [num_rois * nongt_dim, emb_dim]
        position_embedding_reshape = position_embedding.view(emb_shape[0] * emb_shape[1],
                                                             emb_shape[2])

        # position_feat_1, [num_rois * nongt_dim, fc_dim]
        position_feat_1 = F.relu(self.pos_fc(position_embedding_reshape))
        # aff_weight, [num_rois, nongt_dim, fc_dim]
        aff_weight = position_feat_1.view(-1, nongt_dim, fc_dim)
        # geometric weights, [num_rois, fc_dim, nongt_dim]
        aff_weight = aff_weight.permute(0, 2, 1)

        # multi head
        assert dim[0] == dim[1], 'Matrix multiply requires same dimensions!'
        # A fully connected layer produces q_data; its weights correspond to WQ in Eq. 4 of the paper.
        # roi_feat corresponds to fA in Eq. 4, with shape [num_rois, feat_dim]. q_data: [num_rois, 1024]
        q_data = self.query(roi_feat)
        # [num_rois, group, dim_group[0]], by default [num_rois, 16, 64]
        q_data_batch = q_data.view(-1, group, dim_group[0])
        # [group, num_rois, dim_group[0]], by default [16, num_rois, 64]
        q_data_batch = q_data_batch.permute(1, 0, 2)

        # A fully connected layer produces k_data; its weights correspond to WK in Eq. 4 of the paper.
        # nongt_roi_feat corresponds to fA in Eq. 4, with shape [nongt_dim, feat_dim]. k_data: [nongt_dim, 1024]
        k_data = self.key(nongt_roi_feat)
        # [nongt_dim, group, dim_group[1]], by default [nongt_dim, 16, 64]
        k_data_batch = k_data.view(-1, group, dim_group[1])
        # [group, nongt_dim, dim_group[1]], by default [16, nongt_dim, 64]
        k_data_batch = k_data_batch.permute(1, 0, 2)

        v_data = nongt_roi_feat

        # The matrix multiplication of Eq. 4 in the paper.
        # aff has shape [group, num_rois, nongt_dim], by default [16, num_rois, nongt_dim].
        aff = torch.bmm(q_data_batch, k_data_batch.permute(0, 2, 1))
        # aff_scale, [group, num_rois, nongt_dim]: the division in Eq. 4
        aff_scale = (1.0 / (dim_group[1] ** 0.5)) * aff
        # [num_rois, group, nongt_dim]
        # aff_scale is the result of Eq. 4: the appearance weight wA
        aff_scale = aff_scale.permute(1, 0, 2)

        assert fc_dim == group, 'fc_dim != group'
        # weighted_aff, [num_rois, fc_dim, nongt_dim]
        # The clamp implements Eq. 5 and the softmax implements Eq. 3. Since softmax
        # exponentiates its input, taking the log of the geometric weight wG first means the
        # exponential restores it as a multiplicative factor: e^(log(wG) + wA) = wG * e^(wA).
        # The softmax of Eq. 3 normalizes over dimension 2.
        weighted_aff = torch.log(torch.clamp(aff_weight, min=1e-6)) + aff_scale
        # [num_rois, fc_dim, nongt_dim]
        aff_softmax = self.weighted_affinity(weighted_aff)
        # [num_rois * fc_dim, nongt_dim]
        aff_softmax_reshape = aff_softmax.view(-1, nongt_dim)

        # Eq. 2
        # output_t, [num_rois * fc_dim, feat_dim]: multiply the weights w by fA
        output_t = torch.mm(aff_softmax_reshape, v_data)
        # output_t, [num_rois, fc_dim * feat_dim, 1, 1]
        output_t = output_t.view(-1, fc_dim * feat_dim, 1, 1)

        # Eq. 2 is computed with a 1x1 convolution with dim[2] (1024 by default) output channels;
        # the convolution weights correspond to WV in Eq. 2.
        # linear_out, [num_rois, dim[2], 1, 1]
        linear_out = self.linear_out(output_t)
        # [num_rois, dim[2]]
        # The grouped convolution (groups = fc_dim, 16 by default, the Nr parameter in the paper)
        # implements the concatenation of all the fR terms.
        output = linear_out.squeeze()
        output = linear_out.squeeze()

        return output
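The core of the module above is scaled dot-product attention with a geometric bias folded in through a logarithm before the softmax, so that exponentiation inside the softmax restores the bias as a multiplicative weight. A compact sketch with hypothetical shapes (single head, no learned projections):

import math
import torch
import torch.nn.functional as F

num_rois, nongt_dim, d_k = 32, 16, 64
q = torch.randn(num_rois, d_k)                 # query features
k = torch.randn(nongt_dim, d_k)                # key features
geom = torch.rand(num_rois, nongt_dim)         # geometric weights wG >= 0

aff = q.mm(k.t()) / math.sqrt(d_k)             # appearance weights wA (Eq. 4)
weighted = torch.log(torch.clamp(geom, min=1e-6)) + aff
attn = F.softmax(weighted, dim=1)              # softmax over the nongt dimension (Eq. 3)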
Example #17
File: omernn.py  Project: omerlevy/sru
    def forward(self, inputs, state, inputs_mask, hidden_state_mask,
                output_mask):
        # State
        if self.architecture.dual_state():
            hidden_state, cell_state = state
        else:
            hidden_state = cell_state = state

        # RNN Dropout
        dropped_inputs = inputs_mask * inputs
        dropped_hidden_state = hidden_state_mask * hidden_state

        # Content
        if self.architecture.content.has_transformation:
            if self.architecture.content.has_state:
                content_args = torch.cat(
                    [dropped_inputs, dropped_hidden_state], -1)
            else:
                content_args = dropped_inputs
            content = torch.mm(content_args, self.w_content)
        else:
            content = inputs  # TODO Should this be dropped out? Technically, the dropout is for the matrices.
        if self.architecture.content.has_bias:
            content = content + self.b_content
        if self.architecture.content.has_tanh:
            content = F.tanh(content)

        # Gates - Computation
        if self.architecture.gates.has_transformation:
            args = []
            if self.architecture.gates.is_state_arg:
                args.append(dropped_hidden_state)
            if self.architecture.gates.is_content_arg:
                args.append(content)  # TODO Should this be dropped out?
            if self.architecture.gates.is_input_arg:
                args.append(dropped_inputs)
            gates = torch.mm(torch.cat(args, -1), self.w_gates) + self.b_gates
        else:
            gates = self.b_gates

        # Gates - Aggregation
        num_gates = self.architecture.gates.num_gates()
        # Softmax
        if self.architecture.gates.is_softmax:
            gates = gates.view(-1, self.hidden_size,
                               num_gates).view(-1, num_gates)
            gates = F.softmax(gates, dim=-1)
            gates = gates.view(-1, self.hidden_size, num_gates)
            gates = [
                torch.squeeze(gate, -1)
                for gate in torch.split(gates, 1, -1)
            ]

            new_cell_state = gates[0] * cell_state + gates[1] * content
            if self.architecture.gates.has_highway and self.input_size == self.hidden_size:
                new_cell_state += gates[2] * inputs
            output = new_hidden_state = new_cell_state
            output = output_mask * output  # TODO This is different because it includes the highway
        # Sigmoid
        else:
            gates = torch.split(F.sigmoid(gates), self.hidden_size, -1)
            if self.architecture.gates.is_coupled:
                new_cell_state = gates[0] * cell_state + (1 -
                                                          gates[0]) * content
                gates = gates[1:]
            else:
                new_cell_state = gates[0] * cell_state + gates[1] * content
                gates = gates[2:]
            new_hidden_state = new_cell_state
            if self.architecture.gates.has_tanh:
                new_hidden_state = F.tanh(new_hidden_state)
            if self.architecture.gates.has_zero_gate:
                new_hidden_state = gates[0] * new_hidden_state
            output = new_hidden_state
            output = output_mask * output  # TODO This is different because it includes the highway
            if self.architecture.gates.has_highway and self.input_size == self.hidden_size:
                output = gates[-1] * output + (1 - gates[-1]) * inputs

        return output, (new_hidden_state, new_cell_state)
Example #18
def dense_resample(im, flow_im, output_valid_mask=False):
    """ Resample reward at particular locations.
    Args:
      im:      ...xHxW matrix to sample from.
      flow_im: ...xHxWx2 matrix, samples the image using absolute offsets as given
               by the flow_im.
    """
    valid_mask = None

    x, y = torch.unbind(flow_im, dim=-1)
    x = x.contiguous().view(-1)
    y = y.contiguous().view(-1)

    # constants
    # shape = tf.unstack(tf.shape(im))
    # channels = shape[-1]
    shape = im.size()
    width = shape[-1]
    height = shape[-2]
    num_batch = 1
    for dim in shape[:-2]:
        num_batch *= dim
    zero = Variable(torch.Tensor([0]).double())
    # num_batch = tf.cast(tf.reduce_prod(tf.stack(shape[:-3])), 'int32')
    # zero = tf.constant(0, dtype=tf.int32)

    # Round up and down.
    x0 = torch.floor(x)
    x1 = x0 + 1
    y0 = torch.floor(y)
    y1 = y0 + 1

    x0 = x0.clamp(0, width - 1)
    x1 = x1.clamp(0, width - 1)
    y0 = y0.clamp(0, height - 1)
    y1 = y1.clamp(0, height - 1)
    dim2 = width
    dim1 = width * height

    # Create base index
    base = torch.arange(num_batch) * dim1
    base = base.view(-1, 1)
    # base = tf.reshape(tf.range(num_batch) * dim1, shape=[-1, 1])
    base = base.expand(base.size()[0],
                       height * width).contiguous().view(-1)  # batch_size * H * W
    # base = tf.reshape(tf.tile(base, [1, height * width]), shape=[-1])

    base_y0 = base + y0.expand(base.size()) * dim2
    base_y1 = base + y1.expand(base.size()) * dim2
    idx_a = base_y0 + x0.expand(base_y0.size())
    idx_b = base_y1 + x0.expand(base_y1.size())
    idx_c = base_y0 + x1.expand(base_y0.size())
    idx_d = base_y1 + x1.expand(base_y1.size())

    # use indices to lookup pixels in the flat image and restore channels dim
    # sh = tf.stack([tf.constant(-1, dtype=tf.int32), channels])
    im_flat = im.view(-1)
    # im_flat = tf.cast(tf.reshape(im, sh), dtype=tf.float32)
    pixel_a = torch.gather(im_flat, 0, idx_a.long())
    pixel_b = torch.gather(im_flat, 0, idx_b.long())
    pixel_c = torch.gather(im_flat, 0, idx_c.long())
    pixel_d = torch.gather(im_flat, 0, idx_d.long())

    # and finally calculate interpolated values
    # x1_f = tf.to_float(x1)
    # y1_f = tf.to_float(y1)
    x1_f = x1.float()
    y1_f = y1.float()

    wa = torch.unsqueeze(((x1_f - x) * (y1_f - y)), 1)
    wb = torch.unsqueeze(((x1_f - x) * (1.0 - (y1_f - y))), 1)
    wc = torch.unsqueeze(((1.0 - (x1_f - x)) * (y1_f - y)), 1)
    wd = torch.unsqueeze(((1.0 - (x1_f - x)) * (1.0 - (y1_f - y))), 1)

    output = wa * pixel_a.unsqueeze(1) + wb * pixel_b.unsqueeze(
        1) + wc * pixel_c.unsqueeze(1) + wd * pixel_d.unsqueeze(1)
    # output = tf.reshape(output, shape=tf.shape(im))
    output = output.view(im.size())
    return output, valid_mask
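get_flow and dense_resample in this listing are meant to work together: get_flow produces absolute sampling coordinates for a rotated and translated map, and dense_resample bilinearly samples the map at those coordinates. A hedged usage sketch, assuming a square map and the flattened layouts used above; with zero motion the warped map should equal the input:

import torch
from torch.autograd import Variable

map_size = (8, 8)
im = Variable(torch.randn(1, 1, 8, 8).double())
t = Variable(torch.zeros(1, 1, 2).double())
theta = Variable(torch.zeros(1, 1, 1).double())

flow = get_flow(t, theta, map_size)       # (1, 1, 8, 8, 2)
warped, _ = dense_resample(im, flow)      # same shape and values as im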