Example #1
    def sample(self, model_output: torch.Tensor) -> torch.Tensor:
        """
        Sample uniformly between [0, 1] (for each batch example) and return the linear interpolation between the fitted
        quantiles closest to the sampled value.

        model_output is of shape (batch_size, n_timesteps, n_components, n_quantiles)
        """
        device = model_output.device
        num_samples, n_timesteps, n_components, n_quantiles = model_output.shape

        # obtain samples
        probs = torch.rand(size=(
            num_samples,
            n_timesteps,
            n_components,
            1,
        )).to(device)
        # add dummy dim
        probas = probs.unsqueeze(-2)

        # tile and transpose
        p = torch.tile(probas, (1, 1, 1, n_quantiles, 1)).transpose(4, 3)

        # prepare quantiles
        tquantiles = torch.tensor(self.quantiles).reshape(
            (1, 1, 1, -1)).to(device)

        # calculate index of biggest quantile smaller than the sampled value
        left_idx = torch.sum(p > tquantiles, dim=-1)

        # obtain index of smallest quantile bigger than sampled value
        right_idx = left_idx + 1

        # repeat the model output on the edges
        repeat_count = [1] * n_quantiles
        repeat_count[0] = 2
        repeat_count[-1] = 2
        repeat_count = torch.tensor(repeat_count).to(device)
        shifted_output = torch.repeat_interleave(model_output,
                                                 repeat_count,
                                                 dim=-1)

        # obtain model output values corresponding to the quantiles left and right of the sampled value
        left_value = torch.gather(shifted_output, index=left_idx, dim=-1)
        right_value = torch.gather(shifted_output, index=right_idx, dim=-1)

        # add 0 and 1 to quantiles
        ext_quantiles = [0.0] + self.quantiles + [1.0]
        expanded_q = torch.tile(torch.tensor(ext_quantiles),
                                left_idx.shape).to(device)

        # calculate closest quantiles to the sampled value
        left_q = torch.gather(expanded_q, index=left_idx, dim=-1)
        right_q = torch.gather(expanded_q, index=right_idx, dim=-1)

        # linear interpolation
        weights = (probs - left_q) / (right_q - left_q)
        inter = left_value + weights * (right_value - left_value)

        return inter.squeeze(-1)
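Below is a minimal, self-contained sketch of the same bracketing-and-interpolation trick on toy values; the quantile levels and batch shape are made up for illustration.

import torch

quantiles = torch.tensor([0.1, 0.5, 0.9])        # hypothetical fitted quantile levels
p = torch.rand(4, 1)                             # one uniform sample per row

# index of the largest quantile strictly smaller than each sampled value
left_idx = torch.sum(p > quantiles, dim=-1, keepdim=True)
right_idx = left_idx + 1

# pad the quantile grid with 0 and 1 so the indices never run off the ends
ext_q = torch.cat([torch.zeros(1), quantiles, torch.ones(1)])
left_q, right_q = ext_q[left_idx], ext_q[right_idx]

# interpolation weight in [0, 1], as in the `weights` tensor above
weights = (p - left_q) / (right_q - left_q)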
Example #2
    def forward(self, X_wordid, X_mask, pos, Y=None):
        # X_wordid is the batch-size x max-seq-len matrix of WordPiece token indices
        # X_mask is the batch-size x max-seq-len matrix of 0s and 1s.
        # X_mask = 0 for padded tokens, otherwise 1
        # 
        # pos is the batch-size x 4 matrix
        # (pos[i, 0], pos[i, 1]) is the opinion expression span
        # (pos[i, 2], pos[i, 3]) is the target span
        # 
        # Y is the batch-size x max-seq-len matrix of the token labels
        # O = 0, B = 1, I = 2
        # Padded tokens also have label O (= 0)
        # 
        # If Y is not None, return loss
        # otherwise return Y_pred
        
        X_word = self.encoder(X_wordid, X_mask).last_hidden_state

        batch_size, max_seq_len = X_wordid.shape
        X_eposid = torch.tile(torch.LongTensor(np.arange(max_seq_len)), (batch_size, 1))
        X_tposid = torch.tile(torch.LongTensor(np.arange(max_seq_len)), (batch_size, 1))

        for i in range(batch_size):
            estart, eend, tstart, tend = pos[i]
            length = X_mask[i].sum()

            X_eposid[i, :estart] = estart - X_eposid[i, :estart]
            X_eposid[i, estart: eend] = 0
            X_eposid[i, eend:] = X_eposid[i, eend:] - eend + max_seq_len
            X_eposid[i, length:] = 2 * max_seq_len - 1

            X_tposid[i, :tstart] = tstart - X_tposid[i, :tstart]
            X_tposid[i, tstart: tend] = 0
            X_tposid[i, tend:] = X_tposid[i, tend:] - tend + max_seq_len
            X_tposid[i, length:] = 2 * max_seq_len - 1
        
        X_epos = self.expression_position_embedding(X_eposid)
        X_tpos = self.target_position_embedding(X_tposid)

        X = torch.cat((X_word, X_epos, X_tpos), dim=2)
        # X is of shape batch-size x max-seq-len x (Hw + 2 * Hpos)
        # Hw is the encoder hidden size, Hpos is the position embedding size

        A, _ = self.bilstm(X)
        A = A.contiguous()
        # A is of shape batch-size x max-seq-len x 2H
        # H is the hidden size

        B = self.output(A)
        # B is of shape batch-size x max-seq-len x 3

        crf_mask = X_mask.byte()
        Y_pred = self.crf.decode(B, crf_mask)
        # Y_pred is a list of list of integers

        if Y is not None:
            loss = -self.crf(B, Y, crf_mask)
            return loss, Y_pred
        else:
            return Y_pred
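For reference, the per-row updates in the loop above implement a span-relative bucketing of positions; a small sketch on one toy sequence (the span boundaries are hypothetical):

import torch

max_seq_len = 10
start, end = 3, 5                              # hypothetical span [start, end)
rel = torch.arange(max_seq_len)

rel[:start] = start - rel[:start]              # positive distance to the left of the span
rel[start:end] = 0                             # inside the span
rel[end:] = rel[end:] - end + max_seq_len      # right-of-span positions moved to a separate id range
# rel -> tensor([ 3,  2,  1,  0,  0, 10, 11, 12, 13, 14])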
Example #3
def get_argmin_mat(uniq_scale_dict: dict):
    """
    Calculate the mapping between the base scale and other scales. A segment from a longer scale is
    repeatedly mapped to a segment from a shorter scale or the base scale.

    Args:
        uniq_scale_dict (dict) :
            Dictionary of embeddings and timestamps for each scale.

    Returns:
        session_scale_mapping_dict (dict) :
            Dictionary containing argmin arrays indexed by scale index.
    """
    scale_list = sorted(list(uniq_scale_dict.keys()))
    segment_anchor_dict = {}
    for scale_idx in scale_list:
        time_stamp_list = uniq_scale_dict[scale_idx]['time_stamps']
        time_stamps_float = torch.tensor(
            [[float(x.split()[0]), float(x.split()[1])]
             for x in time_stamp_list])
        segment_anchor_dict[scale_idx] = torch.mean(time_stamps_float, dim=1)

    base_scale_idx = max(scale_list)
    base_scale_anchor = segment_anchor_dict[base_scale_idx]
    session_scale_mapping_dict = {}
    for scale_idx in scale_list:
        curr_scale_anchor = segment_anchor_dict[scale_idx]
        curr_mat = torch.tile(curr_scale_anchor,
                              (base_scale_anchor.shape[0], 1))
        base_mat = torch.tile(base_scale_anchor,
                              (curr_scale_anchor.shape[0], 1)).t()
        argmin_mat = torch.argmin(torch.abs(curr_mat - base_mat), dim=1)
        session_scale_mapping_dict[scale_idx] = argmin_mat
    return session_scale_mapping_dict
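A small sketch of the tile-and-argmin pattern used above, with made-up segment-center times for a coarse scale and the base scale:

import torch

curr_anchor = torch.tensor([0.5, 2.0, 3.5])               # 3 coarse-scale segment centers
base_anchor = torch.tensor([0.25, 0.75, 1.5, 2.5, 3.25])  # 5 base-scale segment centers

curr_mat = torch.tile(curr_anchor, (base_anchor.shape[0], 1))      # (5, 3)
base_mat = torch.tile(base_anchor, (curr_anchor.shape[0], 1)).t()  # (5, 3)
mapping = torch.argmin(torch.abs(curr_mat - base_mat), dim=1)
# mapping[i] is the coarse-scale segment whose center is closest to base segment i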
Example #4
    def layout_bbox(self, final_pred, batch_size, num_bboxes, num_classes,
                    output_height, output_width):
        # 5, 188, 20
        final_pred = torch.reshape(final_pred,
                                   [batch_size, num_bboxes, 4 + num_classes])
        #print('Final pred:',final_pred.shape)
        return self.rectangle_render(final_pred)
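        # NOTE: everything below this early return is unreachable; it is left-over
        # debugging code (including the deliberate `0 / 0` crash guards).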
        0 / 0
        final_pred = torch.reshape(final_pred,
                                   [batch_size, 4 + num_classes, num_bboxes])
        print('Final pred requires grad:', final_pred.requires_grad)
        bbox_reg = final_pred[:, :4, :]
        cls_prob = final_pred[:, 4:, :]

        print('bbox requires grad:', bbox_reg.requires_grad)
        bbox_reg = torch.reshape(bbox_reg, [batch_size, num_bboxes, 4])

        x_c = bbox_reg[:, :, 0] * output_width
        y_c = bbox_reg[:, :, 1] * output_height
        w = bbox_reg[:, :, 2] * output_width
        h = bbox_reg[:, :, 3] * output_height

        x1 = x_c - 0.5 * w
        x2 = x_c + 0.5 * w
        y1 = y_c - 0.5 * h
        y2 = y_c + 0.5 * h

        xt = torch.reshape(
            torch.range(start=0, end=output_width, dtype=torch.float32),
            [1, 1, 1, -1])
        xt = torch.reshape(
            torch.tile(xt, [batch_size, num_bboxes, output_height, 1]),
            [batch_size, num_bboxes, -1])

        yt = torch.reshape(
            torch.range(start=0, end=output_height, dtype=torch.float32),
            [1, 1, 1, -1])
        yt = torch.reshape(
            torch.tile(yt, [batch_size, num_bboxes, 1, output_width]),
            [batch_size, num_bboxes, -1])

        x1_diff = torch.reshape(
            xt - x1, [batch_size, num_bboxes, output_height, output_width, 1])
        y1_diff = torch.reshape(
            yt - y1, [batch_size, num_bboxes, output_height, output_width, 1])
        x2_diff = torch.reshape(
            x2 - xt, [batch_size, num_bboxes, output_height, output_width, 1])
        y2_diff = torch.reshape(
            y2 - yt, [batch_size, num_bboxes, output_height, output_width, 1])

        x1_line = self.relu(1.0 - torch.abs(x1_diff)) * torch.minimum(
            self.relu(y1_diff), 1.0) * torch.minimum(self.relu(y2_diff), 1.0)
        print(x1_line.shape)
        print(x1_line)

        0 / 0
Example #5
def __prepare(a, b):
    # extend as cols
    repetitions = b.shape[0]
    at = torch.tile(a, (repetitions, 1))
    at = at.transpose(-1, 0)

    # extend as rows
    # bt = np.tile(b, (repetitions, 1))
    repetitions = a.shape[0]
    bt = torch.tile(b, (repetitions, 1))
    return at, bt
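A quick illustration of what __prepare produces for two small vectors:

import torch

a = torch.tensor([1, 2, 3])
b = torch.tensor([10, 20])

at = torch.tile(a, (b.shape[0], 1)).transpose(-1, 0)  # (3, 2): at[i, j] == a[i]
bt = torch.tile(b, (a.shape[0], 1))                   # (3, 2): bt[i, j] == b[j]
# at and bt can now be combined element-wise, e.g. torch.abs(at - bt) gives all pairwise distances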
Example #6
 def get_alpha_beta(self, config, training=False):
     a_val = np.log(np.exp(config.model.alpha0) - 1)
     b_val = np.log(np.exp(1.) - 1)
     initial = torch.zeros(self.hidden[self.num_layers-1], dtype=torch.float32).to(config.device)
     self.a = Variable(initial) + a_val
     self.b = Variable(initial) + b_val
     beta_a = F.softplus(self.a)
     beta_b = F.softplus(self.b)
     beta_a = torch.unsqueeze(beta_a, 0)
     beta_b = torch.unsqueeze(beta_b, 0)
     self.beta_a = torch.tile(beta_a, [config.model.num_nodes, 1])
     self.beta_a = Variable(self.beta_a, requires_grad=True)
     self.beta_b = torch.tile(beta_b, [config.model.num_nodes, 1])
     self.beta_b = Variable(self.beta_b, requires_grad=True)
Example #7
def train():

    # Initialize torch.distributed
    init_distributed()

    print_rank_0('AutoMP: training GPT2...')
    # Use fake train data
    batch_size = args.batch_size
    sequence_length = args.sequence_length
    hidden_size = args.hidden_size
    vocab_size = args.vocab_size
    dropout_prob = args.hidden_dropout

    input_indices = torch.randint(low=0,
                                  high=vocab_size,
                                  size=(batch_size, sequence_length))
    input_indices = input_indices.to(torch.cuda.current_device())
    position_indices = torch.tile(torch.arange(start=0, end=sequence_length),
                                  (batch_size, 1))
    position_indices = position_indices.to(torch.cuda.current_device())
    print_rank_0(f'AutoMP: input_indices shape = {input_indices.size()}')
    print_rank_0(f'AutoMP: position_indices shape = {position_indices.size()}')

    def init_method_normal(tensor):
        return torch.nn.init.normal_(tensor, mean=0.0, std=1.0)

    embedding = Embedding(hidden_size=hidden_size,
                          vocab_size=vocab_size,
                          max_sequence_length=sequence_length,
                          embedding_dropout_prob=dropout_prob,
                          init_method=init_method_normal)

    optimizer = torch.optim.SGD(embedding.parameters(), lr=0.01)

    profiler = Profiler(os.path.join('benchmark', args.exp_name))

    num_epochs = 5
    tot_time = 0
    nproc = torch.distributed.get_world_size()

    for epoch in range(num_epochs):
        overall_name = f'emb_np-{nproc}_vs-{vocab_size}'
        profiler.start(overall_name)

        # Forward pass
        profiler.start(f'emb_forward_np-{nproc}_vs-{vocab_size}')
        embedding_output = embedding.forward(input_indices, position_indices)
        train_loss = torch.mean(embedding_output)
        torch.cuda.synchronize()
        profiler.stop(f'emb_forward_np-{nproc}_vs-{vocab_size}')

        # Backward pass
        profiler.start(f'emb_backward_np-{nproc}_vs-{vocab_size}')
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        torch.cuda.synchronize()
        profiler.stop(f'emb_backward_np-{nproc}_vs-{vocab_size}')

        profiler.stop(overall_name)
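The position-index setup above reduces to tiling an arange over the batch dimension; a tiny sketch with toy sizes:

import torch

batch_size, sequence_length = 4, 8
position_indices = torch.tile(torch.arange(sequence_length), (batch_size, 1))
# shape (4, 8): every row is 0..7, one copy per batch element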
Example #8
    def forward(self,
                x,
                mask=None,
                attn_weights=False,
                norm_attn_weights=True):
        # x.shape = [batch, length, token_dim]
        batch_size = x.shape[0]

        keys = self._transpose_multihead(self.keys(x))
        values = self._transpose_multihead(self.values(x))
        if self.fixed_queries:
            queries = torch.tile(self.queries, (batch_size, 1, 1))
        else:
            queries = self._transpose_multihead(self.queries(x))

        # dot_products.shape = [batch, length (query), length (key)]
        dot_products = torch.matmul(queries, keys.transpose(1, 2))
        if mask is not None:
            dot_products = dot_products * mask - 1e9 * (1 - mask)

        weights = self.attention_function(
            dot_products / np.sqrt(self.d_model // self.n_heads), dim=-1)
        result = self._untranspose_multihead(torch.matmul(weights, values))

        if self.n_heads > 1:
            result = self.output_linear(result)

        if attn_weights:
            weights_cpu = weights.detach().cpu()
            if norm_attn_weights:
                values_norm = torch.norm(values.detach().cpu(),
                                         dim=-1).unsqueeze(1)
                weights_cpu = weights_cpu * values_norm
            return result, weights_cpu
        return result
Example #9
    def concat_dependencies(self, hidden, other_features_hidden):
        if len(self.dependencies) > 0:
            dependencies_hidden = []
            for dependency in self.dependencies:
                # the dependent feature is ensured to be present in final_hidden
                # because we did the topological sort of the features before
                dependency_final_hidden = other_features_hidden[dependency]

                if len(hidden.shape) > 2:
                    if len(dependency_final_hidden.shape) > 2:
                        # matrix matrix -> concat
                        assert hidden.shape[
                            1] == dependency_final_hidden.shape[1]
                        dependencies_hidden.append(dependency_final_hidden)
                    else:
                        # matrix vector -> tile concat
                        sequence_max_length = hidden.shape[1]
                        multipliers = (1, sequence_max_length, 1)
                        tiled_representation = torch.tile(
                            torch.unsqueeze(dependency_final_hidden, 1),
                            multipliers)

                        # todo future: maybe modify this with TF2 mask mechanics
                        sequence_length = sequence_length_3D(hidden)
                        mask = sequence_mask(sequence_length,
                                             sequence_max_length)
                        tiled_representation = torch.mul(
                            tiled_representation,
                            mask[:, :, np.newaxis].type(torch.float32))

                        dependencies_hidden.append(tiled_representation)

                else:
                    if len(dependency_final_hidden.shape) > 2:
                        # vector matrix -> reduce concat
                        reducer = self.dependency_reducers[dependency]
                        dependencies_hidden.append(
                            reducer(dependency_final_hidden))
                    else:
                        # vector vector -> concat
                        dependencies_hidden.append(dependency_final_hidden)

            try:
                hidden = torch.cat([hidden] + dependencies_hidden, dim=-1)
            except Exception as e:
                raise ValueError(
                    "Shape mismatch while concatenating dependent features of "
                    "{}: {}, with exception {}. Concatenating the feature activations tensor {} "
                    "with activation tensors of dependencies: {}. The error is "
                    "likely due to a mismatch of the second dimension (sequence "
                    "length) or a difference in ranks. Likely solutions are "
                    "setting the maximum_sequence_length of all sequential "
                    "features to be the same, reducing the output of some "
                    "features, or disabling bucketing by setting "
                    "bucketing_field to None / null, as activating it will "
                    "reduce the length of the field the bucketing is performed "
                    "on.".format(self.column, self.dependencies, e, hidden,
                                 dependencies_hidden))

        return hidden
Example #10
    def transformer(self, group_embeddings, positional_data):
        
        batch_size, input_feature_dim, num_neighbors = group_embeddings.shape

        if self.attention_mode == "scalar":
            group_embeddings += self.positional_embedder(positional_data)

        query = group_embeddings[:, :, :1]

        query_E = torch.tile(self.WQ(query), dims=(1, 1, num_neighbors))
        key_E = self.WK(group_embeddings)
        value_E = self.WK(group_embeddings)
        
        # Vector attention
        if self.attention_mode == "vector":
            positional_encoding = self.positional_embedder(positional_data)
            scaled = torch.softmax(self.mapping(query_E - key_E + positional_encoding), dim=-1)
            aggregated = torch.sum(scaled * (value_E + positional_encoding), dim=-1)

            # Norm + Residual connection
            aggregated = query.squeeze(-1) + self.layer_norm(aggregated)


        # Scalar attention
        if self.attention_mode == "scalar":
            attention = torch.softmax(torch.matmul(query_E.transpose(-2, -1), key_E), dim=1)
            aggregated = torch.matmul(attention, value_E.transpose(-2, -1)).sum(dim=-2)

        return aggregated
Example #11
 def forward(self, x):
     weight = torch.tile(self.weight, (x.shape[1], 1, 1, 1)).to(x.device)
     return F.conv2d(x,
                     weight,
                     stride=self.stride,
                     padding=self.padding,
                     groups=x.shape[1])
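A self-contained sketch of the same depthwise pattern with a hypothetical fixed 3x3 averaging kernel; the padding value here is an assumption, not taken from the module above:

import torch
import torch.nn.functional as F

kernel = torch.full((1, 1, 3, 3), 1.0 / 9.0)         # one averaging kernel
x = torch.randn(2, 5, 16, 16)                        # batch of 5-channel inputs
weight = torch.tile(kernel, (x.shape[1], 1, 1, 1))   # (5, 1, 3, 3): one copy per channel
out = F.conv2d(x, weight, padding=1, groups=x.shape[1])
# out.shape == (2, 5, 16, 16); each channel is filtered independently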
Example #12
 def forward(self, x):
     N, C, H, W = x.shape
     x *= self.gain
     x = x.view(N, C, H, 1, W, 1)
     x = torch.tile(x, (1, 1, 1, self.factor, 1, self.factor))
     x = x.view(N, C, H * self.factor, W * self.factor)
     return x
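The view/tile/view sequence above is a nearest-neighbour upsample; a minimal sketch with factor 2 (gain omitted):

import torch

x = torch.arange(4.0).view(1, 1, 2, 2)            # N=1, C=1, H=W=2
factor = 2
N, C, H, W = x.shape
up = x.view(N, C, H, 1, W, 1)
up = torch.tile(up, (1, 1, 1, factor, 1, factor))
up = up.view(N, C, H * factor, W * factor)        # (1, 1, 4, 4)
# matches torch.nn.functional.interpolate(x, scale_factor=2, mode="nearest")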
Example #13
 def forward(self, w):
     N = w.shape[0]
     x = torch.tile(self.const, (N, 1, 1, 1))
     x = self.layer_epilogue1(x, w)
     x = self.conv(x)
     x = self.layer_epilogue2(x, w)
     return x
Example #14
def get_environment(n_atoms, grid=None):

    if n_atoms == 1:
        neighborhood_idx = -torch.ones((1, 1), dtype=torch.float32)
        offsets = torch.zeros((n_atoms, 1, 3), dtype=torch.float32)
    else:
        neighborhood_idx = torch.arange(
            n_atoms, dtype=torch.float32).unsqueeze(0).repeat(n_atoms, 1)
        neighborhood_idx = neighborhood_idx[
            ~torch.eye(n_atoms, dtype=torch.long).byte()].view(
                n_atoms, n_atoms - 1).long()

        if grid is not None:
            n_grid = grid.shape[0]
            neighborhood_idx = torch.concat(
                [neighborhood_idx, -torch.ones((n_atoms, 1))], 1)
            grid_nbh = torch.tile(
                torch.arange(n_atoms, dtype=torch.float32).unsqueeze(-1),
                (n_grid, 1))
            neighborhood_idx = torch.concat([neighborhood_idx, grid_nbh], 0)

        offsets = torch.zeros(
            (neighborhood_idx.shape[0], neighborhood_idx.shape[1], 3),
            dtype=torch.float32)
    return neighborhood_idx, offsets
Example #15
    def forward(self, input, adj):

        input_new = []
        for i in range(len(self.add_all)):

            index = torch.tensor([[i] * input.shape[1]])
            aa = torch.gather(input, 0, index)  # features of the central node i
            aa_tile = torch.tile(aa,
                                 [len(self.add_all[i]), 1])  # expand the central node to match its neighbors
            bb_nei_index2 = np.array([[j] * input.shape[1]
                                      for j in self.add_all[i]],
                                     dtype="int64")
            bb_nei_index2 = torch.tensor(bb_nei_index2)
            bb_nei = torch.gather(input, 0, bb_nei_index2)  # features of the neighbors of node i
            cen_nei = torch.cat([aa_tile, bb_nei], 1)
            mask0 = torch.mm(cen_nei, self.weights_mask0)
            mask0 = self.Sig(mask0)
            mask0 = F.dropout(mask0, self.drop_rate)

            self.mask.append(mask0)

            new_cen_nei = aa + torch.sum(
                mask0 * bb_nei, 0, keepdim=True
            )  # Hadamard product of the neighbors' features and the mask aggregator, followed by a sum aggregator
            input_new.append(new_cen_nei)

        input_new = torch.stack(input_new)
        input_new = torch.squeeze(input_new)
        support = torch.mm(input_new, self.weight_0)
        output = torch.spmm(adj, support)
        if self.bias is not None:
            return output + self.bias
        else:
            return output
Example #16
    def __call__(self, params, prediction, target):
        """Parameters (such as thresholds) are used calculate score.
        Args:
          params: list of float
        Returns:
          score: float
        """
        thresholds = params
        output = torch.zeros_like(prediction)

        if self.N > prediction.size(0):
            batch_size = prediction.size(0)
        else:
            batch_size = self.N

        # Threshold to output
        output = torch.where(
            prediction > torch.tile(thresholds, (batch_size, 1)), 1, 0)

        # Calculate score
        tp = (output * target).sum(1)
        fp = (output * (1 - target)).sum(1)
        fn = ((1 - output) * target).sum(1)
        f1 = tp / (tp + (fp + fn) / 2)
        precision = tp / (tp + fp)
        recall = tp / (tp + fn)

        #return f1.mean(), precision.mean(), recall.mean()

        return f1.mean()
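A small sketch of the per-class thresholding step with made-up thresholds and predictions; it relies on the same scalar torch.where overload used above:

import torch

thresholds = torch.tensor([0.3, 0.5, 0.7])       # hypothetical per-class thresholds
prediction = torch.rand(4, 3)                    # batch of 4 sigmoid outputs
output = torch.where(prediction > torch.tile(thresholds, (4, 1)), 1, 0)
# output is a (4, 3) tensor of 0/1 decisions; plain broadcasting
# (prediction > thresholds) would give the same boolean matrix without the tile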
Example #17
    def __init__(self, image: Union[Tensor, np.ndarray], *, metalabels: Optional[str] = None) -> None:

        if isinstance(image, torch.Tensor):
            if image.dtype != torch.uint8:
                raise ValueError(f"Tensor uint8 expected, got {image.dtype}")
            if image.dim() != 3:
                raise ValueError("Pass individual images, not batches")
            if image.size(0) not in {1, 3}:
                raise ValueError("Only grayscale and RGB images are supported")
            # Handle Grayscale images
            if image.size(0) == 1:
                image = torch.tile(image, (3, 1, 1))
            self.img = image.permute(1, 2, 0).cpu().numpy()
            self.is_bgr = False
        elif isinstance(image, np.ndarray):
            if image.dtype != np.uint8:
                raise ValueError(f"Numpy uint8 expected, got {image.dtype}")
            if image.ndim != 3:
                raise ValueError("Currently only BGR images are supported")
            self.img = image
            self.is_bgr = True
        else:
            raise TypeError(f"Tensor or numpy.ndarray expected, got {type(image)}")

        # Set dataset metadata (e.g. class names)
        self.metadata = None
        if metalabels is not None:
            self.metadata = np.loadtxt(metalabels, dtype="str", delimiter="\n")

        self.line_width = max(round(sum(self.img.shape) / 2 * 0.003), 2)
        self.assigned_colors = Colors()
        self.output = self.img
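The grayscale branch above simply replicates the single channel three times; a tiny sketch:

import torch

gray = torch.randint(0, 256, (1, 32, 32), dtype=torch.uint8)  # (1, H, W) grayscale image
rgb = torch.tile(gray, (3, 1, 1))                              # (3, H, W) with identical channels
# gray.expand(3, -1, -1) would give the same values as a view instead of a copy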
Example #18
File: c2b.py Project: mrsalehi/C2B
 def __init__(self, args, config, subframe_gen):
     self.frame_height, self.frame_width = config.camera.frame_height, config.camera.frame_width
     self.nbhd_height, self.nbhd_width = config.camera.nbhd_size
     n_pixels = self.nbhd_height * self.nbhd_width
     self.S = config.camera.S
     self.device = args.device
     self.max_intensity = torch.tensor(2 ** config.camera.pixel_bit_depth)
     
     assert self.frame_height % self.nbhd_height == 0
     assert self.frame_width % self.nbhd_width == 0
     assert self.S % n_pixels == 0
     
     self.subframe_gen = subframe_gen
     
     # self.scheme = torch.FloatTensor(config.camera.scheme)
     self.scheme = get_simple_scheme(self.nbhd_height, self.nbhd_width).to(self.device)
     # scheme = torch.tile(self.scheme, (height // self.nbhd_height, width // self.nbhd_width)).to(raw_subframes.device)
     # scheme_ = scheme_.unsqueeze(1).repeat(1, S // n_pixels, 1, 1).view(S, height, width)
     
     scheme_ = torch.tile(self.scheme, (self.frame_height // self.nbhd_height, 
                                             self.frame_width // self.nbhd_width)).to(self.device) # shape: (n_pixels, height, width)
     self.scheme_ = scheme_.unsqueeze(1).repeat(1, self.S // n_pixels, 1, 1).view(self.S, self.frame_height, self.frame_width)  # shape: (S, height, width)
     self.save_output = config.camera.save_output
     self.save_dir = config.camera.save_dir
     self.batch_size = config.data.batch_size
     
     self.noise_std = None
     if config.camera.add_noise:
         self.noise_std = config.camera.noise_std
     
     if self.save_output:
         os.makedirs(self.save_dir, exist_ok=True)
Example #19
    def forward(self, x, target=None):
        assert x.size()[1] >= 2

        cce_prediction = self.cce_backend(x)
        #x = self.magnitude(x) * torch.nn.functional.normalize(x)

        if target is None:
            return x, cce_prediction

        x = x.reshape(-1,2,x.size()[-1]).squeeze(1)
        out_anchor      = torch.mean(x[:,1:,:],1)
        out_positive    = x[:,0,:]

        ap_sim_matrix  = torch.nn.functional.cosine_similarity(out_positive.unsqueeze(-1),out_anchor.unsqueeze(-1).transpose(0,2))
        self.w.data.clamp_(min=1e-6)  # clamp in place; a bare torch.clamp would discard the result
        ap_sim_matrix = ap_sim_matrix * self.w + self.b1

        labels = torch.arange(0, int(out_positive.shape[0]), device=torch.device("cuda:0")).unsqueeze(1)
        cos_sim_matrix  = torch.mm(out_positive, out_anchor.T)
        cos_sim_matrix = cos_sim_matrix + self.b2
        cos_sim_matrix = cos_sim_matrix + numpy.log(1/out_positive.shape[0] / (1 - 1/out_positive.shape[0]))
        mask = (torch.tile(labels, (1, labels.shape[0])) == labels.T).float()
        batch_loss = self.criterion(ap_sim_matrix, torch.arange(0, int(out_positive.shape[0]), device=torch.device("cuda:0"))) \
            + self.magnet_criterion(cos_sim_matrix.flatten().unsqueeze(1), mask.flatten().unsqueeze(1))
        return batch_loss, cce_prediction
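A minimal sketch of how the tiled-label comparison above builds the positive-pair mask (toy batch size):

import torch

n = 4
labels = torch.arange(n).unsqueeze(1)                    # (4, 1)
mask = (torch.tile(labels, (1, n)) == labels.T).float()  # (4, 4)
# mask[i, j] == 1 exactly when labels[i] == labels[j], i.e. the identity matrix here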
Example #20
    def _single_interaction(self, current_input, current_baseline,
                            current_alphas, num_samples, batch_size,
                            use_expectation, output_index, interaction_index):
        """
        A helper function to compute path
        interactions for a single sample.
        Args:
            current_input: A single sample. Assumes that
                           it is of shape (...) where ...
                           represents the input dimensionality
            current_baseline: A tensor representing the baseline input.
            current_alphas: Which alphas to use when interpolating
            num_samples: The number of samples to draw
            batch_size: Batch size to input to the model
            use_expectation: Whether or not to sample the baseline
            output_index: Which output class to index into, if any
            interaction_index: The index to take the interactions with respect to.
        """
        current_input = current_input.unsqueeze(0)
        current_alpha, current_beta = current_alphas

        current_alpha = torch.tensor(current_alpha).float().to(
            current_input.device)
        current_beta = torch.tensor(current_beta).float().to(
            current_input.device)

        current_alpha = torch.reshape(current_alpha, (num_samples,) + \
                                    (1,) * (len(current_input.shape) - 1))
        current_beta = torch.reshape(current_beta, (num_samples,) + \
                                 (1,) * (len(current_input.shape) - 1))
        attribution_array = []
        for j in range(0, num_samples, batch_size):
            number_to_draw = min(batch_size, num_samples - j)

            batch_baseline = self._sample_baseline(current_baseline,
                                                   number_to_draw,
                                                   use_expectation)
            batch_alpha = current_alpha[j:min(j + batch_size, num_samples)]
            batch_beta = current_beta[j:min(j + batch_size, num_samples)]

            reps = np.ones(len(current_input.shape)).astype(int)
            reps[0] = number_to_draw

            batch_input = torch.tile(current_input, tuple(reps))

            batch_baseline.requires_grad = True

            batch_attributions = self.accumulation_function(
                batch_input,
                batch_baseline,
                batch_alphas=(batch_alpha, batch_beta),
                output_index=output_index,
                second_order=True,
                interaction_index=interaction_index)
            attribution_array.append(batch_attributions.detach().cpu())
        attribution_array = np.concatenate(attribution_array, axis=0)
        attributions = np.mean(attribution_array, axis=0)
        return attributions
Example #21
 def tensor_indexing_ops(self):
     x = torch.randn(2, 4)
     y = torch.randn(4, 4)
     t = torch.tensor([[0, 0], [1, 0]])
     mask = x.ge(0.5)
     i = [0, 1]
     return len((
         torch.cat((x, x, x), 0),
         torch.concat((x, x, x), 0),
         torch.conj(x),
         torch.chunk(x, 2),
         torch.dsplit(torch.randn(2, 2, 4), i),
         torch.column_stack((x, x)),
         torch.dstack((x, x)),
         torch.gather(x, 0, t),
         torch.hsplit(x, i),
         torch.hstack((x, x)),
         torch.index_select(x, 0, torch.tensor([0, 1])),
         x.index(t),
         torch.masked_select(x, mask),
         torch.movedim(x, 1, 0),
         torch.moveaxis(x, 1, 0),
         torch.narrow(x, 0, 0, 2),
         torch.nonzero(x),
         torch.permute(x, (0, 1)),
         torch.reshape(x, (-1, )),
         torch.row_stack((x, x)),
         torch.select(x, 0, 0),
         torch.scatter(x, 0, t, x),
         x.scatter(0, t, x.clone()),
         torch.diagonal_scatter(y, torch.ones(4)),
         torch.select_scatter(y, torch.ones(4), 0, 0),
         torch.slice_scatter(x, x),
         torch.scatter_add(x, 0, t, x),
         x.scatter_(0, t, y),
         x.scatter_add_(0, t, y),
         # torch.scatter_reduce(x, 0, t, reduce="sum"),
         torch.split(x, 1),
         torch.squeeze(x, 0),
         torch.stack([x, x]),
         torch.swapaxes(x, 0, 1),
         torch.swapdims(x, 0, 1),
         torch.t(x),
         torch.take(x, t),
         torch.take_along_dim(x, torch.argmax(x)),
         torch.tensor_split(x, 1),
         torch.tensor_split(x, [0, 1]),
         torch.tile(x, (2, 2)),
         torch.transpose(x, 0, 1),
         torch.unbind(x),
         torch.unsqueeze(x, -1),
         torch.vsplit(x, i),
         torch.vstack((x, x)),
         torch.where(x),
         torch.where(t > 0, t, 0),
         torch.where(t > 0, t, t),
     ))
Example #22
    def test_StackDenseFixedSizeArray(self):
        # happy path: value is type Tensor; check cast to float
        value = torch.eye(4).to(dtype=torch.int)  # start as int
        data = {"a": value}
        out = transforms.StackDenseFixedSizeArray(data.keys(), size=4)(data)
        expected = {"a": value.to(dtype=torch.float)}
        self.assertDictOfTensorEqual(out, expected)
        self.assertTrue(out["a"].dtype == torch.float, msg="dtype != float")

        # happy path: value is list w/ elements type Tuple[Tensor, Tensor]
        presence = torch.tensor([[1, 1, 1], [1, 1, 1]])
        data = {
            "a": [
                (torch.tensor([[0, 0, 0], [1, 1, 1]]), presence),
                (torch.tensor([[2, 2, 2], [3, 3, 3]]), presence),
            ],
            "b": [
                (torch.tensor([[3, 3, 3], [2, 2, 2]]), presence),
                (torch.tensor([[1, 1, 1], [0, 0, 0]]), presence),
            ],
        }
        out = transforms.StackDenseFixedSizeArray(data.keys(), size=3)(data)
        expected = {
            "a":
            torch.tile(
                torch.arange(4).view(-1, 1).to(dtype=torch.float), (1, 3)),
            "b":
            torch.tile(
                torch.arange(4).flip(dims=(0, )).view(-1,
                                                      1).to(dtype=torch.float),
                (1, 3),
            ),
        }
        self.assertDictOfTensorEqual(out, expected)

        # raise for tensor wrong shape
        with self.assertRaisesRegex(ValueError, "Wrong shape"):
            sdf = transforms.StackDenseFixedSizeArray(["a"], size=3)
            sdf({"a": torch.ones(2)})

        # raise for tensor wrong ndim
        with self.assertRaisesRegex(ValueError, "Wrong shape"):
            sdf = transforms.StackDenseFixedSizeArray(["a"], size=2)
            sdf({"a": torch.zeros(2, 2, 2)})
Example #23
 def make_mask(self, target: torch.Tensor) -> torch.Tensor:
     size = target.size(1)
     look_ahead_mask = ~(torch.triu(
         torch.ones(size, size, device=target.device)) == 1).transpose(
             0, 1)[:, None]
     target_padding_mask = torch.eq(target,
                                    self.vocab_size + 2)  # Pad symbol
     combined_mask = target_padding_mask | look_ahead_mask
     return torch.tile(combined_mask.permute(1, 0, 2),
                       (self.num_heads, 1, 1))
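A small sketch of the look-ahead part of the mask above, with padding and head tiling omitted:

import torch

size = 4
look_ahead = ~(torch.triu(torch.ones(size, size)) == 1).transpose(0, 1)
# True marks positions a target token must not attend to (strictly future positions):
# tensor([[False,  True,  True,  True],
#         [False, False,  True,  True],
#         [False, False, False,  True],
#         [False, False, False, False]])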
Example #24
 def build(self, input_shape):
     cfg = self.cfg
     tgt = input_shape[0]
     assert tgt[0] == cfg.batch_size
     y = torch.tensor([[0.0] + [-float("inf")] * (cfg.beam_size - 1)])
     self._logp = torch.tile(y, [cfg.batch_size, 1])
     sh = (cfg.batch_size, cfg.beam_size)
     self._score = torch.ones(sh) * utils.big_neg
     self._flag = torch.zeros(sh, dtype=torch.bool)
     return super().build(input_shape)
Example #25
    def forward(self, query_feature: Dict[str, Tensor], passage_features: Iterable[Dict[str, Tensor]], labels: Tensor):
        n = labels.shape[1]

        query_embedding = self.query_model(query_feature)['sentence_embedding']
        # it is used as the scaling vector, so each element should lie in [0, 1]
        psg_embeddings = torch.stack([self.psg_model(passages)['sentence_embedding']
                                      for passages in passage_features], dim=1)
        scaled_psg_embeddings = torch.tile(query_embedding.unsqueeze(1), (1, n, 1)) * psg_embeddings

        return scaled_psg_embeddings
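A shape-level sketch of the scaling step with made-up batch size, passage count, and embedding dimension; plain broadcasting would produce the same result without the explicit tile:

import torch

query_embedding = torch.randn(2, 8)      # (batch, dim)
psg_embeddings = torch.randn(2, 3, 8)    # (batch, n_passages, dim)

scaled = torch.tile(query_embedding.unsqueeze(1), (1, 3, 1)) * psg_embeddings
# scaled.shape == (2, 3, 8); query_embedding.unsqueeze(1) * psg_embeddings is equivalent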
Example #26
    def test():

        import numpy as np

        word_embeddings = nn.Embedding(10000, 300)
        lstm = nn.LSTM(300, 100)
        h0 = Variable(torch.zeros(1, 128, 100))
        c0 = Variable(torch.zeros(1, 128, 100))
        hidden = (h0, c0)
        sentence = Variable(
            torch.LongTensor(np.zeros((128, 30), dtype=np.int64)))
        embeds = word_embeddings(sentence)
        torch.tile(sentence, (1, 1))  # torch.tile requires a dims argument; (1, 1) leaves the tensor unchanged
        batch_size = sentence.size()[0]

        #        x= Variable(torch.zeros(30, 128, 300))
        x = embeds.view(sentence.size()[1], batch_size, -1)
        embeds = embeds.permute(1, 0, 2)
        lstm_out, hidden = lstm(embeds, hidden)
Example #27
def concatenate(tensor1, tensor2):
    """ Concatenates two 2D or 4D tensors.

    Parameters
    ----------
    tensor1 : torch.Tensor
        2D or 4D tensor.
    tensor2 : torch.Tensor
        2D or 4D tensor.

    Returns
    -------
    torch.Tensor
        Concatenation of tensor1 and tensor2.

    Raises
    ------
    AssertionError
        If the tensors do not have 2 or 4 dimensions.
    """
    assert tensor1.shape[0] == tensor2.shape[0], (
        "Tensors to concatenate must have same dim 0. Tensor1: {}. Tensor2: {}."
        .format(tensor1.shape[0], tensor2.shape[0]))
    batch_size = tensor1.shape[0]
    if tensor1.shape == tensor2.shape:
        return torch.cat((tensor1, tensor2), axis=1).float()
    elif (len(tensor1.shape) == 2) and (len(tensor2.shape) == 2):
        return torch.cat((tensor1, tensor2), axis=1).float()
    elif (len(tensor1.shape) == 4) and (len(tensor2.shape) == 2):
        y_dim = tensor2.shape[1]
        tensor2 = torch.reshape(tensor2, shape=(batch_size, y_dim, 1, 1))
        tensor2 = torch.tile(tensor2, dims=(1, 1, *tensor1.shape[2:]))
    elif (len(tensor1.shape) == 2) and (len(tensor2.shape) == 4):
        y_dim = tensor1.shape[1]
        tensor1 = torch.reshape(tensor1, shape=(batch_size, y_dim, 1, 1))
        tensor1 = torch.tile(tensor1, dims=(1, 1, *tensor2.shape[2:]))
    elif (len(tensor1.shape) == 4) and (len(tensor2.shape) == 4):
        return torch.cat((tensor1, tensor2), axis=1).float()
    else:
        raise AssertionError(
            "tensor1 and tensor2 must have 2 or 4 dimensions. Given: {} and {}."
            .format(tensor1.shape, tensor2.shape))
    return torch.cat((tensor1, tensor2), axis=1).float()
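A small sketch of the 4D/2D branch above: the 2D conditioning vector is broadcast over the spatial grid before concatenation (tensor sizes are made up):

import torch

feature_maps = torch.randn(2, 16, 8, 8)   # 4D activations
labels = torch.randn(2, 10)               # 2D conditioning vector

y = torch.reshape(labels, (2, 10, 1, 1))
y = torch.tile(y, dims=(1, 1, 8, 8))      # replicate the vector over the 8x8 grid
out = torch.cat((feature_maps, y), dim=1) # (2, 26, 8, 8)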
Example #28
        def eval_step(inputs, model):
            images = inputs['features']
            labels = inputs['labels']
            images = torch.from_numpy(images._numpy()).view(
                eval_batch_size,
                3,  # pylint: disable=protected-access
                image_h,
                image_w).to(device)
            labels = torch.from_numpy(
                labels._numpy()).to(device).float().unsqueeze(-1)  # pylint: disable=protected-access

            with torch.no_grad():
                logits = torch.stack([
                    model(images)
                    for _ in range(FLAGS.num_dropout_samples_eval)
                ],
                                     dim=-1)

            # Logits dimension is (batch_size, 1, num_dropout_samples).
            logits = logits.squeeze()

            # It is now (batch_size, num_dropout_samples).
            probs = sigmoid(logits)

            # labels_tiled shape is (batch_size, num_dropout_samples).
            labels_tiled = torch.tile(labels,
                                      (1, FLAGS.num_dropout_samples_eval))

            log_likelihoods = -loss_fn(probs, labels_tiled)
            negative_log_likelihood = torch.mean(
                -torch.logsumexp(log_likelihoods, dim=-1) +
                torch.log(torch.tensor(float(FLAGS.num_dropout_samples_eval))))

            probs = torch.mean(probs, dim=-1)

            # Convert to NumPy for metrics updates
            negative_log_likelihood = negative_log_likelihood.detach()
            labels = labels.detach()
            probs = probs.detach()

            if device != 'cpu':
                negative_log_likelihood = negative_log_likelihood.cpu()
                labels = labels.cpu()
                probs = probs.cpu()

            negative_log_likelihood = negative_log_likelihood.numpy()
            labels = labels.numpy()
            probs = probs.numpy()

            metrics[dataset_split + '/negative_log_likelihood'].update_state(
                negative_log_likelihood)
            metrics[dataset_split + '/accuracy'].update_state(labels, probs)
            metrics[dataset_split + '/auprc'].update_state(labels, probs)
            metrics[dataset_split + '/auroc'].update_state(labels, probs)
            metrics[dataset_split + '/ece'].add_batch(probs, label=labels)
Example #29
def meshgrid(*args, **kwargs):
    """
    
    meshgrid code that builds on (copies) tensorflow's meshgrid but dramatically
    improves runtime by changing the last step to tiling instead of multiplication.
    https://github.com/tensorflow/tensorflow/blob/c19e29306ce1777456b2dbb3a14f511edf7883a8/tensorflow/python/ops/array_ops.py#L1921
    
    Broadcasts parameters for evaluation on an N-D grid.
    Given N one-dimensional coordinate arrays `*args`, returns a list `outputs`
    of N-D coordinate arrays for evaluating expressions on an N-D grid.
    Notes:
    `meshgrid` supports cartesian ('xy') and matrix ('ij') indexing conventions.
    When the `indexing` argument is set to 'xy' (the default), the broadcasting
    instructions for the first two dimensions are swapped.
    Examples:
    Calling `X, Y = meshgrid(x, y)` with the tensors
    ```python
    x = [1, 2, 3]
    y = [4, 5, 6]
    X, Y = meshgrid(x, y)
    # X = [[1, 2, 3],
    #      [1, 2, 3],
    #      [1, 2, 3]]
    # Y = [[4, 4, 4],
    #      [5, 5, 5],
    #      [6, 6, 6]]
    ```
    Args:
    *args: `Tensor`s with rank 1.
    **kwargs:
      - indexing: Either 'xy' or 'ij' (optional, default: 'xy').
      - name: A name for the operation (optional).
    Returns:
    outputs: A list of N `Tensor`s with rank N.
    Raises:
    TypeError: When no keyword arguments (kwargs) are passed.
    ValueError: When indexing keyword argument is not one of `xy` or `ij`.
    """

    # with ops.name_scope(name, "meshgrid", args) as name:
    ndim = len(args)
    s0 = (1, ) * ndim

    # Prepare reshape by inserting dimensions with size 1 where needed
    output = []
    for i, x in enumerate(args):
        output.append(torch.stack(x).view((s0[:i] + (-1, ) + s0[i + 1::])))

    shapes = [x.size() for x in args]
    sz = [x.size()[0] for x in args]

    for i in range(len(output)):
        output[i] = torch.tile(output[i], (*sz[:i], 1, *sz[(i + 1):]))
    return output
Example #30
def train():

    # Initialize torch.distributed
    init_distributed()

    print_rank_0('AutoMP: training GPT2...')
    # Use fake train data
    args = get_args()
    sequence_length = 1024
    vocab_size = 4096
    dropout_prob = 0.1

    input_indices = torch.randint(low=0,
                                  high=vocab_size,
                                  size=(args.batch_size, sequence_length))
    input_indices = input_indices.to(torch.cuda.current_device())
    position_indices = torch.tile(torch.arange(start=0, end=sequence_length),
                                  (args.batch_size, 1))
    position_indices = position_indices.to(torch.cuda.current_device())
    print_rank_0(f'AutoMP: input_indices shape = {input_indices.size()}')
    print_rank_0(f'AutoMP: position_indices shape = {position_indices.size()}')

    def init_method_normal(tensor):
        return torch.nn.init.normal_(tensor, mean=0.0, std=1.0)

    embedding = Embedding(hidden_size=args.hidden_size,
                          vocab_size=vocab_size,
                          max_sequence_length=sequence_length,
                          embedding_dropout_prob=dropout_prob,
                          init_method=init_method_normal)

    embedding_output = embedding.forward(input_indices, position_indices)

    # print_rank_0(f'AutoMP: embedding_output = {embedding_output}')

    def gpt2_attention_mask_func(attention_scores, ltor_mask):
        attention_scores.masked_fill_(ltor_mask, -10000.0)
        return attention_scores

    transformer = ParallelTransformer(
        attention_mask_func=gpt2_attention_mask_func,
        num_layers=args.num_layers,
        hidden_size=args.hidden_size,
        layernorm_epsilon=args.layernorm_epsilon,
        num_attention_heads=args.num_attention_heads,
        attention_dropout=0.1,
        hidden_dropout=0.1)

    attention_mask, loss_mask, position_ids = get_ltor_masks_and_position_ids(
        input_indices, vocab_size - 1)

    transformer_output = transformer.forward(hidden_states=embedding_output,
                                             attention_mask=attention_mask)
    print_rank_0(f'AutoMP: transformer_output = {transformer_output}')