def forward(  # type: ignore
    self, anchors: TextFieldTensors, positives: TextFieldTensors = None
) -> Dict[str, torch.Tensor]:
    """
    # Parameters

    anchors : TextFieldTensors
        From a `TextField`
    positives : TextFieldTensors, optional
        From a `TextField`

    # Returns

    An output dictionary consisting of:

    embeddings : torch.FloatTensor
        A tensor of shape `(batch_size, self._seq2vec_encoder.get_output_dim())`, which is the
        representation for the given `anchors` output by the encoder. The encoder is composed of:
        `self._text_field_embedder` and `self._seq2vec_encoder`, in that order.
    projections : torch.FloatTensor
        A tensor of shape `(batch_size, self._feedforward.get_output_dim())`, which is the
        non-linear projection of the learned representation for the given `anchors` output by
        the projection head. This field will only be included if `self._feedforward` is not
        `None`.
    loss : torch.FloatTensor, optional
        A scalar loss to be optimized.
    """
    output_dict: Dict[str, torch.Tensor] = {}

    # If multiple anchors were sampled, we need to unpack them.
    anchors = unpack_batch(anchors)

    # Mask anchor input ids and get the labels required for MLM.
    if self.training and self._masked_language_modeling:
        anchors = mask_tokens(anchors, self._tokenizer)
    # This is the textual representation learned by the model and used for downstream tasks.
    masked_lm_loss, embedded_anchors = self._forward_internal(anchors, output_dict)

    # If positives are supplied by the DataLoader and we are training, compute a contrastive loss.
    if self.training:
        output_dict["loss"] = 0
        # TODO: We should raise a ValueError if no positives are provided but the loss is not None.
        if self._loss is not None:
            # Like the anchors, if we sampled multiple positives, we need to unpack them.
            positives = unpack_batch(positives)
            # Positives are represented by their mean embedding a la
            # https://arxiv.org/abs/1902.09229.
            _, embedded_positives = self._forward_internal(positives)
            # Shape: (num_anchors, num_positives_per_anchor, embedding_dim)
            embedded_positives = torch.reshape(
                embedded_positives,
                (embedded_anchors.size(0), -1, embedded_anchors.size(-1)),
            )
            # Shape: (num_anchors, embedding_dim)
            embedded_positives = torch.mean(embedded_positives, dim=1)

            # If we are training on multiple GPUs using DistributedDataParallel, a naive
            # application would yield 2 * (batch_size / n_gpus - 1) negatives per GPU. To avoid
            # this, we gather the anchors/positives from every replica onto every other replica
            # so that the correct number of negatives, i.e. 2 * (batch_size - 1), is available
            # before computing the contrastive loss.
            embedded_anchors, embedded_positives = all_gather_anchor_positive_pairs(
                embedded_anchors, embedded_positives
            )

            # Get the embeddings into the format that the PyTorch Metric Learning library expects
            # before computing the loss (with an optional mining step).
            embeddings, labels = self._loss.get_embeddings_and_labels(
                embedded_anchors, embedded_positives
            )
            indices_tuple = self._miner(embeddings, labels) if self._miner is not None else None
            contrastive_loss = self._loss(embeddings, labels, indices_tuple)
            # The loss needs to be scaled by the world size when using DistributedDataParallel.
            # See: https://amsword.medium.com/gradient-backpropagation-with-torch-distributed-all-gather-9f3941a381f8
            if util.is_distributed() and self._scale_fix:
                contrastive_loss *= dist.get_world_size()
            output_dict["loss"] += contrastive_loss

        # The loss may be derived from the contrastive objective, the MLM objective, or both.
        if masked_lm_loss is not None:
            output_dict["loss"] += masked_lm_loss

    return output_dict
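# The `all_gather_anchor_positive_pairs` call above relies on the gather-with-local-gradient
# trick described in the Medium post linked next to the `self._scale_fix` check. The sketch
# below illustrates that pattern only; `all_gather_with_grad` is a hypothetical helper, not the
# repository's actual implementation, and it assumes `torch.distributed` is already initialized.
import torch
import torch.distributed as dist


def all_gather_with_grad(local_tensor: torch.Tensor) -> torch.Tensor:
    # `dist.all_gather` returns tensors with no autograd history, so we substitute this
    # replica's original tensor back into its slot of the gathered list. Gradients then flow
    # through the local shard only, which is why the contrastive loss above is rescaled by the
    # world size when `self._scale_fix` is enabled.
    world_size = dist.get_world_size()
    gathered = [torch.zeros_like(local_tensor) for _ in range(world_size)]
    dist.all_gather(gathered, local_tensor)
    gathered[dist.get_rank()] = local_tensor
    return torch.cat(gathered, dim=0)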
def forward(  # type: ignore
    self,
    anchors: TextFieldTensors,
    positives: TextFieldTensors = None,
    label: torch.LongTensor = None,
) -> Dict[str, torch.Tensor]:
    """
    # Parameters

    anchors : TextFieldTensors
        From a `TextField`
    positives : TextFieldTensors, optional
        From a `TextField`

    # Returns

    An output dictionary consisting of:

    embeddings : torch.FloatTensor
        A tensor of shape `(batch_size, self._seq2vec_encoder.get_output_dim())`, which is the
        representation for the given `anchors` output by the encoder. The encoder is composed of:
        `self._text_field_embedder` and `self._seq2vec_encoder`, in that order.
    projections : torch.FloatTensor
        A tensor of shape `(batch_size, self._feedforward.get_output_dim())`, which is the
        non-linear projection of the learned representation for the given `anchors` output by
        the projection head. This field will only be included if `self._feedforward` is not
        `None`.
    loss : torch.FloatTensor, optional
        A scalar loss to be optimized.
    """
    output_dict: Dict[str, torch.Tensor] = {}

    # If multiple anchors were sampled, we need to unpack them.
    anchors = unpack_batch(anchors)

    # Mask anchor input ids and get the labels required for MLM.
    if self.training and self._masked_language_modeling:
        anchors = mask_tokens(anchors, self._tokenizer)
    # This is the textual representation learned by the model and used for downstream tasks.
    masked_lm_loss, embedded_anchors = self._forward_internal(anchors, output_dict)

    # If positives are supplied by the DataLoader and we are training, compute a contrastive loss.
    if self.training:
        output_dict["loss"] = 0
        # TODO: We should raise a ValueError if no positives are provided but the loss is not None.
        if self._loss is not None:
            # Like the anchors, if we sampled multiple positives, we need to unpack them.
            positives = unpack_batch(positives)
            # Positives are represented by their mean embedding a la
            # https://arxiv.org/abs/1902.09229.
            _, embedded_positives = self._forward_internal(positives)
            embedded_positive_chunks = []
            for chunk in torch.chunk(embedded_positives, chunks=embedded_anchors.size(0), dim=0):
                embedded_positive_chunks.append(torch.mean(chunk, dim=0))
            embedded_positives = torch.stack(embedded_positive_chunks)

            # If we are training on multiple GPUs using DistributedDataParallel, a naive
            # application would yield 2 * (batch_size / n_gpus - 1) negatives per GPU. To avoid
            # this, we gather the anchors/positives from every replica onto every other replica
            # so that the correct number of negatives, i.e. 2 * (batch_size - 1), is available
            # before computing the contrastive loss.
            embedded_anchors, embedded_positives = all_gather_anchor_positive_pairs(
                embedded_anchors, embedded_positives
            )

            # Split the anchors/positives into two groups ("parties") according to the binary
            # `label` supplied by the DataLoader.
            embedded_positives_1 = embedded_positives[torch.nonzero(label == 1).view(1, -1)[0], :]
            embedded_positives_0 = embedded_positives[torch.nonzero(label == 0).view(1, -1)[0], :]
            embedded_anchors_1 = embedded_anchors[torch.nonzero(label == 1).view(1, -1)[0], :]
            embedded_anchors_0 = embedded_anchors[torch.nonzero(label == 0).view(1, -1)[0], :]

            # Get the embeddings into the format that the PyTorch Metric Learning library expects
            # before computing the loss (with an optional mining step). The contrastive loss is
            # only computed when both groups are non-empty.
            if embedded_positives_1.shape[0] > 0 and embedded_positives_0.shape[0] > 0:
                embeddings, labels, parties = self._loss.get_embeddings_and_labels(
                    embedded_anchors_0,
                    embedded_positives_0,
                    embedded_anchors_1,
                    embedded_positives_1,
                )
                indices_tuple = self._miner(embeddings, labels) if self._miner is not None else None
                output_dict["loss"] += self._loss(
                    embeddings, labels, indices_tuple=indices_tuple, parties=parties
                )
            else:
                output_dict["loss"] += torch.zeros(1, device=embedded_anchors.device)

        # The loss may be derived from the contrastive objective, the MLM objective, or both.
        if masked_lm_loss is not None:
            output_dict["loss"] += masked_lm_loss

    return output_dict
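# The four `torch.nonzero(...).view(1, -1)[0]` indexing expressions above can be written more
# directly with boolean masks. The helper below is a hypothetical, equivalent reformulation for
# illustration; it assumes `label` is a 1-D LongTensor of 0s and 1s aligned with the rows of the
# (gathered) anchor and positive embeddings.
from typing import Tuple

import torch


def split_by_party(
    embeddings: torch.Tensor, label: torch.Tensor
) -> Tuple[torch.Tensor, torch.Tensor]:
    mask = label.bool()
    # Returns (rows where label == 1, rows where label == 0).
    return embeddings[mask], embeddings[~mask]


# Example usage (hypothetical names):
# embedded_anchors_1, embedded_anchors_0 = split_by_party(embedded_anchors, label)
# embedded_positives_1, embedded_positives_0 = split_by_party(embedded_positives, label)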
def forward(  # type: ignore
    self,
    anchors: TextFieldTensors,
    positives: TextFieldTensors = None,
    difficulty: LabelField = None,
) -> Dict[str, torch.Tensor]:
    """
    # Parameters

    anchors : TextFieldTensors
        From a `TextField`
    positives : TextFieldTensors, optional
        From a `TextField`

    # Returns

    An output dictionary consisting of:

    embeddings : torch.FloatTensor
        A tensor of shape `(batch_size, self._seq2vec_encoder.get_output_dim())`, which is the
        representation for the given `anchors` output by the encoder. The encoder is composed of:
        `self._text_field_embedder` and `self._seq2vec_encoder`, in that order.
    projections : torch.FloatTensor
        A tensor of shape `(batch_size, self._feedforward.get_output_dim())`, which is the
        non-linear projection of the learned representation for the given `anchors` output by
        the projection head. This field will only be included if `self._feedforward` is not
        `None`.
    loss : torch.FloatTensor, optional
        A scalar loss to be optimized.
    """
    output_dict: Dict[str, torch.Tensor] = {}

    # If multiple anchors were sampled, we need to unpack them.
    anchors = unpack_batch(anchors)

    # Mask anchor input ids and get the labels required for MLM.
    if self.training and self._masked_language_modeling:
        anchors = mask_tokens(anchors, self._tokenizer)
    # This is the textual representation learned by the model and used for downstream tasks.
    masked_lm_loss, embedded_anchors = self._forward_internal(anchors, -1, output_dict=output_dict)

    # The (optional) difficulty for this batch, passed to `_forward_internal` to control
    # augmentation strength.
    if difficulty is not None:
        difficulty_step = int(difficulty[0])
    else:
        difficulty_step = -100

    # If positives are supplied by the DataLoader and we are training, compute a contrastive loss.
    if self.training:
        output_dict["loss"] = 0
        # TODO: We should raise a ValueError if no positives are provided but the loss is not None.
        if self._loss is not None:
            if len(self.augment) == 0:
                # Like the anchors, if we sampled multiple positives, we need to unpack them.
                positives = unpack_batch(positives)
                # Positives are represented by their mean embedding a la
                # https://arxiv.org/abs/1902.09229.
                _, embedded_positives = self._forward_internal(
                    positives, -1, difficulty_step=difficulty_step
                )
                embedded_positive_chunks = []
                for chunk in torch.chunk(
                    embedded_positives, chunks=embedded_anchors.size(0), dim=0
                ):
                    embedded_positive_chunks.append(torch.mean(chunk, dim=0))
                embedded_positives = torch.stack(embedded_positive_chunks)

                # If we are training on multiple GPUs using DistributedDataParallel, a naive
                # application would yield 2 * (batch_size / n_gpus - 1) negatives per GPU. To
                # avoid this, we gather the anchors/positives from every replica onto every
                # other replica so that the correct number of negatives, i.e.
                # 2 * (batch_size - 1), is available before computing the contrastive loss.
                embedded_anchors, embedded_positives = all_gather_anchor_positive_pairs(
                    embedded_anchors, embedded_positives
                )
            else:
                # Instead of sampled positives, generate positives from the anchors with a
                # randomly chosen augmentation whose strength is set by `difficulty_step`.
                augment = np.random.choice(self.augment, 1)[0]
                _, embedded_positives = self._forward_internal(
                    anchors, augment, difficulty_step=difficulty_step
                )
                embedded_anchors, embedded_positives = all_gather_anchor_positive_pairs(
                    embedded_anchors, embedded_positives
                )

            # Get the embeddings into the format that the PyTorch Metric Learning library expects
            # before computing the loss (with an optional mining step).
            embeddings, labels = self._loss.get_embeddings_and_labels(
                embedded_anchors, embedded_positives
            )
            indices_tuple = self._miner(embeddings, labels) if self._miner is not None else None
            output_dict["loss"] += self._loss(embeddings, labels, indices_tuple)

        # The loss may be derived from the contrastive objective, the MLM objective, or both.
        if masked_lm_loss is not None:
            output_dict["loss"] += masked_lm_loss

    return output_dict
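# For context, `self._loss.get_embeddings_and_labels` packs anchors and positives into the flat
# (embeddings, labels) format that PyTorch Metric Learning losses and miners consume. The sketch
# below shows one common way to do this packing, assuming exactly one (mean-pooled) positive per
# anchor; `pack_for_metric_learning` is an illustrative helper, not the wrapper's exact code.
from typing import Tuple

import torch


def pack_for_metric_learning(
    anchors: torch.Tensor, positives: torch.Tensor
) -> Tuple[torch.Tensor, torch.Tensor]:
    # Anchor i and positive i share the integer label i, so a metric-learning loss treats them
    # as a positive pair and every other example in the batch as a negative.
    labels = torch.arange(anchors.size(0), device=anchors.device)
    embeddings = torch.cat([anchors, positives], dim=0)
    labels = torch.cat([labels, labels], dim=0)
    return embeddings, labels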