示例#1
0
 def unsplit_by_doc(
     self, arrays: List[List[Floats3d]]
 ) -> "FullTransformerBatch":
     """Build a new FullTransformerBatch from nested lists of arrays.

     arrays (List[List[Floats3d]]): Nested arrays to recombine. Must be
         non-empty, since the first array decides which array module is used.
     RETURNS (FullTransformerBatch): A batch that reuses this object's spans,
         tokens and alignment, with freshly stacked tensors.
     """
     # The very first array determines the array module used for stacking.
     array_module = get_array_module(arrays[0][0])
     stacked = []
     # Each group yielded by transpose_list is stacked vertically into one
     # array and then converted with xp2torch.
     for group in transpose_list(arrays):
         stacked.append(xp2torch(array_module.vstack(group)))
     return FullTransformerBatch(
         spans=self.spans,
         tokens=self.tokens,
         tensors=stacked,
         align=self.align,
     )
示例#2
0
def cosine_similarity(vec1, vec2) -> float:
    """Compute the cosine similarity of two vectors.

    vec1: First vector, an array type understood by get_array_module.
    vec2: Second vector, handled by the same array module as vec1.
    RETURNS (float): dot(vec1, vec2) / (norm(vec1) * norm(vec2)), or 0.0 if
        either vector consists only of zeros.
    """
    # A vector of only zeros has no direction, so define the similarity as 0.0
    # rather than dividing by a zero norm. `.any()` is falsy only when *every*
    # component is zero; a vector that merely contains a zero component must
    # still be scored normally.
    if not vec1.any() or not vec2.any():
        return 0.0
    xp = get_array_module(vec1)
    norm1 = xp.linalg.norm(vec1)
    norm2 = xp.linalg.norm(vec2)
    # Equal norms say nothing about direction, so there is no shortcut here:
    # always evaluate the full formula.
    return xp.dot(vec1, vec2) / (norm1 * norm2)
示例#3
0
    def unsplit_by_doc(
        self, arrays: List[List[Floats3d]]
    ) -> "FullTransformerBatch":
        """Reassemble a split batch of activations into a FullTransformerBatch.

        The spans, wordpieces and alignment are taken from the current object;
        only the tensors are rebuilt from `arrays`.

        The backward pass relies on this to construct the gradients that get
        passed back into the transformer model.

        arrays (List[List[Floats3d]]): The split activations. Must be
            non-empty, since the first array decides which array module is used.
        RETURNS (FullTransformerBatch): The recombined batch.
        """
        # The very first array determines the array module used for stacking.
        array_module = get_array_module(arrays[0][0])
        stacked = []
        # Each group yielded by transpose_list is stacked vertically into one
        # array and then converted with xp2torch.
        for group in transpose_list(arrays):
            stacked.append(xp2torch(array_module.vstack(group)))
        return FullTransformerBatch(
            spans=self.spans,
            wordpieces=self.wordpieces,
            tensors=stacked,
            align=self.align,
        )
    def predict(self, docs: Iterable[Doc]):
        """Apply the pipeline's model to a batch of docs, without modifying them.

        docs (Iterable[Doc]): The documents to predict. Any iterable is
            accepted, including a one-shot generator.
        RETURNS: The model's predictions for the documents, converted with the
            model's ops. If no doc contains any tokens, the model is not
            called and an all-zero array of shape (n_docs, n_labels) is
            returned instead.

        DOCS: https://spacy.io/api/textcategorizer#predict
        """
        # Materialize once: the batch is iterated more than once below and its
        # length is needed, neither of which is safe on a generator.
        docs = list(docs)
        if not any(len(doc) for doc in docs):
            # Handle cases where there are no tokens in any docs.
            # Take the array module from the model's ops so this path agrees
            # with the regular path, which also goes through self.model.ops.
            xp = self.model.ops.xp
            return xp.zeros((len(docs), len(self.labels)))
        scores = self.model.predict(docs)
        return self.model.ops.asarray(scores)
    def unsplit_by_doc(
        self, arrays: List[List[Floats3d]]
    ) -> "FullTransformerBatch":
        """Reassemble a split batch of activations into a FullTransformerBatch.

        The spans, wordpieces and alignment are taken from the current object;
        only the model output is rebuilt from `arrays`.

        The backward pass relies on this to construct the gradients that get
        passed back into the transformer model.

        arrays (List[List[Floats3d]]): The split activations. Must be
            non-empty, since the first array decides which array module is used.
        RETURNS (FullTransformerBatch): The recombined batch.
        """
        # The very first array determines the array module used for stacking.
        array_module = get_array_module(arrays[0][0])
        # Placeholder ModelOutput that only carries the tensor values, stored
        # under positional keys "output_0", "output_1", ...
        outputs = ModelOutput()
        for idx, group in enumerate(transpose_list(arrays)):
            stacked = array_module.vstack(group)
            outputs[f"output_{idx}"] = xp2torch(stacked)
        return FullTransformerBatch(
            spans=self.spans,
            wordpieces=self.wordpieces,
            model_output=outputs,
            align=self.align,
        )