Exemplo n.º 1
0
    def load(self, text: List[str]) -> np.ndarray:
        """
        Extract from the embeddings model the vectors of the words contained in text

        The input is first passed through check_tokenized and the matrix is sized
        from its result, so the number of rows always matches the number of words
        that are actually looked up.

        Args:
            text (list<str>): list of words of which vectors will be extracted

        Returns:
            embedding_matrix (np.ndarray): bi-dimensional numpy array, each row is a
                term vector; a word whose lookup raises KeyError gets an all-zero row
        """
        # Normalise before allocating: sizing the matrix from the raw argument gives
        # the wrong row count whenever check_tokenized changes the number of items.
        words = check_tokenized(text)
        vector_size = self.get_vector_size()

        # Zero-filled instead of uninitialised memory, so rows of unknown words are
        # already correct and no row can ever expose arbitrary values.
        embedding_matrix = np.zeros(shape=(len(words), vector_size))

        for i, word in enumerate(words):
            lowered = word.lower()
            try:
                embedding_matrix[i, :] = self.get_model().get_word_vector(lowered)
            except KeyError:
                # Word not known to the model: its row stays all zeros.
                continue

        return embedding_matrix
 def process_data_granularity(self, doc_data: str) -> List[str]:
     """
     Hand the document data to check_tokenized and return its result unchanged

     Args:
         doc_data (str): data of the document to process

     Returns:
         whatever check_tokenized returns for doc_data (annotated as a list of
         strings; presumably the tokens of the text - confirm against the helper)
     """
     tokens = check_tokenized(doc_data)
     return tokens
Exemplo n.º 3
0
 def process_data_granularity(self,
                              field_data: Union[List[str], str]
                              ) -> List[str]:
     """
     Hand the field data to check_tokenized and return its result unchanged

     Args:
         field_data (list<str> or str): data of the field to process, either
             already split into a list of strings or as a single string

     Returns:
         whatever check_tokenized returns for field_data (annotated as a list of
         strings; presumably the tokens of the text - confirm against the helper)
     """
     processed = check_tokenized(field_data)
     return processed