Exemplo n.º 1
0
 def convert_doc_to_vector(self, model: doc2vec.Doc2Vec,
                           token_series: pd.Series, outpath):
     """Infer a vector for every entry of ``token_series`` and pickle the result.

     Each element of ``token_series`` is passed to ``model.infer_vector``;
     the resulting vectors are stacked row-wise into one matrix, wrapped in
     a DataFrame that reuses the index of ``token_series``, and written to
     ``outpath`` with ``DataFrame.to_pickle``. Nothing is returned.
     """
     # One inferred vector per document, in the order of the series
     vectors = [model.infer_vector(tokens) for tokens in token_series]
     matrix = np.vstack(vectors)
     # Keep the original index so rows can be matched back to their documents
     frame = pd.DataFrame(matrix, index=token_series.index)
     frame.to_pickle(outpath)
Exemplo n.º 2
0
def _infer_document_embeddings(model: Doc2Vec,
                               doc_list: List[List[str]]) -> np.ndarray:
    """
    NOTE: Inference is not deterministic therefore representations will vary between calls
    Returns a 2D array with shape (num_docs, embedding_dimension)
    """
    print('Infering document embeddings..')
    return np.array([model.infer_vector(doc) for doc in doc_list])
Exemplo n.º 3
0
def getAllSentenceEmbeddings(model: Doc2Vec, taggedDocuments: Sequence[TaggedDocument], wordsToEmbed: Path) -> Iterable[SentenceEmbeddings]:
    """Yield one SentenceEmbeddings per target word read from ``wordsToEmbed``.

    Each target word is lower-cased. For every document that
    ``getDocumentsContaining`` returns for that word, a vector is inferred
    from the document's ``sentence`` and stored under the document's
    ``corpusId``. All results are built before the first one is yielded.
    """
    collected = []
    for rawWord in readTargetWords(wordsToEmbed):
        target = rawWord.lower()

        # Group the inferred sentence vectors by the corpus they came from
        vectorsByCorpus = {}
        for document in getDocumentsContaining(target, taggedDocuments):
            vector = model.infer_vector(document.sentence)
            vectorsByCorpus.setdefault(document.corpusId, []).append(vector)

        collected.append(SentenceEmbeddings(target, vectorsByCorpus))
    yield from collected
Exemplo n.º 4
0
def method_row_vec(method_feat: MethodFeature, source_feat: MethodFeature,
                   pos_feat: PositionFeature, proj_feat: ProjectFeature,
                   model: Doc2Vec):
    """Build the flat feature row for one method.

    The row is the concatenation, in this order, of:
      * the vector inferred by ``model`` from ``method_feat.docs``
      * ``source_feat.exception_id``, ``method_feat.package_depth``,
        ``method_feat.param_num``
      * the six top/bottom position values of ``pos_feat``
        (method, class, package)
      * ``proj_feat.dependencies_vec``
      * ``proj_feat.abstract_vec``
      * a single trailing zero
    """
    doc_vec = model.infer_vector(method_feat.docs)
    scalar_part = [
        source_feat.exception_id,
        method_feat.package_depth,
        method_feat.param_num,
    ]
    position_part = [
        pos_feat.method_top,
        pos_feat.method_bottom,
        pos_feat.class_top,
        pos_feat.class_bottom,
        pos_feat.package_top,
        pos_feat.package_bottom,
    ]
    # FIXME: not sure why a 0 has to be appended here
    trailing_zero = np.zeros(1)
    pieces = (
        doc_vec,
        scalar_part,
        position_part,
        proj_feat.dependencies_vec,
        proj_feat.abstract_vec,
        trailing_zero,
    )
    return np.concatenate(pieces)
Exemplo n.º 5
0
def make_abstract_vec(project_folder: str, readme_path: Optional[str],
                      model: Doc2Vec) -> np.ndarray:
    """Return the vector ``model`` infers for a project's abstract tokens.

    The tokens come from ``abstract_to_tokens(project_folder, readme_path)``
    and are passed unchanged to ``model.infer_vector``.
    """
    return model.infer_vector(abstract_to_tokens(project_folder, readme_path))