Пример #1
0
def train_name_searcher(pro_name, version):
    """Train a KG name searcher from the project's graph data file and save it.

    Args:
        pro_name: Project name used for the ``PathUtil`` lookups.
        version: Project version used for the ``PathUtil`` lookups.
    """
    print("train graph name searcher for %s at version %s" % (pro_name, version))

    # Resolve the output location first, then the input graph data file.
    save_path = PathUtil.name_searcher(pro_name=pro_name, version=version)
    graph_file = PathUtil.graph_data(pro_name=pro_name, version=version)

    node_info_factory = ProjectKGNodeInfoFactory()
    name_searcher = KGNameSearcher.train_from_graph_data_file(
        graph_data_path=graph_file,
        node_info_factory=node_info_factory,
    )
    name_searcher.save(save_path)

    print("finish... save to %s" % save_path)
Пример #2
0
 def __init__(self, pro_name, version):
     """Resolve the project's resource paths and prepare the "svm" model inputs.

     Loads the multi-field document collection for the given project and
     version, wraps it in a preprocessed collection, and stores the paths of
     the pretrained node2vec data, the KG name searcher and the "avg_w2v"
     similarity model on the instance.

     Args:
         pro_name: Project name handed to every ``PathUtil`` lookup.
         version: Project version handed to every ``PathUtil`` lookup.
     """
     # Model directory, and the model object constructed with it.
     svm_model_dir = PathUtil.sim_model(
         pro_name=pro_name, version=version, model_type="svm")
     self.model_dir_path = svm_model_dir
     self.model = FilterSemanticTFIDFNode2VectorModel(
         name="svm", model_dir_path=svm_model_dir)

     # Raw documents first, then the preprocessed view built on top of them.
     doc_path = PathUtil.doc(pro_name, version)
     self.document_collection_path = doc_path
     raw_collection = MultiFieldDocumentCollection.load(str(doc_path))
     self.collection = raw_collection
     preprocessor = Preprocessor()
     self.processor = preprocessor
     self.doc_collection = PreprocessMultiFieldDocumentCollection.create_from_doc_collection(
         preprocessor, raw_collection)

     # The remaining values are only stored here; nothing is opened from them.
     self.pretrain_node2vec_path = PathUtil.node2vec(
         pro_name=pro_name, version=version, weight="unweight")
     self.kg_name_searcher_path = PathUtil.name_searcher(pro_name, version)
     self.doc_sim_model_path = PathUtil.sim_model(
         pro_name=pro_name, version=version, model_type="avg_w2v")
Пример #3
0
def train_model(pro_name, version, weight, embedding_size=100):
    """Train the "avg_n2v" similarity model for one project version.

    Loads the project's multi-field document collection, preprocesses it with
    ``CodeDocPreprocessor``, and calls ``AVGNode2VectorModel.train`` with the
    graph data, pretrained node2vec and KG name searcher paths resolved
    through ``PathUtil``.

    Args:
        pro_name: Project name used for every ``PathUtil`` lookup.
        version: Project version used for every ``PathUtil`` lookup.
        weight: Passed to ``PathUtil.node2vec`` to select the pretrained
            node2vec file.
        embedding_size: Embedding size forwarded to
            ``AVGNode2VectorModel.train``. Defaults to 100, the value that
            was previously hard-coded.
            NOTE(review): presumably this has to match the dimension of the
            pretrained node2vec vectors -- confirm before passing another
            value.

    Returns:
        The model directory path that was handed to
        ``AVGNode2VectorModel.train``.
    """
    document_collection_path = PathUtil.doc(pro_name, version)
    collection = MultiFieldDocumentCollection.load(str(document_collection_path))
    processor = CodeDocPreprocessor()
    doc_collection = PreprocessMultiFieldDocumentCollection.create_from_doc_collection(processor, collection)

    graph_data_path = PathUtil.graph_data(pro_name=pro_name, version=version)
    pretrain_node2vec_path = PathUtil.node2vec(pro_name=pro_name, version=version, weight=weight)
    kg_name_searcher_path = PathUtil.name_searcher(pro_name=pro_name, version=version)
    model_dir_path = PathUtil.sim_model(pro_name=pro_name, version=version, model_type="avg_n2v")

    # The trained model object is not needed by callers; only the directory
    # path is returned, so the result of train() is intentionally not bound.
    AVGNode2VectorModel.train(model_dir_path=model_dir_path,
                              doc_collection=doc_collection,
                              embedding_size=embedding_size,
                              pretrain_node2vec_path=pretrain_node2vec_path,
                              graph_data_path=graph_data_path,
                              kg_name_searcher_path=kg_name_searcher_path,
                              )
    return model_dir_path