Example #1
    def load_graph_data(is_jdk=True, version="v1"):
        if is_jdk:
            graph_data_path = PathUtil.jdk_graph_data(version)
        else:
            graph_data_path = PathUtil.android_graph_data(version)

        return GraphData.load(graph_data_path)
Example #2
 def load_doc(project_name="android27", version="v1"):
     """
     project_name: jdk8 android27
     """
     document_collection_path = PathUtil.doc(pro_name=project_name,
                                             version=version)
     return MultiFieldDocumentCollection.load(document_collection_path)
Example #3
 def __init__(self, doc_collection):
     graph_data_path = PathUtil.graph_data(pro_name="jabref",
                                           version="v3.7")
     self.graph_data = GraphData.load(graph_data_path)
     self.doc_collection = doc_collection
     self.entity_words = set()
     self.entity_2_score = dict()
     self.counter = 0
     self.entity_path = str(Path(OUTPUT_DIR) / "entity.json")
Example #4
 def __init__(self,
              doc_collection,
              graph_data_path=PathUtil.graph_data(pro_name="jabref",
                                                  version="v3.10")):
     if isinstance(graph_data_path, GraphData):
         self.graph_data: GraphData = graph_data_path
     else:
         self.graph_data: GraphData = GraphData.load(graph_data_path)
     self.doc_collection = doc_collection
     self.functionClassifier = FastTextClassifier()
     self.G = nx.Graph(self.graph_data.graph)
Example #5
 def __init__(self, input_graph_version):
     self.save_expand_res_path = str(
         Path(OUTPUT_DIR) / "prefix_suffix_relations.pickle")
     self.api_id_2_record_text_path = str(
         Path(OUTPUT_DIR) / "api_id_2_record.pickle")
     self.api_id_2_record_text = Tool.load_pickle(
         self.api_id_2_record_text_path)
     graph_data_path = PathUtil.graph_data(pro_name="jabref",
                                           version=input_graph_version)
     self.graph_data = GraphData.load(graph_data_path)
     self.func_relation_set = {
         RelationNameConstant.has_Functionality_Relation,
         RelationNameConstant.Functionality_Compare_Relation,
         RelationNameConstant.has_Behavior_Relation,
     }
     self.concept_classification = {
         RelationNameConstant.Ontology_IS_A_Relation,
     }
     self.membership = {
         RelationNameConstant.Ontology_Derive_Relation,
     }
     self.characteristic = {
         RelationNameConstant.has_Feature_Relation,
         RelationNameConstant.has_Constraint_Relation,
     }
     self.category_name_2_id = dict()
     self.type_of_class = {
         CodeEntityCategory.CATEGORY_CLASS,
         CodeEntityCategory.CATEGORY_INTERFACE,
         CodeEntityCategory.CATEGORY_EXCEPTION_CLASS,
         CodeEntityCategory.CATEGORY_ERROR_CLASS,
         CodeEntityCategory.CATEGORY_ENUM_CLASS,
         CodeEntityCategory.CATEGORY_ANNOTATION_CLASS
     }
     self.type_of_method = {
         CodeEntityCategory.CATEGORY_METHOD,
         CodeEntityCategory.CATEGORY_CONSTRUCT_METHOD,
         CodeEntityCategory.CATEGORY_BASE_OVERRIDE_METHOD,
     }
     self.CODE_NAME_UTIL = CodeElementNameUtil()
Example #6
from sekg.graph.exporter.graph_data import NodeInfo, GraphData
from sekg.ir.doc.wrapper import MultiFieldDocumentCollection, MultiFieldDocument

from project.utils.path_util import PathUtil

pro_name = "jabref"
doc_path = PathUtil.doc(pro_name=pro_name, version="v1")
graph_data_path = PathUtil.graph_data(pro_name="jabref", version="v1")
graph_data = GraphData.load(graph_data_path)
doc_collection: MultiFieldDocumentCollection = MultiFieldDocumentCollection.load(
    doc_path)
# e.g. org.jabref.model.metadata.event.MetaDataChangedEvent
api_name = "org.jabref.model.metadata.event.MetaDataChangedEvent"
node = graph_data.find_one_node_by_property(property_name='qualified_name',
                                            property_value=api_name)
api_id = node["id"]
doc: MultiFieldDocument = doc_collection.get_by_id(api_id)
return_data = dict()
return_data['doc_info'] = dict()
return_data['api_name'] = api_name
return_data['doc_info']['full_html_description'] = doc.get_doc_text_by_field(
    'full_html_description')
return_data['doc_info']['full_description'] = doc.get_doc_text_by_field(
    'full_description')
return_data['doc_info']['sentence_description'] = doc.get_doc_text_by_field(
    'sentence_description')
print(return_data)
Example #7
 def __init__(self):
     pro_name = "jabref"
     data_dir = PathUtil.doc(pro_name=pro_name, version="v3.3")
     self.doc_collection: MultiFieldDocumentCollection = MultiFieldDocumentCollection.load(
         data_dir)
Example #8
from project.utils.path_util import PathUtil
from sekg.graph.exporter.graph_data import GraphData, NodeInfo
from sekg.ir.doc.wrapper import MultiFieldDocument, MultiFieldDocumentCollection
import json
import definitions
from pathlib import Path

pro_name = 'jabref'
graph_data_path = PathUtil.graph_data(pro_name=pro_name, version='v3.9')
doc_collection_path = PathUtil.doc(pro_name=pro_name, version='v3.1')
doc_collection_save_path = PathUtil.doc(pro_name=pro_name, version='v3.2')
api_to_example_json_path = Path(
    definitions.ROOT_DIR) / "output" / "json" / "api_2_example_sorted.json"
mid_to_method_info_json_path = Path(
    definitions.ROOT_DIR
) / "output" / "json" / "mid_2_method_info_without_comment.json"
graph_data: GraphData = GraphData.load(graph_data_path)
doc_collection: MultiFieldDocumentCollection = MultiFieldDocumentCollection.load(
    doc_collection_path)
'''
Sample code for extracting docs from the document collection
'''


def find_doc(qualified_name):
    node: NodeInfo = graph_data.find_one_node_by_property(
        property_name='qualified_name', property_value=qualified_name)
    if node is None:
        node: NodeInfo = graph_data.find_one_node_by_property_value_starts_with(
            property_name='qualified_name',
            property_value_starter=qualified_name)
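    # The example is truncated here; a minimal sketch of the remaining lookup,
    # assuming the document is fetched by node id as in Example #6
    # (doc_collection.get_by_id is shown there; the early return is an assumption).
    if node is None:
        return None
    return doc_collection.get_by_id(node["id"])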
Example #9
from project.utils.path_util import PathUtil
from sekg.ir.doc.wrapper import MultiFieldDocumentCollection, MultiFieldDocument
from sekg.graph.exporter.graph_data import GraphData
from sekg.graph.exporter.graph_data import NodeInfo
import json
from definitions import OUTPUT_DIR
from pathlib import Path

pro_name = 'jabref'
dc_file_location = PathUtil.doc(pro_name=pro_name, version='v1')
graph_data_file_location = PathUtil.graph_data(pro_name=pro_name,
                                               version='v1.8')
dc_file_destination = PathUtil.doc(pro_name=pro_name, version='v1.1')
comment_json_file = Path(OUTPUT_DIR) / "json" / "mid_2_dp_comment.json"
qualified_name_json_file = Path(
    OUTPUT_DIR) / "json" / "mid_2_qualified_name.json"

if __name__ == '__main__':
    doc_collection: MultiFieldDocumentCollection = MultiFieldDocumentCollection.load(
        dc_file_location)
    graph_data: GraphData = GraphData.load(graph_data_file_location)

    comment_list = []
    with open(comment_json_file, 'r') as comments:
        for line in comments:
            comment_list.append(json.loads(line))

    qualified_name_list = []
    with open(qualified_name_json_file, 'r') as names:
        for line in names:
            qualified_name_list.append(json.loads(line))
Example #10
from flask import Flask, request, jsonify
from flask_cors import CORS
from sekg.ir.doc.wrapper import MultiFieldDocumentCollection, MultiFieldDocument
from sekg.graph.exporter.graph_data import GraphData, NodeInfo

from project.knowledge_service import KnowledgeService
from project.doc_service import DocService
from project.json_service import JsonService
from project.utils.path_util import PathUtil

app = Flask(__name__)
cors = CORS(app, resources={r"/*": {"origins": "*"}})
pro_name = "jabref"
data_dir = PathUtil.doc(pro_name=pro_name, version="v1.2")
graph_data_path = PathUtil.graph_data(pro_name=pro_name, version="v1.8")
graph_data: GraphData = GraphData.load(graph_data_path)
doc_collection: MultiFieldDocumentCollection = MultiFieldDocumentCollection.load(
    data_dir)
knowledge_service = KnowledgeService(doc_collection)
doc_service = DocService()
json_service = JsonService()


@app.route('/')
def hello():
    return 'success'


# search doc info according to method name
@app.route('/get_doc/', methods=["GET", "POST"])
def doc_info():
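    # The handler body is truncated in this example; a minimal sketch, assuming the
    # request carries a fully qualified API name (the "qualified_name" parameter is
    # hypothetical) and reusing the lookup pattern shown in Example #6.
    api_name = request.values.get("qualified_name", "")
    node = graph_data.find_one_node_by_property(property_name='qualified_name',
                                                property_value=api_name)
    if node is None:
        return jsonify({"api_name": api_name, "doc_info": None})
    doc: MultiFieldDocument = doc_collection.get_by_id(node["id"])
    return jsonify({
        "api_name": api_name,
        "doc_info": {
            "full_description": doc.get_doc_text_by_field('full_description'),
            "sentence_description": doc.get_doc_text_by_field('sentence_description'),
        }
    })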
Example #11
                node_id)
            if node_doc:
                full_description = node_doc.get_doc_text_by_field(
                    'full_description')
                for concept_list_item in self.concepts_list:
                    concept_node_id = -1
                    for concept in concept_list_item:
                        if concept in self.concept_2_node_id:
                            concept_node_id = self.concept_2_node_id[concept]
                            break
                    if concept_node_id >= 0:
                        for concept in concept_list_item:
                            if concept in full_description:
                                self.graph.add_relation(
                                    node_id, "has concept", concept_node_id)
                                break
        print("relation添加完毕")


if __name__ == "__main__":
    concept_and_relation_path = Path(DATA_DIR) / "concept_and_relation"
    concept_2_graph = Concept2Graph(
        PathUtil.graph_data("jabref", "v3.8"),
        PathUtil.doc(pro_name="jabref", version='v3.3'),
        str(concept_and_relation_path / "concepts.json"),
        str(concept_and_relation_path / "relations.json"))
    concept_2_graph.add_concept_2_graph()
    concept_2_graph.add_relation_2_graph()
    concept_2_graph.graph.save(PathUtil.graph_data("jabref", "v3.9"))
    print("图导入完成")
Example #12
from pathlib import Path

from sekg.pipeline.base import KGBuildPipeline

from definitions import OUTPUT_DIR
from project.extractor_module.structure_extractor.characteristic_structure_extractor import CharacteristicStructureExtractor
from project.utils.path_util import PathUtil

if __name__ == '__main__':
    pipeline = KGBuildPipeline()
    pro_name = "jabref"
    graph_data_v1_path = PathUtil.graph_data(pro_name=pro_name, version="v1")
    pipeline.load_graph(graph_data_v1_path)
    component1 = CharacteristicStructureExtractor()
    component1.set_json_save_path(Path(OUTPUT_DIR) / "json" / "name_characteristic.json")
    component1.set_save_path(PathUtil.graph_data(pro_name=pro_name, version="v1.1"))
    pipeline.add_component("从名称和结构中抽取特征", component1)
    pipeline.run()
Example #13
        label_info = {"entity"}
        type_class = self.get_record_entity_type_by_relation(statement.r_name)
        label_info.add(str(type_class.LABEL))
        label_info.add(str("statement"))
        node_properties = {
            type_class.PRIMARY_PROPERTY_NAME: statement.e_name,
        }
        for extra_info_key in statement.extra_info:
            node_properties[extra_info_key] = statement.extra_info[
                extra_info_key]
        node_properties["which_extractor"] = statement.which_extractor
        node_properties["e_type"] = statement.e_type
        node_properties["s_name"] = statement.s_name
        node_properties["r_name"] = statement.r_name
        graph_id = self.graph_data.add_node(
            label_info,
            node_properties,
            primary_property_name=type_class.PRIMARY_PROPERTY_NAME)
        return graph_id


if __name__ == '__main__':
    start_time = time.asctime(time.localtime(time.time()))
    print(start_time)
    api_diff_graph_builder = APIDiffGraphBuilder(input_graph_version="v1.4")
    api_diff_graph_builder.build_simple_graph()
    api_diff_graph_builder.graph_data.save(
        PathUtil.graph_data(pro_name="jabref", version="v1.5"))
    end_time = time.asctime(time.localtime(time.time()))
    print(end_time)
Example #14
            return "No sample code available."
        sample_code = doc.get_doc_text_by_field('sample_code')
        if sample_code is None or len(sample_code) == 0:
            return "No sample code available."
        else:
            return sample_code[0][2:]

    # Return related APIs
    def get_related_api(self, qualified_name):
        result = dict()
        api_id = self.get_api_id_by_name(qualified_name)
        node: NodeInfo = self.graph_data.find_nodes_by_ids(api_id)[0]
        related_api = list()
        related_api_simplified = list()
        related_api = node['properties']['simrank']
        for i in related_api:
            related_api_simplified.append(i[i.rfind('.') + 1:])
        result['related_api'] = related_api
        result['related_api_simplified'] = related_api_simplified
        return result


if __name__ == '__main__':
    pro_name = "jabref"
    data_dir = PathUtil.doc(pro_name=pro_name, version="v3.3")
    doc_collection: MultiFieldDocumentCollection = MultiFieldDocumentCollection.load(
        data_dir)

    knowledge_service = KnowledgeService(doc_collection)
    print(knowledge_service.get_related_api("org.jabref.model.entry.BibEntry"))
Example #15
from flask import Flask
from flask_cors import CORS
from sekg.ir.doc.wrapper import MultiFieldDocumentCollection
from sekg.graph.exporter.graph_data import GraphData, NodeInfo

from project.knowledge_service import KnowledgeService
from project.doc_service import DocService
from project.json_service import JsonService
from project.utils.path_util import PathUtil
from pathlib import Path
import definitions
import json

app = Flask(__name__)
cors = CORS(app, resources={r"/*": {"origins": "*"}})

pro_name = "jabref"
doc_dir = PathUtil.doc(pro_name=pro_name, version="v3.3")
graph_data_path = PathUtil.graph_data(pro_name=pro_name, version="v3.10")
graph_data: GraphData = GraphData.load(graph_data_path)
doc_collection: MultiFieldDocumentCollection = MultiFieldDocumentCollection.load(
    doc_dir)
simple_qualified_name_map_path = Path(
    definitions.ROOT_DIR) / "output" / "simple_qualified_name_map.json"

knowledge_service = KnowledgeService(doc_collection, graph_data)
doc_service = DocService()
json_service = JsonService()
with open(simple_qualified_name_map_path, 'r') as f:
    json_str = f.read()
simple_qualified_name_map = json.loads(json_str)
print("load complete")
Example #16
from project.utils.path_util import PathUtil
from pathlib import Path
from sekg.graph.exporter.graph_data import GraphData, NodeInfo
from sekg.ir.doc.wrapper import MultiFieldDocument, MultiFieldDocumentCollection
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.cluster import KMeans
import definitions
import json
'''
Cluster the sample code snippets and output the results
'''

pro_name = "jabref"
graph_data_path = PathUtil.graph_data(pro_name=pro_name, version="v3.9")
doc_collection_path = PathUtil.doc(pro_name=pro_name, version="v3.2")
doc_collection_save_path = PathUtil.doc(pro_name=pro_name, version="v3.3")
api_to_example_json_path = Path(
    definitions.ROOT_DIR) / "output" / "json" / "api_2_example_sorted.json"
mid_to_method_info_json_path = Path(
    definitions.ROOT_DIR
) / "output" / "json" / "mid_2_method_info_without_comment.json"

graph_data: GraphData = GraphData.load(graph_data_path)
doc_collection: MultiFieldDocumentCollection = MultiFieldDocumentCollection.load(
    doc_collection_path)

# Read the sample code files. api_to_mid: the sample-code mids for each API; methods_info: the code for each mid
with open(api_to_example_json_path, 'r') as f:
    api_to_mid = json.load(f)
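
# The rest of the script is truncated in this example; a minimal sketch of the
# clustering step, assuming methods_info maps each mid to its code text (the JSON
# layout and the cluster count of 5 are assumptions, not taken from the project).
with open(mid_to_method_info_json_path, 'r') as f:
    methods_info = json.load(f)

corpus = [str(code) for code in methods_info.values()]
counts = CountVectorizer().fit_transform(corpus)
tfidf = TfidfTransformer().fit_transform(counts)
kmeans = KMeans(n_clusters=5, random_state=0).fit(tfidf)
for mid, cluster_label in zip(methods_info.keys(), kmeans.labels_):
    print(mid, cluster_label)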
Example #17
from project.utils.path_util import PathUtil
from sekg.graph.exporter.graph_data import GraphData, NodeInfo
from project.classification_module import method_classification
from nltk.corpus import wordnet as wn
from project.classification_module.method_classification import split

if __name__ == '__main__':
    # 1. Get all method nodes in the graph  2. Pass each qualified_name to the classification module for labeling
    pro_name = "jabref"
    graph_data_path = PathUtil.graph_data(pro_name=pro_name, version="v1.6")
    graph_data: GraphData = GraphData.load(graph_data_path)
    graph_data_output_path = PathUtil.graph_data(pro_name=pro_name,
                                                 version='v1.7')
    methods_id: set = graph_data.get_node_ids_by_label("method")
    nouns = {x.name().split('.', 1)[0] for x in wn.all_synsets('n')}
    verbs = {x.name().split('.', 1)[0] for x in wn.all_synsets('v')}

    count = [0, 0, 0, 0, 0]
    for i in iter(methods_id):
        node: NodeInfo = graph_data.find_nodes_by_ids(i)[0]
        qualified_name = node['properties']['qualified_name']
        label = method_classification.basic_classification(qualified_name)
        if label == "undefined":
            first_word = split(camel_case=qualified_name)
            if first_word in verbs:
                label = "mutator"
            if first_word in nouns:
                label = "accessor"

        if label == "accessor":
            count[0] += 1
Example #18
import networkx as nx

from sekg.graph.exporter.graph_data import GraphData, NodeInfo
from project.utils.path_util import PathUtil

if __name__ == '__main__':
    pro_name = "jabref"
    graph_data_path = PathUtil.graph_data(pro_name=pro_name, version="v1.3")
    output_path = PathUtil.graph_data(pro_name=pro_name, version="v1.4")

    graph_data: GraphData = GraphData.load(graph_data_path)
    nx_graph = nx.Graph(graph_data.graph)
    result = nx.pagerank(nx_graph)
    # result is a dict of {node_id: pr_value}; write each value into
    # node['properties']['pr_value'] of the corresponding graph node
    for node_id, pr_value in result.items():
        node: NodeInfo = graph_data.find_nodes_by_ids(node_id)[0]
        node["properties"]["pr_value"] = pr_value
    graph_data.save(output_path)
"""

对方法进行分类
将方法分为: accessor, mutator, creational, constructor, undefined五类

"""

from sekg.graph.exporter.graph_data import GraphData, NodeInfo
from project.utils.path_util import PathUtil
from nltk.corpus import wordnet as wn

pro_name = "jabref"
graph_data_path = PathUtil.graph_data(pro_name=pro_name, version="v3.4")
graph_data: GraphData = GraphData.load(graph_data_path)

accessor_key_word = ("get", "toString", "find", "search", "test", "contains", "is", "has", "show")
mutator_key_word = ("set", "add", "delete", "move", "remove", "parse", "insert", "extract", "open")
creational_key_word = ("copy", "construct", "create")
nouns = {x.name().split('.', 1)[0] for x in wn.all_synsets('n')}
verbs = {x.name().split('.', 1)[0] for x in wn.all_synsets('v')}


def get_pure_method_name_without_parameter(qualified_name=None):
    if qualified_name is None or qualified_name == "":
        raise ValueError("qualified name needed")
    qualified_name = qualified_name[:qualified_name.find("(")]
    result = qualified_name[qualified_name.rfind(".")+1:]
    return result


# Perform the most basic classification based on a set of keywords
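# The function itself is truncated in this example; a minimal sketch, assuming the
# labels match those used by the callers elsewhere ("constructor", "accessor",
# "mutator", "creational", "undefined") and that str.startswith is applied to the
# keyword tuples defined above.
def basic_classification(qualified_name):
    method_name = get_pure_method_name_without_parameter(qualified_name)
    # A constructor shares its simple name with the declaring class.
    simple_names = qualified_name.split("(")[0].split(".")
    if len(simple_names) >= 2 and simple_names[-1] == simple_names[-2]:
        return "constructor"
    if method_name.startswith(accessor_key_word):
        return "accessor"
    if method_name.startswith(mutator_key_word):
        return "mutator"
    if method_name.startswith(creational_key_word):
        return "creational"
    return "undefined"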
Example #20
from project.utils.path_util import PathUtil
from sekg.graph.exporter.graph_data import GraphData, NodeInfo
from project.classification_module import method_classification
from nltk.corpus import wordnet as wn
from project.classification_module.method_classification import split
'''
Classify methods
'''

if __name__ == '__main__':
    # 1. Get all method nodes in the graph  2. Pass each qualified_name to the classification module for labeling
    pro_name = "jabref"
    graph_data_path = PathUtil.graph_data(pro_name=pro_name, version="v3.4")
    graph_data: GraphData = GraphData.load(graph_data_path)
    graph_data_output_path = PathUtil.graph_data(pro_name=pro_name,
                                                 version='v3.5')
    methods_id: set = graph_data.get_node_ids_by_label("method")
    nouns = {x.name().split('.', 1)[0] for x in wn.all_synsets('n')}
    verbs = {x.name().split('.', 1)[0] for x in wn.all_synsets('v')}

    count = [0, 0, 0, 0, 0]
    for i in iter(methods_id):
        node: NodeInfo = graph_data.find_nodes_by_ids(i)[0]
        qualified_name = node['properties']['qualified_name']
        label = method_classification.basic_classification(qualified_name)
        if label == "undefined":
            first_word = split(camel_case=qualified_name)
            if first_word in verbs:
                label = "mutator"
            if first_word in nouns:
                label = "accessor"
Example #21
from pathlib import Path

from sekg.pipeline.base import KGBuildPipeline

from definitions import OUTPUT_DIR
from project.extractor_module.structure_extractor.category_structure_extractor import CategoryStructureExtractor
from project.extractor_module.structure_extractor.characteristic_structure_extractor import CharacteristicStructureExtractor
from project.extractor_module.structure_extractor.func_name_extractor import FuncNameExtractor
from project.utils.path_util import PathUtil

if __name__ == '__main__':
    pipeline = KGBuildPipeline()
    pro_name = "jabref"
    graph_data_v1_path = PathUtil.graph_data(pro_name=pro_name, version="v1")
    pipeline.load_graph(graph_data_v1_path)
    component1 = CharacteristicStructureExtractor()
    component1.set_json_save_path(
        Path(OUTPUT_DIR) / "json" / "name_characteristic.json")
    component1.set_save_path(
        PathUtil.graph_data(pro_name=pro_name, version="v1.1"))
    pipeline.add_component("从名称和结构中抽取特征", component1)
    pipeline.run()

    pipeline = KGBuildPipeline()
    pro_name = "jabref"
    graph_data_v1_path = PathUtil.graph_data(pro_name=pro_name, version="v1.1")
    pipeline.load_graph(graph_data_v1_path)
    component1 = FuncNameExtractor()
    component1.set_json_save_path(
        Path(OUTPUT_DIR) / "json" / "name_functionality.json")
    component1.set_save_path(
Example #22
            print(self.counter)
            self.graph_data.add_relation(start_id, relation_str, end_id)
        except Exception as e:
            print(e)

    def load_entity_words(self):
        load_dict = self.load_json(self.entity_path)
        for each in load_dict:
            self.entity_words.add(each["entity_name"])
            self.entity_2_score[each["entity_name"]] = each["tf_idf"]

    def load_json(self, path):
        with open(path, "r") as load_f:
            load_dict = json.load(load_f)
            return load_dict

    def save_graph(self, output_path):
        self.graph_data.save(output_path)


if __name__ == '__main__':
    pro_name = "jabref"
    data_dir = PathUtil.doc(pro_name=pro_name, version="v3.3")
    doc_collection: MultiFieldDocumentCollection = MultiFieldDocumentCollection.load(
        data_dir)
    entity_service = EntityService(doc_collection)
    entity_service.link_all_api_entity()
    entity_service.save_graph(
        str(PathUtil.graph_data(pro_name="jabref", version="v3.8")))
    print("counter:" + str(entity_service.counter))