def __init__(self):
    self.question_preprossor = QuestionPreprossor()
    self.question_analyzer = QuestionAnalyzer()
    self.candidate_answer_generator = CandidateAnswerSetGenerator()
    # self.answer_generator = AnswerGenerator()
    self.answer_generator = None
    self.client = DefaultGraphAccessor(GraphClient())
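A minimal sketch of how these components might be chained in a question-answering flow, assuming this constructor belongs to the QuestionAnswerSystem class seen in Example #17; the method names preprocess, analyze, and generate are assumptions, not the library's confirmed API.

# Hypothetical usage sketch; the method names below are assumptions.
qa_system = QuestionAnswerSystem()
question = "which class implements the List interface?"
clean_question = qa_system.question_preprossor.preprocess(question)      # assumed method
analysis = qa_system.question_analyzer.analyze(clean_question)           # assumed method
candidates = qa_system.candidate_answer_generator.generate(analysis)     # assumed method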
Example #2
    def init(self, path="word2vec_api_software_wiki.txt", binary=True):
        self.session = EngineFactory.create_session()
        self.graphClient = DefaultGraphAccessor(GraphClient(server_number=4))

        self.entity_vector_model = EntityVectorComputeModel()
        self.entity_vector_model.init_word2vec_model(path=path, binary=binary)
        print("init complete")
    def test_expand_nodes_with_filter_nodes(self):
        graphClient = DefaultGraphAccessor(GraphClient())

        # test_case = [(55730, True, 50, 50), (15555, True, 4, 4), (93008, True, 10, 11), (1708, True, 8, 7)]
        test_case = [
            (55730, True, 50, 50),
        ]
        graphJsonParser = GraphJsonParser()
        for node_id, is_valid, node_num, relation_num in test_case:
            print("test case=", node_id, is_valid, node_num, relation_num)
            subgraph = graphClient.expand_node_for_adjacent_nodes_to_subgraph(
                node_id)
            subgraph_json = graphJsonParser.parse_subgraph_to_public_json(
                subgraph)
            print(subgraph_json)
            if is_valid:
                self.assertNotEqual(subgraph_json, {
                    "nodes": [],
                    "relations": []
                })
            else:
                self.assertEqual(subgraph_json, {"nodes": [], "relations": []})
                continue
            self.assertEqual(node_num, len(subgraph_json["nodes"]))
            self.assertEqual(relation_num, len(subgraph_json["relations"]))

            for n in subgraph_json["nodes"]:
                print(n)
            for r in subgraph_json["relations"]:
                print(r)
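For reference, the public JSON returned by parse_subgraph_to_public_json has the two top-level keys asserted above; the per-node and per-relation fields sketched below are assumptions for illustration only.

# Shape exercised by the assertions above; the inner fields are hypothetical.
expected_subgraph_shape = {
    "nodes": [
        {"id": 55730, "name": "..."},                        # hypothetical node fields
    ],
    "relations": [
        {"startId": 55730, "endId": 55731, "type": "..."},   # hypothetical relation fields
    ],
}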
Example #4
    def get_all_nodes(self, step=5000, labels=None):
        total_node_list = []
        self.graph_accessor = DefaultGraphAccessor(self._graph)
        if labels is None:
            labels = []

        if labels:
            max_id = self.graph_accessor.get_max_id_for_labels(*labels)
            min_id = self.graph_accessor.get_min_id_for_labels(*labels)
        else:
            max_id = self.graph_accessor.get_max_id_for_node()
            min_id = 0

        iteration = range(min_id, max_id, step)
        for start_id in iteration:
            try:
                end_id = min(start_id + step, max_id)
                nodes_in_scope = self.get_nodes_in_scope(start_id, end_id, labels)
                _logger.info("start id=%s,end_id=%s", str(start_id), str(end_id))
                if nodes_in_scope is not None:
                    _logger.info("get nodes in scope successfully")
                    total_node_list.extend(nodes_in_scope)
                else:
                    _logger.info("get nodes in scope failed")
            except Exception as error:
                _logger.exception("failed to get nodes in scope for start_id=%s: %s", start_id, error)
        return total_node_list
    def test_clean_labels(self):
        self.graphClient = DefaultGraphAccessor(GraphClient(server_number=1))

        node = self.graphClient.find_node_by_id(16)
        self.assertEqual(NodeCleaner.clean_labels(node), [u'software', u'background knowledge', u'WikiData'])
        node = self.graphClient.find_node_by_id(177777)
        print(node)
        self.assertEqual(NodeCleaner.clean_labels(node), [u'background knowledge', u'WikiData'])
    def test_sort_nodes_by_quality(self):
        graphClient = DefaultGraphAccessor(GraphClient(server_number=1))
        graphJson = GraphJsonParser()
        keyword = "java"
        top_number = 10
        subgraph = graphClient.search_nodes_by_name_in_subgraph(
            keyword, top_number)
        print(subgraph)
        nodes = graphJson.parse_nodes_in_subgraph_to_public_json(subgraph)

        print(nodes)
    def build_aliases_for_domain_entity(self):

        EntityForQA.delete_names_by_source(session=self.session, source="domain entity")

        client = GraphClient(server_number=4)
        accessor = DomainEntityAccessor(client)
        default_accessor = DefaultGraphAccessor(client)
        domain_entity_list = accessor.get_all_domain_entity()
        for domain_entity in domain_entity_list:
            entity = EntityForQA(kg_id=default_accessor.get_id_for_node(node=domain_entity),
                                 entity_id=domain_entity['domain_entity_id'], source="domain entity",
                                 attr='domain_entity_id', attr_value=domain_entity['domain_entity:name'])

            self.session.add(entity)
        self.session.commit()
    def init(self, vector_dir_path="./model/"):
        self.kg_models = KnowledgeGraphFeafureModels()
        self.kg_models.init(vector_dir_path=vector_dir_path)

        self._session = EngineFactory.create_session(echo=False)
        self._entity_extractor = EntityExtractor()

        # self._tf_idf_model = TFIDFModel()
        # self._tf_idf_model.load(dict_type=2)

        self.qa_searcher = QAEntitySearcher()
        client = GraphClient(server_number=4)
        self.semanticSearchAccessor = SemanticSearchAccessor(client)
        self.defaultAccessor = DefaultGraphAccessor(client)
        self._logger = Logger("QAResultSearch").get_log()
    def init_graph_index(self):
        if self.graph_client is None:
            print("graphClient is None")
            return
        accessor = DefaultGraphAccessor(graph=self.graph_client)
        accessor.create_index("CREATE INDEX ON :`api`(`api_id`)")
        accessor.create_index("CREATE INDEX ON :`entity`(`api_id`)")
        accessor.create_index("CREATE INDEX ON :`wikidata`(`wd_item_id`)")
        accessor.create_index("CREATE INDEX ON :`entity`(`wd_item_id`)")
        accessor.create_index("CREATE INDEX ON :`entity`(`site:enwiki`)")
        accessor.create_index("CREATE INDEX ON :`wikipedia`(`site:enwiki`)")
        accessor.create_index("CREATE INDEX ON :`wikidata`(`site:enwiki`)")
        accessor.create_index("CREATE INDEX ON :`entity`(`sentence_id`)")
        accessor.create_index("CREATE INDEX ON :`sentence`(`sentence_id`)")

        accessor.create_index("CREATE INDEX ON :`entity`(`domain_entity_id`)")
        accessor.create_index(
            "CREATE INDEX ON :`domain entity`(`domain_entity_id`)")
Example #10
    def walk(self):
        """Run self.generationNumber random walks of self.stepNumber steps over the
        graph and accumulate a visit score per node id, logging progress to log1.txt."""
        file = open('log1.txt', 'w')
        client = DefaultGraphAccessor()
        iterator = 0
        possibility_list = [0] * RandomWalk.max_id
        adjacent_node_list = [0] * RandomWalk.max_id
        file.writelines("begin:  " + time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) + '\n')
        file.flush()
        while iterator < self.generationNumber:
            file.writelines("iteration:   " + str(iterator) + "   " +
                            time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) + '\n')
            file.flush()
            current_node_index = 1
            step_length_list = [0] * RandomWalk.max_id
            for i in range(0, self.stepNumber):
                # Walk only over node ids 1..7519 and 7525..max_id; ids 7520..7524 are skipped.
                if (current_node_index > 0 and current_node_index < 7520) or current_node_index > 7524:
                    # Cache each node's adjacency list the first time it is visited.
                    if adjacent_node_list[current_node_index - 1] == 0:
                        adjacent_node = client.get_adjacent_node_id_list(current_node_index)
                        adjacent_node_list[current_node_index - 1] = adjacent_node
                    else:
                        adjacent_node = adjacent_node_list[current_node_index - 1]
                    # Move to a uniformly random neighbour.
                    next_node_index = adjacent_node[random.randint(0, len(adjacent_node) - 1)]
                    if (next_node_index > 1 and next_node_index < 7520) or next_node_index > 7524:
                        current_node_index = next_node_index
                    # Record the step at which a node is first reached; earlier hits score higher.
                    if step_length_list[next_node_index - 1] == 0 and next_node_index != 1:
                        step_length_list[next_node_index - 1] = i + 1
                        possibility_list[next_node_index - 1] += 1 - (
                            step_length_list[next_node_index - 1] / float(self.stepNumber))
            iterator += 1
        file.writelines("end:  " + time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) + '\n')
        file.flush()
        file.close()
        # Average the accumulated scores over the number of walks and print the non-zero ones.
        for i in range(0, RandomWalk.max_id):
            possibility_list[i] /= self.generationNumber
            if possibility_list[i] > 0:
                print(str(i) + "  " + str(possibility_list[i]))
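The score accumulated in possibility_list rewards nodes that are reached early in a walk; the helper below restates that update purely for illustration and is not part of the original class.

def first_hit_score(first_hit_step, step_number):
    # A node first reached at (1-based) step s of a step_number-step walk
    # contributes 1 - s / step_number, so earlier hits weigh more; the scores
    # are later divided by generationNumber to average over all walks.
    return 1.0 - first_hit_step / float(step_number)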
    def init(self, vector_dir_path="./model/"):
        time_start = time.time()
        print("start init the model at %d" % time_start)
        client = GraphClient(server_number=4)

        self.defaultAccessor = DefaultGraphAccessor(client)

        self._api_wv = EntityVectorModel.load(vector_dir_path + self.WORD2VEC_FILE_LIST["api"], binary=True)
        self._domain_entity_wv = EntityVectorModel.load(vector_dir_path + self.WORD2VEC_FILE_LIST["domain entity"],
                                                        binary=True)
        self._wiki_wv = EntityVectorModel.load(vector_dir_path + self.WORD2VEC_FILE_LIST["wikidata"], binary=True)
        self._sentence_wv = EntityVectorModel.load(vector_dir_path + self.WORD2VEC_FILE_LIST["sentence"], binary=True)
        self._graph_wv = EntityVectorModel.load(vector_dir_path + self.WORD2VEC_FILE_LIST["graph"], binary=True)
        self._entity_vector_compute_model = EntityVectorComputeModel()
        self._entity_vector_compute_model.init_word2vec_model(vector_dir_path + self.WORD2VEC_FILE_LIST["word2vec"],
                                                              binary=True)
        self.NP_VECTOR_NOT_EXIST = np.zeros(128)
        self.NP_VECTOR_NOT_EXIST[1] = 1e-07

        time_end = time.time()
        print("init complete in %d" % (time_end - time_start))
Example #12
    def start_import_for_api_entity(self, linking_result_file):
        graph_client = GraphClient(server_number=4)
        default_graph_client = DefaultGraphAccessor(graph_client)
        api_entity_graph_client = DomainEntityAccessor(graph_client)
        api_entity_graph_client.delete_all_api_entity_to_wikipedia_relation()
        print("delete all old may link relation complete")

        with open(linking_result_file, 'r') as f:
            link_relation_list = json.load(f)

        for each in link_relation_list:
            api_entity_id = each['api_entity_id']
            wikipedia_entity_id = each['wikipedia_entity_id']
            if api_entity_id is None or wikipedia_entity_id is None:
                continue
            api_entity = api_entity_graph_client.find_api_entity_node_by_id(api_entity_id)
            if api_entity is None:
                continue
            wikipedia_entity = default_graph_client.find_node_by_id(wikipedia_entity_id)
            if wikipedia_entity is None:
                continue
            api_entity_graph_client.create_entity_to_general_concept_relation(api_entity, wikipedia_entity)
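For reference, the keys read in the loop above imply the following shape for the linking result file; the concrete ids below are placeholders, not real data.

example_link_relation_list = [
    {"api_entity_id": 123, "wikipedia_entity_id": 456},
    {"api_entity_id": 124, "wikipedia_entity_id": None},   # entries like this are skipped
]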
Example #13
import MySQLdb

from skgraph.graph.accessor.factory import NodeBuilder
from skgraph.graph.accessor.graph_accessor import DefaultGraphAccessor, GraphClient
from skgraph.util.code_text_process import clean_html_text

# MySQL connection
conn = MySQLdb.connect(host='10.141.221.73',
                       port=3306,
                       user='******',
                       passwd='root',
                       db='fdroid',
                       charset="utf8")
cur = conn.cursor()

# neo4j connect
connect_graph = DefaultGraphAccessor(GraphClient(server_number=1)).graph


# read from mysql
def mySQLReader(start, end):
    node1lib = connect_graph.find_one(label="schema",
                                      property_key="wd_item_id",
                                      property_value="Q21127166")

    # get all-version library
    cur.execute(
        "select * from jdk_library where library_id >= %s and library_id < %s",
        (start, end))
    lib_sql_data_list = cur.fetchall()

    for lib_node_mysql_data_index in range(0, len(lib_sql_data_list)):
Example #14
import codecs
import json

from py2neo import Relationship

from skgraph.graph.accessor.graph_accessor import GraphClient, DefaultGraphAccessor
from skgraph.graph.accessor.graph_client_for_awesome import AwesomeGraphAccessor
from skgraph.graph.accessor.graph_client_for_wikipedia import WikipediaGraphAccessor
from shared.logger_util import Logger

_logger = Logger("AwesomeImporter").get_log()

awesomeGraphAccessor = AwesomeGraphAccessor(GraphClient(server_number=0))
wikipediaGraphAccessor = WikipediaGraphAccessor(awesomeGraphAccessor)
defaultGraphAccessor = DefaultGraphAccessor(awesomeGraphAccessor)

baseGraphClient = awesomeGraphAccessor.graph

file_name = "awesome_item_category_related_to_wikipedia_relation_list.json"
with codecs.open(file_name, 'r', 'utf-8') as f:
    relation_list = json.load(f)
for tag_relation in relation_list:
    start_entity_name = tag_relation["start_entity_name"]
    relation = tag_relation["relation"]
    end_url = tag_relation["end_url"]
    start_node = awesomeGraphAccessor.find_awesome_cate_by_name(
        start_entity_name)
    end_node = defaultGraphAccessor.get_node_by_wikipedia_link(end_url)
    if end_node is None:
        end_node = wikipediaGraphAccessor.create_wikipedia_item_entity_by_url(
            end_url)
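For reference, each entry consumed by the loop above carries the three keys it reads; the values below are placeholders, not real data.

example_tag_relation = {
    "start_entity_name": "awesome category name",           # placeholder
    "relation": "related to",                                # placeholder relation label
    "end_url": "https://en.wikipedia.org/wiki/...",          # placeholder URL
}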
    def setUp(self):
        self.graphClient = DefaultGraphAccessor(GraphClient())
        self.nodeCleaner = NodeCleaner()

    def test_rename_property(self):
        self.graphClient = DefaultGraphAccessor(GraphClient(server_number=0))
        node_list = self.graphClient.find_by_name_property("awesome item", "acl9")
        result = rename_property(node_list)
        print(result)
Example #17
from skgraph.graph.node_cleaner import GraphJsonParser

reload(sys)
sys.setdefaultencoding("utf-8")

app = Flask(__name__)
CORS(app)
db_handler = SQLAlchemyHandler()
db_handler.setLevel(logging.WARN)  # Only serious messages
app.logger.addHandler(db_handler)

logger = Logger("neo4jServer").get_log()
logger.info("create logger")

client = GraphClient(server_number=4)
graphClient = DefaultGraphAccessor(client)
logger.info("create graphClient")

apiGraphClient = APIGraphAccessor(client)
semanticSearchAccessor = SemanticSearchAccessor(client)

# api_entity_linker = APIEntityLinking()
# logger.info("create api_entity_linker object")

questionAnswerSystem = QuestionAnswerSystem()
logger.info("create questionAnswerSystem")

dbSOPostSearcher = SOPostSearcher(EngineFactory.create_so_session(),
                                  logger=app.logger)
logger.info("create SO POST Searcher")
Example #18
# -*- coding:utf8 -*-
import sys
import nltk
from skgraph.graph.accessor.graph_accessor import GraphClient
from skgraph.graph.accessor.graph_accessor import DefaultGraphAccessor

reload(sys)
sys.setdefaultencoding('utf8')

graph_client = GraphClient(server_number=0)
graph_accessor = DefaultGraphAccessor(graph_client)
'''
Merge `extended knowledge` entity nodes (and their relations) that share the same name.
'''


class DuplicateCleaner:
    def merge_node_with_same_name(self):
        print('begin merge node with same name')
        query = "match(a:`extended knowledge`:entity) return id(a) as id,a.name as name"
        try:
            nodeList = []
            result = graph_accessor.graph.run(query)
            for n in result:
                node = (n['id'], n['name'])
                nodeList.append(node)
        except Exception:
            return []
        nodeDict = {}
        a = 1
        for n in nodeList:
Example #19
    def setUp(self):
        self.graphClient = DefaultGraphAccessor(GraphClient())
        self.filter = NodeRelationFilter()

    def setUp(self):
        graphClient = DefaultGraphAccessor(GraphClient(server_number=1))
        relation_util = RelationUtil(graphClient)

        self.relation_util = relation_util
Example #21
    def __init__(self):
        # file_dir = os.path.split(os.path.realpath(__file__))[0]
        # self.path = os.path.join(file_dir, self.path)
        # self.stackoverflow_word2vec_model = Word2Vec.load(self.path)
        self.graph_operator = DefaultGraphAccessor(GraphClient())
        print("done loading word2vec")
import threading
from Queue import Queue

from py2neo import Node

from skgraph.graph.accessor.graph_accessor import DefaultGraphAccessor, GraphClient
from skgraph.graph.accessor.graph_client_for_rwr import RandomWalkGraphAccessor
from skgraph.graph.operation.generateDataPpi import DataCreateUtil
from skgraph.graph.operation.random_walk_restart import Walker

log_file = open('log_single.txt', 'w')
input_file_name = 'data.ppi'
node_id = 53157
client_1 = DefaultGraphAccessor(GraphClient(server_number=1))
client_2 = DefaultGraphAccessor(GraphClient(server_number=2))
data_util = DataCreateUtil(client_1)
end_status = 0


class Producer(threading.Thread):
    def __init__(self, queue):
        threading.Thread.__init__(self)
        self.data = queue

    def run(self):
        print('begin produce')
        log_file.writelines('begin  %d\n' % (node_id))
        log_file.flush()
        if client_1.find_node_by_id(node_id) is None:
            return 0
        if data_util.createData(node_id) == 1:
Example #23
from py2neo import Relationship

from skgraph.graph.accessor.graph_accessor import DefaultGraphAccessor, GraphClient
from graph_operation import GraphOperation

graphClient = DefaultGraphAccessor(GraphClient(server_number=1))


class JavaReturnValueTypeLinkerOperation(GraphOperation):
    name = 'JavaReturnValueTypeLinkerOperation'

    def operate(self, node):
        return_value_type = node["value type"]
        if return_value_type:
            type_node = graphClient.find_one_by_alias_name_property(
                "java class", return_value_type)
            if type_node is not None:
                relation = Relationship(node, "type of", type_node)
                graphClient.merge(relation)
        return node, node
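A hypothetical invocation sketch: only JavaReturnValueTypeLinkerOperation, graphClient, and find_node_by_id appear on this page; the node id below is made up.

op = JavaReturnValueTypeLinkerOperation()
method_node = graphClient.find_node_by_id(12345)   # some node carrying a "value type" property
if method_node is not None:
    op.operate(method_node)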
    def setUp(self):
        self.graphClient = DefaultGraphAccessor(GraphClient())
Example #25
    def __init__(self, graph_client, api_searcher, api_semantic_search):
        self.graph_accessor = DefaultGraphAccessor(graph_client)
        self.api_searcher = api_searcher
        self.api_semantic_search = api_semantic_search
Example #26
    def init(self):
        self.graphClient = DefaultGraphAccessor(GraphClient(server_number=4))
        self.session = EngineFactory.create_session()
        print("init complete")