def sync_meetup_data(group):
    """Copy the meetup groups around ``group`` and their events into Neo4j.

    The location of ``group`` is looked up and written as a ``Location``
    node in its own transaction. Every group returned by
    ``get_groups_in_location`` (category 34) is then written as a ``Group``
    node attached to that location, followed by one ``Event`` node per event
    returned by ``get_group_events``. Each group uses a separate transaction.
    """
    neo4j_settings = config['neo4j']
    graph = Graph(host=neo4j_settings['host'], user=neo4j_settings['user'],
                  password=neo4j_settings['password'])

    location = get_group_location(group)

    location_tx = graph.begin()
    location_node = Node(
        'Location',
        city=location['city'],
        state=location['state'],
        country=location['country'],
    )
    location_tx.create(location_node)
    location_tx.commit()

    meetup_groups = get_groups_in_location(location, category=34)

    logger.info('Finding upcoming meetup events at {} meetup groups'.format(len(meetup_groups)))

    for candidate in meetup_groups:
        # Two-second pause before every per-group lookup.
        time.sleep(2)
        group_name, events = get_group_events(candidate)

        group_tx = graph.begin()
        group_node = Node("Group", name=group_name)
        group_tx.create(group_node)
        group_tx.create(Relationship(location_node, 'HAS MEETUP', group_node))
        for event in events:
            event_node = Node('Event', name=event['name'], time=event['time'])
            group_tx.create(event_node)
            group_tx.create(Relationship(group_node, "HAS EVENT", event_node))
        group_tx.commit()
        logger.info('Transaction ({}) status: {}'.format(group_name, str(group_tx.finished())))
Пример #2
0
    def handle(self, *args, **options):  # pylint: disable=unused-argument
        """
        Export every course known to the ModuleStoreSerializer to neo4j.

        For each course the serialized nodes and relationships are written in
        one transaction, after the ``item`` nodes already stored for that
        course key have been deleted. If anything fails for a course, the
        error is logged, the transaction is rolled back and the loop moves on
        to the next course.

        Raises:
            CommandError: when ``settings.NEO4J_CONFIG`` is ``None``.
        """
        # Refuse to run without a neo4j configuration.
        if settings.NEO4J_CONFIG is None:
            raise CommandError(
                "No neo4j configuration (NEO4J_CONFIG) defined in lms.auth.json."
            )

        authenticate(
            "{host}:{https_port}".format(**settings.NEO4J_CONFIG),
            "{user}".format(**settings.NEO4J_CONFIG),
            "{password}".format(**settings.NEO4J_CONFIG)
        )

        graph = Graph(**settings.NEO4J_CONFIG)

        mss = ModuleStoreSerializer()

        total_number_of_courses = len(mss.all_courses)

        for position, course in enumerate(mss.all_courses, 1):
            # Clear the request cache on every iteration to prevent memory leaks.
            RequestCache.clear_request_cache()

            log.info(
                "Now exporting %s to neo4j: course %d of %d total courses",
                course.id,
                position,
                total_number_of_courses
            )
            nodes, relationships = mss.serialize_course(course.id)
            log.info(
                "%d nodes and %d relationships in %s",
                len(nodes),
                len(relationships),
                course.id
            )

            transaction = graph.begin()
            try:
                # Drop whatever is currently stored for this course ...
                delete_query = "MATCH (n:item) WHERE n.course_key='{}' DETACH DELETE n".format(
                    six.text_type(course.id)
                )
                transaction.run(delete_query)

                # ... then write the freshly serialized graph and commit.
                for batch in (nodes, relationships):
                    self.add_to_transaction(batch, transaction)
                transaction.commit()

            except Exception:  # pylint: disable=broad-except
                log.exception(
                    "Error trying to dump course %s to neo4j, rolling back",
                    six.text_type(course.id)
                )
                transaction.rollback()
def expot_data(cid, data):
    """
    Import tabular data into neo4j, adding a label derived from ``cid`` to every imported node.

    ``data[0]`` is the header row: ``_id`` and ``_labels`` first, then the
    node property columns, then ``_start``, ``_end``, ``_type`` and finally
    the relationship property columns, e.g.
    ``["_id", "_labels", "tagline", "title", "released", "name", "born",
    "_start", "_end", "_type", "roles"]``.

    Every following row is either a node row (``_id`` set, ``_start`` and
    ``_end`` empty) or a relationship row (``_id`` empty, ``_start`` and
    ``_end`` set). A relationship row may only reference nodes defined by an
    earlier row of the same ``data``.

    :param cid: identifier used as extra node label (capitalized) and as
        prefix of each node's ``_cid`` property (lower-cased).
    :param data: sequence of rows as described above.
    :return: None
    :raises ValueError: if a row is neither a node row nor a relationship
        row, or if a relationship row references an unknown node.
    """
    title = data[0]
    host, http_port, bolt_port, user, password = '******', 7474, 7687, 'neo4j', 'gswewf'
    graph = Graph(host=host, http_port=http_port, bolt_port=bolt_port, user=user, password=password)
    _start_index = title.index('_start')
    node_property = title[2:_start_index]
    relation_property = title[_start_index + 3:]
    cid_label = cid.capitalize()
    cid_prefix = cid.lower()
    nodes = {}
    relationships = []
    tx = graph.begin()
    for line in data[1:]:
        _id, _labels = line[:2]
        node_property_value = line[2:_start_index]
        _start, _end, _type = line[_start_index:_start_index + 3]
        relation_property_value = line[_start_index + 3:]
        _labels = [label for label in _labels.strip().split(':') if label]
        _labels.append(cid_label)
        if _id and not _start and not _end:
            # Node row: only non-empty property values are stored.
            property_dict = {k: v for k, v in zip(node_property, node_property_value) if v}
            _cid = "{}_{}".format(cid_prefix, _id)
            updatetime = int(time.time() * 1000)  # milliseconds, consistent with neo4j's timestamp()
            node = Node(*_labels, _cid=_cid, updatetime=updatetime, **property_dict)
            # The first node seen for a _cid is the one relationships attach to.
            nodes.setdefault(_cid, node)
            tx.create(node)
        elif not _id and _start and _end:
            # Relationship row: both endpoints must have been defined already.
            property_dict = {k: v for k, v in zip(relation_property, relation_property_value) if v}
            start_cid = "{}_{}".format(cid_prefix, _start)
            end_cid = "{}_{}".format(cid_prefix, _end)
            a = nodes.get(start_cid)
            b = nodes.get(end_cid)
            if a is None or b is None:
                # Fail with a clear data error instead of handing None to py2neo.
                raise ValueError("数据有误, 关系引用了不存在的节点: {}".format(line))
            a_knows_b = Relationship(a, _type, b, **property_dict)
            relationships.append(a_knows_b)
            tx.create(a_knows_b)
        else:
            raise ValueError("数据有误: {}".format(line))
    print(len(nodes), len(relationships))
    tx.commit()
Пример #4
0
class BaseUploader(object):
    """Base class for loaders that push the rows of a CSV file into a graph.

    Subclasses provide ``setup`` (called once with the connected graph) and
    ``add_query`` (called once per CSV row with the open transaction).
    """
    __metaclass__ = abc.ABCMeta

    def __init__(self, graph_url, file_to_process):
        """Connect to ``graph_url``, run ``setup`` and resolve the input path.

        ``file_to_process`` is joined onto the parent of the directory that
        contains this module.
        """
        #watch("httpstream")
        self.graph = Graph(graph_url)
        self.setup(self.graph)
        module_dir = os.path.dirname(__file__)
        self.input_file = os.path.join(os.path.dirname(module_dir), file_to_process)
        self.idx = 0
        print('connected to graph db at : ' + str(self.graph))

    @abc.abstractmethod
    def setup(self, graph):
        """Prepare ``graph`` before any row is processed.

        :rtype : None
        """

    @abc.abstractmethod
    def add_query(self, record, tx):
        """Add the statements for one CSV ``record`` to transaction ``tx``."""
        return

    def process(self):
        """Feed every CSV row to ``add_query``, committing after each 1000 rows."""
        print('start processing')
        with open(self.input_file, 'rt', encoding='utf-8') as infile:
            tx = self.graph.begin()
            for row in csv.DictReader(infile, quoting=csv.QUOTE_NONE):
                batch_complete = self.idx != 0 and self.idx % 1000 == 0
                if batch_complete:
                    # Flush the finished batch and start a new transaction.
                    tx.commit()
                    tx = self.graph.begin()
                    print('commited 1000 rows till row:' + str(self.idx))
                self.add_query(row, tx)
                self.idx += 1
            # Commit whatever is left in the last (possibly partial) batch.
            tx.commit()
Пример #5
0
def neo4j(user,password,hostname,data):
    """Reset the graph and load movies, directors, actors and collectors.

    Authenticates against ``hostname`` and connects; if that fails a message
    is printed and the process exits. All existing nodes and relationships
    are deleted, the four node kinds from ``data`` are merged in a single
    transaction, and finally ``relation`` and ``queries`` are called with
    the same ``data`` and graph.
    """
    try:
        authenticate(hostname, user, password)
        graph = Graph()
    except Exception:
        # Server could not be reached: report and stop.
        print("Unable to reach server.")
        sys.exit()

    # Start from an empty database.
    graph.data("MATCH (n) OPTIONAL MATCH (n)-[r]-() DELETE n,r")

    tx = graph.begin()

    # One "Movies" node per movie.
    for movie in data.movies:
        tx.merge(Node(
            "Movies",
            mov_id=movie.ID,
            title=movie.title,
            released_year=movie.year,
            rating=movie.rating,
            genre=movie.genre,
        ))

    # One "Directors" node per director.
    for director in data.directors:
        tx.merge(Node("Directors", userid=director.ID, fullname=director.name))

    # One "Actors" node per actor.
    for actor in data.actors:
        tx.merge(Node("Actors", userid=actor.ID, fullname=actor.name))

    # One "Collectors" node per collector.
    for collector in data.collectors:
        tx.merge(Node(
            "Collectors",
            userid=collector.ID,
            fullname=collector.name,
            email=collector.email,
        ))

    tx.commit()

    relation(data, graph)
    queries(data, graph)
Пример #6
0
class Neo4j():
    """Importer that writes MISP events and their attributes into neo4j via py2neo."""

    def __init__(self, host='localhost:7474', username='******', password='******'):
        """Authenticate against ``host`` and open the graph at ``http://<host>/db/data/``.

        Raises an ``Exception`` when py2neo could not be imported.
        """
        if not has_py2neo:
            raise Exception('py2neo is required, please install: pip install py2neo')
        authenticate(host, username, password)
        graph_url = "http://{}/db/data/".format(host)
        self.graph = Graph(graph_url)

    def load_events_directory(self, directory):
        """Load every ``*.json`` file in ``directory`` as a MISPEvent and import it."""
        self.events = []
        pattern = os.path.join(directory, '*.json')
        for json_path in glob.glob(pattern):
            misp_event = MISPEvent()
            misp_event.load(json_path)
            self.import_event(misp_event)

    def del_all(self):
        """Delete everything stored in the graph."""
        self.graph.delete_all()

    def import_event(self, event):
        """Write ``event``, its attributes and their values in one transaction.

        Only ``uuid`` and ``info`` of the event and ``type``, ``uuid``,
        ``category`` and ``value`` of each attribute are exported.
        """
        tx = self.graph.begin()
        event_node = Node('Event', uuid=event.uuid, name=event.info)
        tx.create(event_node)
        for attribute in event.attributes:
            # The attribute type is used as a second label.
            attr_node = Node('Attribute', attribute.type, uuid=attribute.uuid)
            attr_node['category'] = attribute.category
            attr_node['name'] = attribute.value
            tx.create(attr_node)
            tx.create(Relationship(event_node, "is member", attr_node))

            # The value node is merged together with its two incoming links.
            value_node = Node('Value', name=attribute.value)
            event_has_value = Relationship(event_node, "has", value_node)
            attr_is_value = Relationship(attr_node, "is", value_node)
            tx.merge(value_node | event_has_value | attr_is_value)
        tx.commit()
Пример #7
0
def update_neo4j_db(_neo_data, _print_status_freq):
    """Write image/object detection results into neo4j.

    Each entry of ``_neo_data`` is read through its ``"img"``,
    ``"datasource"`` and ``"det"`` keys; every detection is indexed as
    ``[object_name, score]``. Per entry, one transaction merges the
    ``Image`` node, merges an ``Object`` node per detection and creates a
    ``HAS`` relationship carrying the score. A progress line is printed
    every ``_print_status_freq`` entries.

    Returns 0 on success. Any exception is printed (not raised) and 1000 is
    returned instead.
    """
    # neo4j access stuff
    neo_uri = r"bolt://localhost:7687"
    auth = (r"neo4j", r"abc")
    merge_image = r'MERGE (:Image {name: $_in_img_name, dataset: $_in_img_dataset})'
    merge_object = r'MERGE (:Object {name: $_in_obj_det})'
    link_image_object = r'MATCH (i1:Image{name: $_in_img_name, dataset: $_in_img_dataset}) MATCH (o1:Object{name: $_in_obj_det}) CREATE (i1)-[:HAS{score: $_in_det_score}]->(o1)'
    total_images_info_count = len(_neo_data)
    try:
        graph = Graph(uri=neo_uri, auth=auth)
        for count, each_img_info in enumerate(_neo_data):
            if count % _print_status_freq == 0:
                print(
                    f"\t\tProcessing image {count + 1} of {total_images_info_count}"
                )
            tx = graph.begin()
            # Create the Image node if it does not exist yet.
            image_params = {
                "_in_img_name": each_img_info["img"],
                "_in_img_dataset": each_img_info["datasource"],
            }
            tx.run(merge_image, parameters=image_params)
            for each_detection in each_img_info["det"]:
                # Create the Object node if it does not exist yet.
                tx.run(merge_object, parameters={"_in_obj_det": each_detection[0]})
                # Both endpoints exist by now, so the HAS link can be created.
                link_params = dict(image_params)
                link_params["_in_obj_det"] = each_detection[0]
                link_params["_in_det_score"] = each_detection[1]
                tx.run(link_image_object, parameters=link_params)
            tx.commit()
            # Spin until the transaction reports the commit as finished.
            while not tx.finished():
                pass
    except Exception as error_msg_neo_write:
        print(f"\n\nUnexpected ERROR attempting entry to neo4j.")
        print(f"\nMessage:\n{error_msg_neo_write}")
        print(f"\nFunction call return with RC=1000.\n\n")
        return 1000
    # RC = 0 signals successful processing.
    return 0
 def leave_group(self, group_name, user_name):
     """Remove ``group_name`` from the group list stored on ``user_name``.

     The user's remaining groups (everything ``self.my_groups`` returns
     except ``group_name``) are written to the ``groups`` property of the
     matching ``User`` node as one newline-separated string.

     Fixes over the previous version: every remaining group is kept (only
     the last one used to survive), the separator is a real newline instead
     of the literal ``"/n"``, leaving the last group no longer raises
     ``UnboundLocalError``, and the driver and session are closed.
     """
     graph = Graph('bolt://localhost:7687',
                   username='******',
                   password='******')
     g = graph.begin()
     # NOTE(review): this transaction is never committed, so the DETACH
     # DELETE below is discarded. Confirm that deleting the whole Group node
     # when one user leaves is really intended before adding g.commit().
     g.run("MATCH (a:Group) WHERE a.groupName=$gname DETACH DELETE a",
           gname=group_name)
     remaining = [group for group in self.my_groups(user_name)
                  if group != group_name]
     # NOTE(review): " " mirrors the empty-value placeholder that
     # add_friend_to_person_node checks for on a.friends - confirm the same
     # convention applies to a.groups.
     groups = "\n".join(remaining) if remaining else " "
     graphdb = GraphDatabase.driver("bolt://localhost:7687",
                                    auth=("neo4j", "facebook"),
                                    encrypted=False)
     try:
         with graphdb.session() as session:
             session.run(
                 "MATCH (a:User {userName:$uname}) SET a.groups=$group",
                 uname=user_name,
                 group=groups)
     finally:
         graphdb.close()
Пример #9
0
def add_to_graph(yearcaseno1, id1, title1, yearcaseno2, id2):
    """Merge two ``CASE`` nodes and a ``REFER`` relationship from the first to the second.

    Nodes are merged on ``yearcaseno``. ``id``/``title`` are only set on the
    first node when ``id1`` is not ``None``; ``id`` is only set on the second
    node when ``id2`` is not ``None``.
    """
    graph = Graph("http://localhost:7474/db/data/", password=NEO4J_PASSWORD)

    referring_props = {'yearcaseno': yearcaseno1}
    if id1 is not None:
        referring_props.update(id=id1, title=title1)

    referred_props = {'yearcaseno': yearcaseno2}
    if id2 is not None:
        referred_props['id'] = id2

    node_1 = Node("CASE", **referring_props)
    node_2 = Node("CASE", **referred_props)
    rel = Relationship(node_1, "REFER", node_2)

    tx = graph.begin()
    for case_node in (node_1, node_2):
        tx.merge(case_node, primary_label='CASE', primary_key='yearcaseno')
    tx.merge(rel)
    tx.commit()
Пример #10
0
def create_node_and_rel(node):
    """Write the subgraph described by ``node`` into Neo4j.

    Args:
        node(Nodes): a `trie.Nodes` object holding all information of the
            subgraph.

    Returns:
        None
    """
    graph = Graph(neo4j_url, auth=auth)
    tx = graph.begin()
    root_node = create_node(tx, node.value, node.label, 0)

    def func(p_node: Node, nodes: Nodes, i: int):
        """Recursively attach the children of ``nodes`` below ``p_node``.

        Args:
            p_node: a `py2neo.Node` object (the parent in the graph)
            nodes: a `trie.Nodes` object (the parent in the trie)
            i: the current depth level

        Returns:
            None
        """
        if isinstance(nodes, list):
            return
        children = nodes.children
        if not children:
            # No children: the parent is created on its own.
            tx.create(p_node)
            return
        for child in children:  # each child is itself a `trie.Nodes` object
            child_node = create_node(tx, child.value, child.label, i)
            tx.create(Relationship(p_node, child.rel, child_node))
            if child.children:
                # The child has children of its own, so descend one level.
                func(child_node, child, i + 1)

    func(root_node, node, 1)
    tx.commit()
Пример #11
0
def calcPR():
    """Compute PageRank over the ``linkTo`` graph and store it as ``weight``.

    All ``Entry`` nodes and all ``linkTo`` relationships are loaded into a
    networkx ``DiGraph`` keyed by ``page_id``. The resulting PageRank value
    of every page is written to the ``weight`` property of the node matched
    by that ``page_id``, all within one transaction. Progress is printed
    along the way.
    """
    graph = Graph('bolt://47.113.103.137:10087', auth=('neo4j', 'pedia_search'))
    G = nx.DiGraph()
    entries = NodeMatcher(graph).match('Entry').all()
    for entry in entries:
        G.add_node(entry['page_id'])
        print("node page_id: ", entry['page_id'])
    print("number of nodes: ", G.number_of_nodes())

    links = graph.match(nodes=None, r_type='linkTo', limit=None).all()
    print('start loop')
    for position, link in enumerate(links, start=1):
        print(position)
        print(link)
        source_id = link.start_node['page_id']
        target_id = link.end_node['page_id']
        print('edge: ', source_id, ' -> ', target_id)
        G.add_edge(source_id, target_id)

    print('start pagerank')
    result = nx.pagerank(G, alpha=0.85, personalization=None, max_iter=20000, tol=1e-06, weight='weight', dangling=None)
    print(result)
    print(sorted(result.items(), key=lambda kv: (kv[1], kv[0])))

    transaction = graph.begin()
    matcher = NodeMatcher(graph)
    for position, (cur_page_id, weight) in enumerate(result.items(), start=1):
        print("node:", position, cur_page_id, weight)
        node = matcher.match(page_id=cur_page_id).first()
        # Update the weight locally, then queue the push.
        node['weight'] = weight
        transaction.push(node)

    transaction.commit()
Пример #12
0
class Neo4jPipline(object):
    """Item pipeline that stores "following" links between ``Person`` nodes in neo4j."""

    def __init__(self):
        """Open the connection to the neo4j server."""
        self.graph = Graph("http://xxx.xx.xx.xxx:7474",username="******",password="******")

    def process_item(self, item, spider):
        """Merge both users of ``item`` and the ``following`` link between them.

        ``item["username"]`` and ``item["following"]`` are merged as
        ``Person`` nodes, then a ``following`` relationship from the former
        to the latter is merged and the transaction is committed. A failure
        while merging the relationship or committing is printed, not raised.

        The relationship is built from the same ``Person`` nodes that were
        merged (previously it used fresh, unlabeled nodes), and the item is
        returned so that later pipeline stages still receive it.

        :param item: mapping with ``"username"`` and ``"following"`` keys.
        :param spider: unused.
        :return: the unchanged ``item``.
        """
        tx = self.graph.begin()
        follower = Node("Person", name=item["username"])
        followee = Node("Person", name=item["following"])
        for person in (follower, followee):
            tx.merge(person)
        rel = Relationship(follower, "following", followee)
        try:
            tx.merge(rel)
            print("successful")
            tx.commit()
        except Exception as e:
            print(e)
            print("Failed")
        return item
def main():
    """Store the friendships among the start user's friends in neo4j.

    The friend list of ``START_ID`` is fetched; for every friend, the subset
    of their own friends that also belongs to that list is collected. One
    ``FRIENDSHIP`` relationship is then created per collected pair and all
    of them are committed in a single transaction.

    Fixes over the previous version: the relationship loop iterates over the
    collected edges (it used to iterate over a Node's property keys), the
    transaction is committed once after all relationships were added (it
    used to be committed inside the loop, which finishes the transaction
    after the first edge), and the builtin ``id`` is no longer shadowed.
    """
    parser = VkParser(DRIVER_PATH)
    parser.login(LOGIN, PASSWORD)
    friends_ids = list(parser.get_friends_list(START_ID))
    known_ids = set(friends_ids)  # O(1) membership tests
    edges = {START_ID: friends_ids}
    for friend_id in friends_ids:
        edges[friend_id] = [
            other_id
            for other_id in parser.get_friends_list(friend_id)
            if other_id in known_ids
        ]

    nodes = {friend_id: Node("Person", id=friend_id) for friend_id in friends_ids}
    g = Graph()
    tx = g.begin()
    for friend_id in friends_ids:
        for other_id in edges[friend_id]:
            # Creating the relationship also creates its two end nodes.
            tx.create(Relationship(nodes[friend_id], "FRIENDSHIP", nodes[other_id]))
    tx.commit()
Пример #14
0
def create_gics_node(graph: Graph):
    """Create one node per row of the ``gics`` table, labelled by its level.

    Rows with ``level`` 0, 1, 2 and 3 become nodes labelled ``gics`` plus
    ``Sector``, ``Industry_Group``, ``Industry`` and ``Sub_Industry``
    respectively, each carrying ``code`` and ``name``. Everything is
    committed in a single transaction.
    """
    event = StockEventBase(GLOBAL_HEADER)
    df = event.mysql.select_values('gics', 'code,name,level')
    df.columns = ['code', 'name', 'level']

    # Position in this tuple == value of the 'level' column.
    level_labels = ('Sector', 'Industry_Group', 'Industry', 'Sub_Industry')
    rows_by_level = [df[df['level'] == level] for level in range(len(level_labels))]

    t = graph.begin()
    for level_label, rows in zip(level_labels, rows_by_level):
        for _, row in rows.iterrows():
            t.create(Node('gics', level_label, code=row['code'], name=row['name']))
    t.commit()
 def add_friend_to_person_node(self, user_name, friend):
     """Append ``friend`` to the user's ``friends`` property and link both users.

     The current ``friends`` value of the ``User`` node with
     ``userName == user_name`` is read; ``friend`` replaces it when it is the
     single-space placeholder (or missing) and is appended after a newline
     otherwise. An ``is_friends_with`` relationship is then created from
     that user to every ``User`` whose ``firstName`` equals ``friend``.

     Fixes over the previous version: a missing ``friends`` value no longer
     raises ``TypeError``, the read no longer leaves a transaction open, and
     the driver and session are closed.
     """
     graph = Graph('bolt://localhost:7687',
                   username='******',
                   password='******')
     # Auto-committed read; no dangling transaction is left behind.
     friends = graph.run("MATCH (a:User {userName:$uname}) RETURN a.friends",
                         uname=user_name).evaluate()
     if friends is None or friends == " ":
         friends = friend
     else:
         friends = friends + "\n" + friend
     graphdb = GraphDatabase.driver("bolt://localhost:7687",
                                    auth=("neo4j", "facebook"),
                                    encrypted=False)
     try:
         with graphdb.session() as session:
             session.run(
                 "MATCH (a:User {userName:$uname}) SET a.friends=$friend",
                 uname=user_name,
                 friend=friends)
             # NOTE(review): the friend is matched on firstName while the
             # user is matched on userName - confirm this is intended.
             session.run(
                 "MATCH (a:User),(b:User) WHERE a.userName = $uname AND b.firstName = $friendname CREATE (a)-[r:is_friends_with]->(b)",
                 uname=user_name,
                 friendname=friend)
     finally:
         graphdb.close()
Пример #16
0
def Create_Nodes(df):
    """Store every row of ``df`` as a ``Twittos`` node, one transaction per row.

    The columns ``author``, ``text``, ``create_date``, ``author_location``
    and ``full_name`` are read from each row; ``create_date`` is stored as a
    string.
    """
    # Connection to the personal NEO4J database
    graph = Graph(password='******')

    for position in range(df.shape[0]):
        tx = graph.begin()
        row = df.iloc[position]
        twittos = Node(
            "Twittos",
            name=row["author"],
            tweet=row["text"],
            create_date=str(row["create_date"]),
            full_name=row["full_name"],
            author_location=row["author_location"],
        )
        tx.create(twittos)
        tx.commit()
def create_nodes():
    """Create a ``User`` node for every id found in the user-move CSV files.

    Every ``*.csv`` file under ``../data_users_moves/`` is read without a
    header row as three columns (``Timestamp``, ``id1``, ``id2``). The
    distinct values of ``id1`` and ``id2`` across all files each become one
    ``User`` node; all nodes are created in a single transaction.
    """
    user_ids = set()

    for path in glob.glob('../data_users_moves/*.csv'):
        filename = os.fsdecode(path)
        if filename.endswith('.csv'):
            print('Currently using file - ', filename)
            frame = pd.read_csv(filename, header=None)
            frame.columns = ['Timestamp', 'id1', 'id2']
            user_ids |= set(frame['id1']) | set(frame['id2'])

    graph = Graph('127.0.0.1', password='******')
    tx = graph.begin()
    for user_id in user_ids:
        tx.create(Node('User', id=user_id))
    tx.commit()
Пример #18
0
def f_relation_insert(ent1,
                      ent2,
                      rel,
                      tab_name,
                      label1=None,
                      label2=None,
                      sql=None):
    """Copy relationships from an Oracle table into neo4j.

    Rows of ``(entity1, relation, entity2)`` are read from Oracle, either
    with the caller's ``sql`` or, when ``sql`` is ``None``, with a query for
    the columns ``ent1``, ``rel``, ``ent2`` of ``tab_name`` limited to 100
    rows. For each row the two nodes are looked up by ``name`` (labels
    ``label1`` / ``label2``) and a relationship typed by the row's relation
    value is created between them.

    Fixes over the previous version: the transaction is committed once after
    all rows were added (it used to be committed inside the loop, which
    finishes the transaction after the first row), ``sql`` is compared with
    ``is None``, and the Oracle cursor and connection are closed.

    NOTE(review): the default query is built by string formatting; the
    column and table names must come from trusted callers.
    """
    print(label1)
    g = Graph("http://115.159.65.147:7474", username="******", password="******")
    conn = cx.connect('nsyy', 'uat_NSYY', '192.168.0.110:1521/orcl')
    try:
        cur = conn.cursor()
        try:
            if sql is None:
                sql = 'select {0},{1},{2} from {3} where rownum<=100'.format(
                    ent1, rel, ent2, tab_name)
            cur.execute(sql)
            rows = cur.fetchall()
        finally:
            cur.close()
    finally:
        conn.close()

    tx = g.begin()
    t1 = time.time()  # timing covers the neo4j import only
    cnt = 0
    for row in rows:
        ent1 = row[0]
        ent2 = row[2]
        rel = row[1]
        print(ent1, ent2, rel)
        left_rel = g.find_one(label=label1,
                              property_key="name",
                              property_value=ent1)
        print(left_rel)
        right_rel = g.find_one(label=label2,
                               property_key="name",
                               property_value=ent2)
        tx.create(Relationship(left_rel, rel, right_rel))
        cnt = cnt + 1
    tx.commit()
    time_second = time.time() - t1
    print('关系导入完成,共导入关系{0}个,时间为{1}秒'.format(cnt, time_second))
Пример #19
0
class DataToNeo4j:
    """Store data read from Excel in neo4j."""
    def __init__(self):
        """Connect to the graph, define the two node labels and wipe the graph."""
        self.graph = Graph('http://localhost:7474',
                           username='******',
                           password='******')
        # Labels used for the two kinds of nodes.
        self.invoice_name = '发票名称'
        self.invoice_value = '发票值'
        # Every new instance starts from an empty database.
        self.graph.delete_all()

    def create_node(self, node_list_key, node_list_value):
        """Create one labelled node per entry of the two lists.

        Entries of ``node_list_key`` get the label ``self.invoice_name``,
        entries of ``node_list_value`` the label ``self.invoice_value``;
        the entry itself is stored as the ``name`` property.

        The label is passed positionally: ``Node(label=...)`` stored it as
        a property called ``label`` and left the node without any label.
        """
        for k in node_list_key:
            name_node = Node(self.invoice_name, name=k)
            self.graph.create(name_node)

        for v in node_list_value:
            value_node = Node(self.invoice_value, name=v)
            self.graph.create(value_node)

    def create_relation(self, df_data):
        """Merge the nodes and the relationship for every row of ``df_data``.

        The columns ``name``, ``name2`` and ``relation`` of each row are
        passed to the Cypher statement below; all rows share one
        transaction.

        NOTE(review): the statement merges nodes on a property called
        ``property`` while ``create_node`` stores the value under ``name``,
        so the nodes merged here are not the ones created there - confirm
        which key is intended.
        """
        tx = self.graph.begin()
        for _, row in df_data.iterrows():
            tx.evaluate('''
                   MERGE (a:发票名称 {property:$name})
                   MERGE (b:发票值 {property:$name2})
                   MERGE (a)-[r:R_TYPE{property:$p}]->(b)
                   ''',
                        parameters={
                            'name': row['name'],
                            'name2': row['name2'],
                            'p': row['relation']
                        })

        tx.commit()
Пример #20
0
def build_neo_graph(delta, call_graph1, call_graph2):
    """Merge the functions of ``delta`` and both call graphs into neo4j.

    Every key of ``delta`` becomes a ``function`` node (``title`` = key,
    ``content`` = value). Entries of ``call_graph1`` are merged as
    ``Original Call To`` relationships and entries of ``call_graph2`` as
    ``Modified Call To`` relationships; both ends of an entry are addressed
    as ``<function>@<file>`` and must be keys of ``delta``.
    """
    graph = Graph(password='******')
    tx = graph.begin()

    nodes = {}
    for title, content in delta.items():
        nodes[title] = Node("function", title=title, content=content)
        tx.merge(nodes[title])

    typed_call_graphs = (
        (call_graph1, "Original Call To"),
        (call_graph2, "Modified Call To"),
    )
    for call_graph, relation_type in typed_call_graphs:
        for entry in call_graph:
            calling_fun = entry['referringFunction'] + "@" + entry['referringFile']
            called_fun = entry['referredFunction'] + "@" + entry['referredFile']
            tx.merge(Relationship(nodes[calling_fun], relation_type,
                                  nodes[called_fun]))

    tx.commit()
Пример #21
0
def addAffirmationToGraph(affirmation):
    """Merge an affirmation (user -[Affirms]-> claim) into the neo4j graph.

    The claim is merged as a ``Claim`` node keyed by ``claim_id``, the user
    as a ``User`` node keyed by ``user_id``, and an ``Affirms`` relationship
    carrying ``affirmation_id`` is merged between them. Everything happens
    in one transaction.

    The second positional argument of ``merge`` is the label to match on,
    not the key. The previous calls therefore passed ``'claim_id'``,
    ``'user_id'`` and ``'affirmation_id'`` as labels; the label and the key
    are now both given explicitly.
    """
    authenticate(settings.SECRET_NEO4J_DB_HOSTPORT,
                 settings.SECRET_NEO4J_DB_USER,
                 settings.SECRET_NEO4J_DB_PASSWORD)
    graph = Graph()
    tx = graph.begin()
    claim = affirmation.claim
    claimNode = Node("Claim",
                     claim_id=claim.id,
                     name=claim.name,
                     content=claim.content)
    tx.merge(claimNode, 'Claim', 'claim_id')
    user = affirmation.user
    userNode = Node("User",
                    user_id=user.id,
                    name=user.username)
    tx.merge(userNode, 'User', 'user_id')
    affirmsRelationship = Relationship(userNode,
                                       "Affirms",
                                       claimNode,
                                       affirmation_id=affirmation.id)
    # No label override here: a single label cannot describe both end nodes.
    tx.merge(affirmsRelationship)
    tx.commit()
Пример #22
0
def Create_Nodes(api, df):
    """Store every row of ``df`` as a ``Twittos`` node and link it to other users.

    Before each row ``Check_Limit_API`` is called with ``api``. The node is
    created and committed in its own transaction. Afterwards, depending on
    ``RETWEET_ONLY``, either ``Create_Retweet_Link`` is called (its return
    value replaces ``df``) or the author's followers are fetched and passed
    to ``Create_Relationship``.

    Returns the (possibly replaced) ``df``.
    """
    # Connection to the personal NEO4J database
    graph = Graph(password='******')

    for position in range(df.shape[0]):
        # Checks whether requests are still available on the API.
        Check_Limit_API(api)

        tx = graph.begin()
        row = df.iloc[position]
        author = row["author"]
        twittos = Node(
            "Twittos",
            name=author,
            tweet=row["text"],
            create_date=str(row["create_date"]),
            full_name=row["full_name"],
            author_location=row["author_location"],
            lang=row["lang"],
            iso_lang=row["iso_lang"],
        )
        tx.create(twittos)
        tx.commit()

        if RETWEET_ONLY:
            tweets_id = df.index[position]
            df = Create_Retweet_Link(api, tweets_id, twittos, author, df)
        else:
            followers = getFollowers(api, author, ENTRY_FOLLOWER)
            Create_Relationship(api, followers, twittos, author)
    return df
Пример #23
0
def init_graph(our_graph: OurGraph,
               user: str,
               password: str,
               url: str = 'bolt://localhost:7687'):
    """Replace the content of the neo4j graph at ``url`` with ``our_graph``.

    The database is emptied, then every node produced by
    ``create_nodes_dict`` and one relationship per edge in ``our_graph.E``
    are created in a single transaction. A progress line is printed roughly
    once per percent of vertices and of edges.
    """
    graph = Graph(f"{url}", user=user, password=password)
    graph.delete_all()
    tx = graph.begin()

    nodes_dict = create_nodes_dict(our_graph)

    # Vertices
    vertex_total = len(nodes_dict.values())
    since_report = 0
    percent = 0.0
    for vertex in nodes_dict.values():
        tx.create(vertex)

        if since_report > (vertex_total / 100):
            since_report = 0
            percent += 1
            print('Vertex Progress', percent, '%')

        since_report += 1

    # Edges
    edge_total = len(our_graph.E)
    since_report = 0
    percent = 0.0
    for edge in our_graph.E:
        source_id = edge.from_vertex.id
        target_id = edge.to_vertex.id
        tx.create(Relationship(nodes_dict[source_id], edge.type, nodes_dict[target_id]))
        if since_report > (edge_total / 100):
            since_report = 0
            percent += 1
            print('Edge Progress', percent, '%')
        since_report += 1

    tx.commit()
Пример #24
0
def create_relationship_in_gics_node(graph: Graph):
    """Link each GICS level to the next one with ``sub_class`` relationships.

    Nodes labelled ``gics`` plus ``Sector``, ``Industry_Group``,
    ``Industry`` and ``Sub_Industry`` are loaded. For every pair of adjacent
    levels, a parent is linked to a child whenever
    ``re.match(parent['code'], child['code'])`` succeeds, i.e. the parent's
    code, used as a regex pattern, matches at the start of the child's code.
    All relationships are committed in one transaction.
    """
    t = graph.begin()

    level_names = ("Sector", "Industry_Group", "Industry", "Sub_Industry")
    levels = [list(graph.nodes.match("gics", level_name)) for level_name in level_names]

    # Pair every level with the one directly below it.
    for parents, children in zip(levels, levels[1:]):
        for parent in parents:
            for child in children:
                if re.match(parent['code'], child['code']):
                    t.create(Relationship(parent, 'sub_class', child))
    t.commit()
Пример #25
0
 def save_simple_graph(self, number_list):
     """Merge the items of ``number_list`` into neo4j as a chain of ``number`` nodes.

     Each item is converted with ``int`` and merged as a ``number`` node;
     consecutive items are connected by a ``NEXT`` relationship pointing
     from the earlier number to the later one. As soon as a predecessor has
     been recorded and ``self.node_exist`` returns a truthy value for the
     current item, that returned node is linked to the predecessor and the
     loop stops. All merges are committed in one transaction.

     NOTE(review): ``prev_number == 0`` doubles as the "no predecessor yet"
     marker, so an item equal to 0 is never used as a predecessor - confirm
     whether 0 can occur in ``number_list``.
     """
     client = Graph(password=os.getenv('NEO4J_PASSWORD', 'Neo4j'))
     # NOTE: despite its name, ``graph`` is the open transaction.
     graph = client.begin()
     prev_number = 0
     for item in number_list:
         number_node = self.node_exist(int(item))
         if prev_number != 0 and number_node:
             # Current number is reported as existing: link it to the
             # predecessor and stop processing the rest of the list.
             prev_number_node = Node("number", number=int(prev_number))
             prev_number = int(item)
             num_relationship = Relationship(prev_number_node, "NEXT",
                                             number_node)
             graph.merge(num_relationship)
             break
         number = Node("number", number=int(item))
         graph.merge(number)
         if prev_number == 0:
             # No predecessor recorded yet: remember this item and move on.
             prev_number = int(item)
             continue
         # Link predecessor -> current, then advance the predecessor.
         prev_number_node = Node("number", number=int(prev_number))
         prev_number = int(item)
         num_relationship = Relationship(prev_number_node, "NEXT", number)
         graph.merge(prev_number_node)
         graph.merge(num_relationship)
     graph.commit()
# In[16]:

# Preview the first three rows of the dataframe.
df.head(3)

# In[37]:

# neo4j configuration/credentials
uri = config["neo4j"]["uri"]
user = config["neo4j"]["user"]
password = config["neo4j"]["password"]
graph = Graph(uri=uri, user=user, password=password)

# In[39]:

# Open a transaction.
# NOTE(review): in the visible part of this cell nodes are written with
# graph.create(), not through ``gb`` - confirm where ``gb`` is used/committed.
gb = graph.begin()

topic_entity_name = []
for index, row in tqdm(df.iterrows()):
    # Get the topic number of the doc; reuse the matching TopicNumber node
    # if one exists, otherwise create it.
    topic = row["topic_number"]
    topic_node = graph.nodes.match("TopicNumber",
                                   name="Topic_" + str(topic)).first()
    if topic_node is None:
        topic_node = Node("TopicNumber", name="Topic_" + str(topic))
        graph.create(topic_node)

    topic_keywords = row["topic_keywords"]

    temp = []
    # Create nodes and relationships between the entities present in the
    # topic and the topic number that the corresponding doc belongs to.
Пример #27
0
def generateInvoice(order_id):
    """
    Complete an order, create its invoice in MySQL and mirror both in Neo4j.

    MySQL side: the order's status is set to 'completed', an invoice row due
    in 14 days is inserted for the order's total, and the change is committed.
    Neo4j side: the status of the node(s) with the same ``order_no`` is
    updated, an ``Invoice`` node is created and linked from the ``Order`` node
    with a ``GENERATED`` relationship, all in one transaction.

    :param order_id: Primary key of the order row.
    :raises IndexError: If no order row exists for ``order_id``.
    :raises LookupError: If Neo4j has no ``Order`` node for the order number.
        The MySQL changes have already been committed at that point.
    """
    seller = 'Team Team'
    connection = mysql.connector.connect(
        host='mysql98.unoeuro.com', database='zakeovich_dk_db_cphbusiness')
    try:
        cursor = connection.cursor()
        try:
            # All values are passed as query parameters rather than being
            # formatted into the SQL text.

            # Update order status
            cursor.execute(
                "UPDATE zakeovich_dk_db_cphbusiness.order o "
                "SET o.status = 'completed' WHERE o.id = %s;",
                (order_id,))

            # Get order row (total, order number and the updated status)
            cursor.execute(
                "SELECT o.total, o.order_no, o.status "
                "FROM zakeovich_dk_db_cphbusiness.order o WHERE o.id = %s;",
                (order_id,))
            order_total, order_no, order_status = cursor.fetchall()[0]

            # Generate invoice for given order and store in sql
            cursor.execute(
                "INSERT INTO zakeovich_dk_db_cphbusiness.invoice "
                "(fk_order_id, total, due_date, issue_date, seller) "
                "VALUES (%s, %s, date_add(current_date(), INTERVAL 14 DAY), "
                "current_date(), %s);",
                (order_id, order_total, seller))
            # Read the generated key right after the INSERT, before any other
            # statement runs on this cursor.
            invoice_id = cursor.lastrowid

            # Commit SQL update
            connection.commit()

            cursor.execute(
                "SELECT i.total, i.due_date, i.issue_date, i.seller "
                "FROM zakeovich_dk_db_cphbusiness.invoice i WHERE i.id = %s;",
                (invoice_id,))
            invoice = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        connection.close()

    # NOTE(review): the Neo4j credentials are hard-coded here; consider
    # loading them from configuration or the environment.
    graph = Graph("bolt://35-202-37-187.gcp-neo4j-sandbox.com:7687",
                  auth=("neo4j", "cy3yxxzcXDN6UKnw"),
                  secure=True)

    # Create a nodematcher
    matcher = NodeMatcher(graph)
    order_node = matcher.match("Order", order_no=order_no).first()
    if order_node is None:
        raise LookupError(
            "generateInvoice: no Order node with order_no " + str(order_no))

    total, due_date, issue_date, invoice_seller = invoice[0]

    # Open the transaction; the status update and the new invoice are
    # committed together.
    g_conn = graph.begin()

    # Update order in Neo4j
    g_conn.run(
        "MATCH (o { order_no: $order_no }) SET o.status = $status",
        order_no=order_no, status=order_status)

    # Serve invoice to Neo4j
    invoice_node = Node("Invoice",
                        total=total,
                        due_date=str(due_date),
                        seller=str(invoice_seller))
    g_conn.create(invoice_node)

    order_invoice = Relationship(order_node,
                                 "GENERATED",
                                 invoice_node,
                                 issue_date=str(issue_date))
    g_conn.create(order_invoice)

    # Commit the changes
    g_conn.commit()
Пример #28
0
 def __init__(self):
    """Create the Redis client and open a transaction on the Neo4j graph."""
    self.__redis = redis.StrictRedis(db=0, port=6379, host='redis')
    self.__tx = Graph("http://*****:*****@neo4j:7474/db/data/").begin()
Пример #29
0
from py2neo import Database
from py2neo import Graph, Node
import numpy as np
""" Connection to Neo4j Database """
dvdrental = "bolt://localhost:7687"
db = Database(dvdrental)
graph = Graph(password='******')
# From here on `db` refers to the open transaction, not the Database object.
db = graph.begin(autocommit=False)

# Joins films and customers to rentals on matching relationship IDs, counts the
# matches, then lists every film together with its category and that count.
rental_query = '''match (f:Film)-[fr:FILM_RENTAL]->(r:Rental)
match (c:Customer)-[cr:CUSTOMER_RENTAL]->(r)
where fr.ID=cr.ID
with count(*) as rented_times

match (f:Film)-[in_cat:IN_CATEGORY]->(cat:Category)
with f.ID as film_ID, f.title as film_title, cat.name as category_name, rented_times
return film_ID, film_title, category_name, rented_times'''

# NOTE(review): the query is sent through `graph.run`, not through the
# transaction held in `db`.
ret = graph.run(rental_query).to_table()
print(ret)

db.commit()
Пример #30
0
class GotGraph(object):
    """
    This object provides a set of helper methods for creating and retrieving nodes and relationships from
    a Neo4j database holding information about characters, players, teams, fans, comments and their
    relationships.
    """

    # Note:
    # I tend to avoid object mapping frameworks. Object mapping frameworks are fun in the beginning
    # but tend to be annoying after a while. So, I did not create types Player, Team, etc.
    #

    def __init__(
        self,
        auth=('neo4j', 'dbuserdbuser'),
        host='localhost',
        port=7687,
        secure=False,
    ):
        """
        Connects to the DB and sets a Graph instance variable.
        Also creates a NodeMatcher and RelationshipMatcher, which are py2neo framework classes.

        :param auth: (user, password) tuple passed to the Graph.
        :param host: Database host name.
        :param port: Database port.
        :param secure: Passed through to the Graph as its ``secure`` option.
        """
        self._graph = Graph(secure=secure,
                            bolt=True,
                            auth=auth,
                            host=host,
                            port=port)
        self._node_matcher = NodeMatcher(self._graph)
        self._relationship_matcher = RelationshipMatcher(self._graph)

    def run_q(self, qs, args):
        """
        Run a query directly on the graph.

        :param qs: Query string that may have {} slots for parameters.
        :param args: Dictionary of parameters to insert into query string.
        :return: Result of the query, or None if running it raised an exception (the exception is printed).
        """
        # No explicit transaction is opened: nothing would ever commit or roll it back.
        try:
            return self._graph.run(qs, args)
        except Exception as e:
            print("Run exaception = ", e)
            return None

    def run_match(self, labels=None, properties=None):
        """
        Uses a NodeMatcher to find a node matching a "template."

        :param labels: A single label string, or a list/tuple/set of labels, that the node must have.
        :param properties: A dictionary of {property_name: property_value} defining the template that the
            node must match.
        :return: A list of Node objects matching the pattern.
        :raises ValueError: If both labels and properties are None.
        """
        if labels is None and properties is None:
            raise ValueError(
                "Invalid request. Labels and properties cannot both be None.")

        # A single label is passed through as-is; a collection of labels is unpacked so that
        # each one becomes its own positional argument of match().
        if labels is None:
            label_args = ()
        elif isinstance(labels, str):
            label_args = (labels,)
        else:
            label_args = tuple(labels)

        result = self._node_matcher.match(*label_args, **(properties or {}))

        # Convert NodeMatch data into a simple list of Nodes.
        return list(result)

    def find_nodes_by_template(self, tmp):
        """
        Find nodes from a label/properties template.

        :param tmp: A template defining the label and properties for Nodes to return. An
         example is { "label": "Fan", "template": { "last_name": "Ferguson", "first_name": "Donald" }}
        :return: A list of Nodes matching the template.
        """
        return self.run_match(labels=tmp.get('label', None),
                              properties=tmp.get("template", None))

    def _find_first_node(self, label, key, value):
        """Return the first node with the given label whose property `key` equals `value`, or None."""
        matches = self.find_nodes_by_template({
            "label": label,
            "template": {
                key: value
            }
        })
        # I should throw an exception here if there is more than 1.
        return matches[0] if matches else None

    def create_node(self, label, **kwargs):
        """
        Create a node with the given label and properties in an auto-committing transaction.

        :param label: Label of the new node.
        :param kwargs: Properties of the new node.
        :return: The created Node.
        """
        n = Node(label, **kwargs)
        tx = self._graph.begin(autocommit=True)
        tx.create(n)
        return n

    def get_character(self, character_id):
        """
        Given a character ID, return the node for the Character.

        :param character_id: Value of the node's character_id property.
        :return: The first matching Character node, or None.
        """
        return self._find_first_node("Character", "character_id", character_id)

    def get_player(self, player_id):
        """
        :param player_id: Value of the node's player_id property.
        :return: The first matching Player node, or None.
        """
        return self._find_first_node("Player", "player_id", player_id)

    def get_team(self, team_id):
        """
        :param team_id: Value of the node's team_id property.
        :return: The first matching Team node, or None.
        """
        return self._find_first_node("Team", "team_id", team_id)

    def get_fan(self, uni):
        """
        :param uni: Value of the node's uni property.
        :return: The first matching Fan node, or None.
        """
        return self._find_first_node("Fan", "uni", uni)

    def get_related_characters(self, ch_id, r_kind):
        """
        Match relationships of one kind that involve a character.

        :param ch_id: Character ID; upper-cased before the lookup.
        :param r_kind: Relationship type; upper-cased before matching.
        :return: The RelationshipMatcher result.
        """
        t = self.get_character(ch_id.upper())
        # NOTE(review): when the character is not found, `t` is None and the match runs with {None};
        # confirm how the matcher treats that.
        return self._relationship_matcher.match({t}, r_type=r_kind.upper())

    def create_character_relationship(self, source_id, target_id, label):
        """
        Create a relationship from one Character to another.

        :param source_id: character_id of the start node.
        :param target_id: character_id of the end node.
        :param label: Relationship type; upper-cased before use.
        :return: The created relationship.
        """
        f = self.get_character(source_id)
        t = self.get_character(target_id)
        r = Relationship(f, label.upper(), t)
        tx = self._graph.begin(autocommit=True)
        tx.create(r)
        return r

    def get_appearance(self, player_id, team_id, year_id):
        """
        Get the information about appearances for a player and team.

        :param player_id: player_id
        :param team_id: team_id
        :param year_id: The year for getting appearances, or None for every year.
        :return: A list of matching APPEARED relationships, or None if the match itself returns None.
        """
        try:
            # Get the Nodes at the ends of the relationship representing appearances.
            p = self.get_player(player_id)
            t = self.get_team(team_id)

            # Run a match looking for relationships of a specific type linking the nodes.
            rm = self._graph.match(nodes=[p, t], r_type="APPEARED")
            if rm is None:
                return None

            result = []
            for r in rm:
                # The type will be a class APPEARED() because of the OO mapping.
                node_type = type(r).__name__
                year = r['year']

                # If the type and year are correct, add to result
                if node_type == "APPEARED" and (year == year_id
                                                or year_id is None):
                    result.append(r)

            return result
        except Exception as e:
            print("get_appearance: Exception e = ", e)
            raise

    def create_appearance_all(self, player_id, team_id, year, games):
        """
        Create an APPEARED relationship from a player to a Team.

        Failures are printed and not re-raised.

        :param player_id: player_id property of the Player node.
        :param team_id: team_id property of the Team node.
        :param year: Stored as the relationship's year property, with the type it is passed in.
        :param games: Stored as the relationship's games property, with the type it is passed in.
        :return: None
        """
        # Values travel as query parameters instead of being concatenated into the query text.
        q = "match (n:Player {player_id: {pid}}), (t:Team {team_id: {tid}}) " + \
            "create (n)-[r:APPEARED { games: {games}, year: {year} }]->(t)"
        tx = None
        try:
            tx = self._graph.begin(autocommit=False)
            # Run inside the transaction so the commit below covers the query.
            tx.run(q, pid=player_id, tid=team_id, games=games, year=year)
            tx.commit()
        except Exception as e:
            print("create_appearances: exception = ", e)
            if tx is not None:
                try:
                    tx.rollback()
                except Exception as rollback_error:
                    # The transaction may already be finished; this method stays best-effort.
                    print("create_appearances: rollback exception = ",
                          rollback_error)

    def create_follows(self, follower, followed):
        """
        Create a FOLLOWS relationship from a Fan to another Fan.

        :param follower: UNI of the following fan.
        :param followed: UNI of the followed fan.
        """
        f = self.get_fan(follower)
        t = self.get_fan(followed)
        r = Relationship(f, "FOLLOWS", t)
        tx = self._graph.begin(autocommit=True)
        tx.create(r)

    def get_comment(self, comment_id):
        """
        :param comment_id: Comment ID
        :return: The first matching Comment node, or None.
        """
        return self._find_first_node("Comment", "comment_id", comment_id)

    def create_comment(self, uni, comment, team_id=None, player_id=None):
        """
        Creates a comment.

        :param uni: The UNI for the Fan making the comment.
        :param comment: A simple string.
        :param team_id: A valid team ID or None. team_id and player_id cannot BOTH be None.
        :param player_id: A valid player ID or None
        :return: The Node representing the comment.
        :raises ValueError: If uni or comment is None, or both team_id and player_id are None.
        :raises RuntimeError: If anything fails after validation; the transaction is rolled back.
        """
        if uni is None or comment is None or (player_id is None
                                              and team_id is None):
            raise ValueError("create_comment: invalid input.")

        comment_id = str(uuid.uuid4())  # Generate a UUID as the comment's ID.
        team = None
        player = None
        tx = None

        try:
            # We are going to make a transactional update.
            tx = self._graph.begin()

            fan = self.get_fan(uni)  # Get the node for the Fan.
            if fan is None:
                raise ValueError("create_comment: Fan node not found.")

            if team_id is not None:
                team = self.get_team(team_id)
                if team is None:
                    raise ValueError("create_comment: Team node not found.")

            if player_id is not None:
                player = self.get_player(player_id)
                if player is None:
                    raise ValueError("create_comment: Player node not found.")

            c = Node("Comment", comment_id=comment_id, comment=comment)
            tx.create(c)

            pc = Relationship(fan, "COMMENT_BY", c)
            tx.create(pc)

            if player is not None:
                pr = Relationship(c, "COMMENT_ON", player)
                tx.create(pr)

            if team is not None:
                p2 = Relationship(c, "COMMENT_ON", team)
                tx.create(p2)

            tx.commit()

            return c

        except Exception as e:
            if tx:
                tx.rollback()
            raise RuntimeError(
                "create_comment: Something evil happened., e = " + str(e))

    def create_sub_comment(self, uni, origin_comment_id, comment):
        """
        Create a sub-comment (response to a comment or response) and links with parent in thread.

        :param uni: ID of the Fan making the comment.
        :param origin_comment_id: Id of the comment to which this is a response.
        :param comment: Comment string
        :return: Created comment.
        :raises ValueError: If any argument is None.
        :raises RuntimeError: If anything fails after validation; the transaction is rolled back.
        """
        if uni is None or origin_comment_id is None or comment is None:
            raise ValueError("create_sub_comment: invalid input.")

        comment_id = str(uuid.uuid4())  # Generate a UUID as the comment's ID.
        tx = None

        try:
            # We are going to make a transactional update.
            tx = self._graph.begin()

            fan = self.get_fan(uni)  # Get the node for the Fan.
            if fan is None:
                raise ValueError("create_sub_comment: Fan node not found.")

            origin_comment = self.get_comment(origin_comment_id)
            if origin_comment is None:
                raise ValueError(
                    "create_sub_comment: Original comment node not found.")

            c = Node("Comment", comment_id=comment_id, comment=comment)
            tx.create(c)

            sc = Relationship(c, "RESPONSE_TO", origin_comment)
            tx.create(sc)

            pc = Relationship(fan, "RESPONSE_BY", c)
            tx.create(pc)

            tx.commit()

            return c

        except Exception as e:
            if tx:
                tx.rollback()
            raise RuntimeError(
                "create_sub_comment: Something evil happened., e = " + str(e))

    def get_sub_comments(self, comment_id):
        """
        :param comment_id: The unique ID of the comment.
        :return: Query result with the sub-comments, the RESPONSE_TO relationships and the comment.
        """
        q = "match (c:Comment {comment_id: {cid}})<-[response:RESPONSE_TO]-(sc:Comment) return sc,response,c"
        return self._graph.run(q, cid=comment_id)

    def get_roster(self, team_id, year_id):
        """
        :param team_id: Team ID
        :param year_id: Value the APPEARED relationship's year must have.
        :return: Query result with team, appearance and player columns for that team and year.
        """
        q = "match (t:Team {team_id: {tid}})<-[a:APPEARED {year: {yid}}]-(p:Player) " + \
            " return t.team_id, t.team_name, a.year, a.games, p.player_id, p.last_name, p.first_name"
        return self._graph.run(q, tid=team_id, yid=year_id)

    def get_player_comments(self, player_id):
        """
        Gets the comments made directly on a player, with the fan who made each one.

        :param player_id: ID of the player.
        :return: Query result with fan, comment and player property columns.
        """
        q = 'match (fan)-[by:COMMENT_BY]->(comment)-[on:COMMENT_ON]->(player:Player {player_id: {pid}}) ' + \
            'return fan.uni, fan.last_name, fan.first_name, comment.comment_id, comment.comment,  ' + \
            " player.player_id, player.last_name, player.first_name "
        return self._graph.run(q, pid=player_id)

    def get_player_comments_g(self, player_id):
        """
        Gets the comments made directly on a player as graph elements.

        :param player_id: ID of the player.
        :return: Query result with the fan, COMMENT_BY, comment, COMMENT_ON and player elements.
        """
        q = 'match (fan)-[by:COMMENT_BY]->(comment)-[on:COMMENT_ON]->(player:Player {player_id: {pid}}) ' + \
            'return fan, by, comment, on, player '
        return self._graph.run(q, pid=player_id)

    def get_team_comments(self, team_id):
        """
        Match the COMMENT_ON relationships that involve a team.

        :param team_id: ID of the team.
        :return: The RelationshipMatcher result.
        """
        t = self.get_team(team_id)
        return self._relationship_matcher.match({t}, r_type="COMMENT_ON")

    def get_players_by_team(self, team_id):
        """
        :param team_id: Team ID
        :return: List of dicts, one per APPEARED relationship, holding the player's properties plus the
            'year' of that appearance.
        """
        q = "match (p:Player)-[r:APPEARED]->(t:Team) where t.team_id={tid} return p,r,t"
        ans = []
        for record in self._graph.run(q, tid=team_id).data():
            current = dict(record['p'].items())
            current['year'] = record['r']['year']
            ans.append(current)

        return ans
# "Wrote" relationships from the author to papers 4 and 5.
# NOTE(review): rel1-rel3 and the nodes used below are not defined in this
# fragment; presumably they are created earlier in the script.
rel4 = Relationship(author_ankush, "Wrote", ankush_paper_4)
rel5 = Relationship(author_ankush, "Wrote", ankush_paper_5)

# Each of the five papers is "Presented_at" conf1.
paper_1_conf1 = Relationship(ankush_paper_1, "Presented_at", conf1)
paper_2_conf1 = Relationship(ankush_paper_2, "Presented_at", conf1)
paper_3_conf1 = Relationship(ankush_paper_3, "Presented_at", conf1)
paper_4_conf1 = Relationship(ankush_paper_4, "Presented_at", conf1)
paper_5_conf1 = Relationship(ankush_paper_5, "Presented_at", conf1)

# Each of the five editions is "PartOf" conf1.
edition1_conf1_rel1 = Relationship(edition1_conf1, "PartOf", conf1)
edition2_conf1_rel2 = Relationship(edition2_conf1, "PartOf", conf1)
edition3_conf1_rel3 = Relationship(edition3_conf1, "PartOf", conf1)
edition4_conf1_rel4 = Relationship(edition4_conf1, "PartOf", conf1)
edition5_conf1_rel5 = Relationship(edition5_conf1, "PartOf", conf1)

# Queue the objects for creation in a single transaction.
tx = graph.begin()

tx.create(author_edition1)
tx.create(author_edition2)
tx.create(author_edition3)

tx.create(author_ankush)
tx.create(ankush_paper_1)
tx.create(ankush_paper_2)
tx.create(ankush_paper_3)
tx.create(ankush_paper_4)
tx.create(ankush_paper_5)

tx.create(rel1)
tx.create(rel2)
tx.create(rel3)
Пример #32
0
def main(to_handle):
    """
    Load one CSV file of artifacts into the MDG graph.

    First pass: every row with at least 7 columns becomes an ``Artifact`` node
    (reusing the node reported by ``existing_node`` when there is one), and a
    row whose group and artifact IDs equal the previous row's is linked to the
    previous row's node with ``NEXT``. Each row is handled in its own
    transaction.

    Second pass: for every node collected in the first pass, a ``DEPENDS_ON``
    relationship is merged for each dependency that ``find_dep_node`` can
    resolve, one transaction per node.

    Problems are written to the ``errors`` and ``exceptions`` streams rather
    than raised.

    Row layout read here: [1] stars, [2] url, [3] groupID, [4] artifact,
    [5] version, [6] packaging, [7:] dependencies.

    :param to_handle: Name of the CSV file, appended to ``data_dir``.
    """
    # Don't forget to start the MDG up before using this script!
    MDG = Graph()
    deps = []
    matcher = NodeMatcher(MDG)

    errors.write("In " + to_handle + ":\n")
    exceptions.write("In " + to_handle + ":\n")

    with open(data_dir + to_handle, 'r', newline='') as f:
        reader = csv.reader(f)
        prev_gid, prev_art, prev_node, prev_version = None, None, None, None
        for row in reader:
            if len(row) < 7:
                continue

            # Get metadata
            gid, aid, version, packaging = row[3], row[4], row[5], row[6]
            sha = get_hash(row[2])

            # Missing: release date, packaging
            # Create & add node
            repo_node = Node("Artifact",
                             stars=row[1],
                             url=row[2],
                             groupID=gid,
                             artifact=aid,
                             version=version,
                             packaging=packaging,
                             coordinates=gid + ":" + aid + ":" + version,
                             commit_hash=sha,
                             from_github="True")

            repo_deps = []
            for d in row[7:]:
                if len(d) > 2:
                    dep_list = convert_dep_to_list(d)
                    if dep_list is not None:
                        repo_deps.append(dep_list)

            # This is to see if the node was in the MDG before we added
            try:
                e_node = existing_node(matcher, repo_node)
            except Exception as err:
                errors.write("Error while checking if the node " + gid + ":" +
                             aid + ":" + version + ":" + sha + " exists in " +
                             to_handle + ": " + repr(err) + "\n")
                continue

            # Open the transaction only once the row is known to be processed,
            # so the `continue` above cannot leave one open.
            tx = MDG.begin()

            if e_node is not None:
                print(e_node["coordinates"])
                repo_node = e_node
            else:
                repo_node["coordinates"] += ":" + sha

                if version != prev_version or (aid != prev_art
                                               and gid != prev_gid):
                    try:
                        tx.create(repo_node)
                    except Exception as err:
                        errors.write("Error while creating node " +
                                     repo_node["coordinates"] + " in " +
                                     to_handle + ": " + repr(err) + "\n")

            # Same artifact as the previous row: chain the two versions.
            if aid == prev_art and gid == prev_gid:
                r_next = Relationship(repo_node, "NEXT", prev_node)
                try:
                    tx.merge(r_next, "Artifact", "coordinates")
                except Exception as err:
                    errors.write("Error while merging NEXT between " +
                                 repo_node["coordinates"] + " and " +
                                 prev_node["coordinates"] + " in " +
                                 to_handle + ": " + repr(err) + "\n")

            prev_gid, prev_art, prev_node, prev_version = (
                repo_node["groupID"], repo_node["artifact"], repo_node,
                repo_node["version"])

            deps.append((repo_node, repo_deps))
            tx.commit()

    # Done adding nodes and NEXT; now add the dependency relationships.
    for (node, dep_list) in deps:
        tx = MDG.begin()
        node_deps = purge_deps(dep_list)

        for dep in node_deps:
            dep_node, reason = find_dep_node(MDG, matcher, dep)

            if dep_node is None:
                exceptions.write(node["coordinates"] + ": could not" +
                                 " create dependency with " + dep[0] + ":" +
                                 dep[1] + ":" + dep[2][0] + " because " +
                                 reason + "\n")
                continue

            r_dep = Relationship(node, "DEPENDS_ON", dep_node)

            try:
                tx.merge(r_dep, "Artifact", "coordinates")
            except Exception as err:
                errors.write("Could not create dependency between " +
                             r_dep.start_node["coordinates"] + " and " +
                             r_dep.end_node["coordinates"] + " because " +
                             repr(err) + "\n")

        tx.commit()
Пример #33
0
class Neo4jQuery(object):
    """
    Builds Cypher queries for journals, papers and topics and runs them on a Neo4j graph.

    Most methods return a ``(query string, query keys)`` pair; ``execute_query``
    pairs the keys with argument values and runs the query.
    """

    def __init__(self, k, config):
        """
        Args:
            k: number of topics
            config: config dict for database; "user" and "password" are read
        """
        self.num_topics = k  # number of topics
        self.config = config  # config dict for database
        self.graph = Graph(auth=(config["user"],
                                 config["password"]))  # get Neo4J graph

    def construct_topic_vector(self, topic_indices):
        """ Construct the topic indicator vector
        Args:
            topic_indices:  List of topic indexes returned from LDA model
        Returns: list of num_topics strings, "1" at position k - 1 for every index k given, "0" elsewhere
        """
        value_str = ["0"] * self.num_topics
        for k in topic_indices:
            value_str[k - 1] = "1"
        return value_str

    def insert_journal(self):
        """ Insert Journal into Graph
        Returns: Query string, Query keys
        """
        # The property map needs its own closing brace before the ")".
        return "CREATE (j:Journal {id:{id}, name:{name}, field:{field}, ranking:{ranking}})", [
            "id", "name", "field", "ranking"
        ]

    def insert_paper(self):
        """ Insert Paper into Graph
        Returns: Query string, Query keys
        """
        return "CREATE (p:Paper {id:{id}, authors:{authors}, journal_id:{journal_id}, title:{title}, abstract:{abstract}})", [
            "id", "authors", "journal_id", "title", "abstract"
        ]

    def update_paper(self, col_names):
        """ Update Paper in Graph
        Args:
            col_names: names of the paper properties to set
        Returns: Query string, Query keys (col_names followed by "id"; the input list is left untouched)
        """
        alter_str = ','.join("p." + x + "={" + x + "}" for x in col_names)
        keys = list(col_names) + ["id"]
        return "MATCH (p:Paper) WHERE p.id={id} SET " + alter_str + ";", keys

    def insert_topic(self, paper_id, topic_indices):
        """ Insert Paper-Topic Relationship into Graph
        Args:
            paper_id: the id of the paper inserted
            topic_indices: topic indexes, turned into an indicator vector first
        Returns: Query string, Query keys
        """
        # NOTE(review): paper_id is formatted straight into the query text, so
        # it must not come from untrusted input.
        paper_topic_rel_str = "MATCH (p:Paper), (t:Topic) WHERE p.id={0} AND t.no IN [{1}] CREATE (p)-[:TopicOf]->(t)"
        value_str = self.construct_topic_vector(topic_indices)
        # Positions of the "1" flags become the topic numbers to link to.
        topic_nodes = [
            str(i) for i, flag in enumerate(value_str) if flag == "1"
        ]
        query_str = paper_topic_rel_str.format(paper_id, ','.join(topic_nodes))
        return query_str, []

    def delete_paper(self):
        """ Delete Paper and all of its edges
        Returns: Query string, Query keys
        """
        return "MATCH (p:Paper) WHERE p.id={id} DETACH DELETE p", [
            "id"
        ]  # Deletes nodes and all edges

    def delete_topic(self):
        """ Returns: empty query and keys """
        return "", []  # Empty bc delete_paper handles it

    def search_journal(self):
        """ Search papers by journal id
        Returns: Query string, Query keys
        """
        return "MATCH (j:Journal)<-[:PUBLISHED]-(p:Paper) WHERE p.journal_id={id} RETURN p.id, p.authors, p.journal_id, p.title, j.ranking", [
            "id"
        ]

    def search_paper(self):
        """ Search a paper by id
        Returns: Query string, Query keys
        """
        return "MATCH (p:Paper) WHERE p.id={id} RETURN p.id, p.authors, p.journal_id, p.title", [
            "id"
        ]

    def search_authors(self):
        """ Search papers whose authors field contains the given text
        Returns: Query string, Query keys
        """
        # A parameter is not substituted inside a quoted string, so the
        # pattern is assembled by concatenation.
        return "MATCH (p:Paper) WHERE p.authors =~ ('.*' + {authors} + '.*') RETURN p.id, p.authors, p.journal_id, p.title", [
            "authors"
        ]

    def get_recommended_papers(self):
        """ Recommendation Query for calculating cosine-sim, ranking and returning results
        Returns: Query string, Query keys
        """
        # TODO: Remove p1.id < 3000
        q1 = "MATCH (j:Journal)<-[pub1:PUBLISHED]-(p1:Paper)-[r1:TopicOf]->(Topic) WHERE p1.id < 3000 AND j.ranking <> -1 "
        q2 = "WITH p1 AS p1, j AS j, algo.similarity.cosine({topic_vec}, collect(r1.score)) AS similarity "
        q3 = "RETURN  p1.id, round(similarity * 100) / 100, p1.abstract, p1.authors, p1.journal_id, p1.title "
        q4 = "ORDER BY similarity DESC, j.ranking LIMIT 10;"

        return q1 + q2 + q3 + q4, ["topic_vec"]

    def execute_query(self, query_str, args=None, commit=True):
        """ Execute query on Neo4J graph
        Args:
            query_str: the (query string, query keys) pair returned by the methods
            args: argument values to use in the query, in the order of the keys
            commit: Commit query or no (NOT USED IN THIS CASE)
        Returns: (False, Error) or (True, Cursor); (True, None) for an empty query
        """
        query, keys = query_str

        # Return before opening a transaction that would never be finished.
        if len(query) == 0:
            return True, None  # No Query to execute

        assign_dict = dict(zip(keys, args or [])) if keys else {}

        tx = self.graph.begin()
        try:
            cursor = tx.run(query, assign_dict)
            tx.commit()
        except Exception as e:
            print("Error :" + str(e))
            try:
                tx.rollback()
            except Exception:
                # The failed transaction may already be finished; the original
                # error is what gets reported.
                pass
            return False, e

        return True, cursor

    def get_results(self, cursor_results):
        """ Parse results returned by the cursor of the database
        Args:
            cursor_results: Results returned by the cursor of this database
        Returns: list of rows, each a list of the row's values as strings
        """
        return [[str(x) for x in r.values()] for r in cursor_results.data()]

    def close_db(self):
        """ Nothing is closed explicitly for this database """
        return
Пример #34
0
def parse_evtx(evtx_list):
    """Parse event log files, run the detection steps and write the result to Neo4j.

    Options are read from the module-level ``args`` object: ``timezone``,
    ``fromdate``, ``todate``, ``evtx``, ``xmls`` and ``learn``.

    The function works in four phases:
      1. validate the options and the input file headers, and count records;
      2. walk every record from ``xml_records`` and collect per-event state
         (logon rows, admins, user/group changes, policy changes, log clears);
      3. aggregate the collected frames and call ``learnhmm`` (optional),
         ``adetection``, ``decodehmm`` and ``pagerank``;
      4. queue the ``statement_*`` Cypher templates on one transaction and
         commit it.

    Args:
        evtx_list: iterable of input file paths. Each is opened as binary EVTX
            when ``args.evtx`` is set and as XML text when ``args.xmls`` is set.

    Returns:
        None. All output goes to stdout and to the Neo4j database.

    Exits (``sys.exit``) when an option cannot be parsed, an input file has the
    wrong header, no usable logon event was found, or the Graph object cannot
    be created.
    """
    # One row per accepted logon-type event; duplicates are collapsed into a
    # "count" column after the parsing loop.
    event_set = pd.DataFrame(index=[],
                             columns=[
                                 "eventid", "ipaddress", "username",
                                 "logintype", "status", "authname"
                             ])
    # Same events keyed by hour-truncated timestamp; passed to adetection().
    count_set = pd.DataFrame(index=[],
                             columns=["dates", "eventid", "username"])
    # Same events with full timestamps; passed to learnhmm()/decodehmm().
    ml_frame = pd.DataFrame(index=[], columns=["date", "user", "host", "id"])
    username_set = []
    domain_set = []
    admins = []
    domains = []
    ntmlauth = []
    deletelog = []
    policylist = []
    addusers = {}
    delusers = {}
    addgroups = {}
    removegroups = {}
    sids = {}
    hosts = {}
    dcsync_count = {}
    dcsync = {}
    dcshadow_check = []
    dcshadow = {}
    count = 0
    record_sum = 0
    starttime = None
    endtime = None

    # The timezone object is built only to validate the offset; the raw hour
    # offset is what gets added to every event time below.
    if args.timezone:
        try:
            datetime.timezone(datetime.timedelta(hours=args.timezone))
            tzone = args.timezone
            print("[*] Time zone is %s." % args.timezone)
        except:
            sys.exit("[!] Can't load time zone '%s'." % args.timezone)
    else:
        tzone = 0

    if args.fromdate:
        try:
            fdatetime = datetime.datetime.strptime(args.fromdate,
                                                   "%Y%m%d%H%M%S")
            print("[*] Parse the EVTX from %s." %
                  fdatetime.strftime("%Y-%m-%d %H:%M:%S"))
        except:
            sys.exit("[!] From date does not match format '%Y%m%d%H%M%S'.")

    if args.todate:
        try:
            tdatetime = datetime.datetime.strptime(args.todate, "%Y%m%d%H%M%S")
            # NOTE(review): this message says "from" for the *to* date; it looks
            # copied from the fromdate branch - confirm the intended wording.
            print("[*] Parse the EVTX from %s." %
                  tdatetime.strftime("%Y-%m-%d %H:%M:%S"))
        except:
            sys.exit("[!] To date does not match format '%Y%m%d%H%M%S'.")

    # Phase 1: check each file's header and add up the record totals.
    for evtx_file in evtx_list:
        if args.evtx:
            with open(evtx_file, "rb") as fb:
                fb_data = fb.read()[0:8]
                if fb_data != EVTX_HEADER:
                    sys.exit("[!] This file is not EVTX format {0}.".format(
                        evtx_file))

            # Walk backwards from the second-to-last chunk until one reports a
            # positive last record number. Any exception (including running out
            # of chunks) falls back to the file header's next record number.
            chunk = -2
            with Evtx(evtx_file) as evtx:
                fh = evtx.get_file_header()
                try:
                    while True:
                        last_chunk = list(evtx.chunks())[chunk]
                        last_record = last_chunk.file_last_record_number()
                        chunk -= 1
                        if last_record > 0:
                            record_sum = record_sum + last_record
                            break
                except:
                    record_sum = record_sum + fh.next_record_number()

        if args.xmls:
            with open(evtx_file, "r") as fb:
                fb_data = fb.read()
                if "<?xml" not in fb_data[0:6]:
                    sys.exit("[!] This file is not XML format {0}.".format(
                        evtx_file))
                # Each record is counted by its <System> element.
                record_sum += fb_data.count("<System>")
                del fb_data

    print("[*] Last record number is %i." % record_sum)

    # Parse Event log
    print("[*] Start parsing the EVTX file.")

    # Phase 2: walk every record and collect state per event id.
    for evtx_file in evtx_list:
        print("[*] Parse the EVTX file %s." % evtx_file)

        for node, err in xml_records(evtx_file):
            # Records that xml_records reports an error for are skipped silently.
            if err is not None:
                continue
            count += 1
            eventid = int(node.xpath("/Event/System/EventID")[0].text)

            if not count % 100:
                sys.stdout.write("\r[*] Now loading %i records." % count)
                sys.stdout.flush()

            if eventid in EVENT_ID:
                logtime = node.xpath("/Event/System/TimeCreated")[0].get(
                    "SystemTime")
                # SystemTime is tried with a space separator first and with a
                # "T" separator as fallback; fractional seconds are dropped.
                try:
                    etime = datetime.datetime.strptime(
                        logtime.split(".")[0],
                        "%Y-%m-%d %H:%M:%S") + datetime.timedelta(hours=tzone)
                except:
                    etime = datetime.datetime.strptime(
                        logtime.split(".")[0],
                        "%Y-%m-%dT%H:%M:%S") + datetime.timedelta(hours=tzone)
                # stime is etime truncated to the hour (year, month, day, hour).
                stime = datetime.datetime(*etime.timetuple()[:4])
                if args.fromdate or args.todate:
                    if args.fromdate and fdatetime > etime:
                        continue
                    # The first event past the end date stops reading the
                    # current file only; the outer loop continues with the
                    # next file.
                    if args.todate and tdatetime < etime:
                        endtime = stime
                        break

                # Track the observed time range. The comparison uses the full
                # event time while the stored value is the hour-truncated one.
                if starttime is None:
                    starttime = stime
                elif starttime > etime:
                    starttime = stime

                if endtime is None:
                    endtime = stime
                elif endtime < etime:
                    endtime = stime

                event_data = node.xpath("/Event/EventData/Data")
                # Per-record defaults.
                # NOTE(review): username defaults to "******" while the checks
                # below compare against "-" like the other fields; the literal
                # may be a redaction artifact - confirm against upstream.
                logintype = "-"
                username = "******"
                domain = "-"
                ipaddress = "-"
                hostname = "-"
                status = "-"
                sid = "-"
                authname = "-"

                # NOTE(review): throughout the branches below,
                # `data.get("Name") in "<literal>"` is a substring test, not an
                # equality test: any Name that is a substring of the literal
                # matches, and a missing Name attribute (None) raises TypeError.
                # Confirm whether `==` was intended.
                #
                # Username normalisation, repeated in several branches: take
                # the part before "@"; names ending in "$" (and empty names)
                # become the "******" placeholder, all others are lower-cased
                # and get a trailing "@" that is stripped again with [:-1]
                # when the graph statements are queued.
                if eventid == 4672:
                    for data in event_data:
                        if data.get(
                                "Name"
                        ) in "SubjectUserName" and data.text != None:
                            username = data.text.split("@")[0]
                            if username[-1:] not in "$":
                                username = username.lower() + "@"
                            else:
                                username = "******"
                    # Users seen in event 4672 are later given the "system"
                    # rights label.
                    if username not in admins and username != "-":
                        admins.append(username)
                elif eventid in [4720, 4726]:
                    for data in event_data:
                        if data.get(
                                "Name"
                        ) in "TargetUserName" and data.text != None:
                            username = data.text.split("@")[0]
                            if username[-1:] not in "$":
                                username = username.lower() + "@"
                            else:
                                username = "******"
                    # 4720 fills the "Created" map, 4726 the "Deleted" map;
                    # a later event for the same user overwrites the timestamp.
                    if eventid == 4720:
                        addusers[username] = etime.strftime(
                            "%Y-%m-%d %H:%M:%S")
                    else:
                        delusers[username] = etime.strftime(
                            "%Y-%m-%d %H:%M:%S")
                elif eventid == 4719:
                    # NOTE(review): `category` and `guid` are only assigned
                    # when the matching Data elements are present; otherwise
                    # the append below uses values left over from an earlier
                    # record or raises NameError on the first one.
                    for data in event_data:
                        if data.get(
                                "Name"
                        ) in "SubjectUserName" and data.text != None:
                            username = data.text.split("@")[0]
                            if username[-1:] not in "$":
                                username = username.lower() + "@"
                            else:
                                username = "******"
                        if data.get(
                                "Name") in "CategoryId" and data.text != None:
                            category = data.text
                        if data.get(
                                "Name"
                        ) in "SubcategoryGuid" and data.text != None:
                            guid = data.text
                    policylist.append([
                        etime.strftime("%Y-%m-%d %H:%M:%S"), username,
                        category,
                        guid.lower()
                    ])
                elif eventid in [4728, 4732, 4756]:
                    # NOTE(review): `groupname` / `usid` have the same
                    # stale-or-unbound risk as `category` / `guid` above.
                    # Also, `data.text not in "-"` is evaluated before the
                    # None check, so a MemberSid element with no text raises
                    # TypeError.
                    for data in event_data:
                        if data.get(
                                "Name"
                        ) in "TargetUserName" and data.text != None:
                            groupname = data.text
                        elif data.get(
                                "Name"
                        ) in "MemberSid" and data.text not in "-" and data.text != None:
                            usid = data.text
                    addgroups[
                        usid] = "AddGroup: " + groupname + "(" + etime.strftime(
                            "%Y-%m-%d %H:%M:%S") + ") "
                elif eventid in [4729, 4733, 4757]:
                    # Mirror of the branch above, for removals from a group.
                    for data in event_data:
                        if data.get(
                                "Name"
                        ) in "TargetUserName" and data.text != None:
                            groupname = data.text
                        elif data.get(
                                "Name"
                        ) in "MemberSid" and data.text not in "-" and data.text != None:
                            usid = data.text
                    removegroups[
                        usid] = "RemoveGroup: " + groupname + "(" + etime.strftime(
                            "%Y-%m-%d %H:%M:%S") + ") "
                elif eventid == 4662:
                    # NOTE(review): the counter update sits inside the Data
                    # loop, so it advances once per Data element, not once per
                    # event. Every third increment records a DCSync timestamp
                    # and resets the counter. Confirm the intended indentation.
                    for data in event_data:
                        if data.get(
                                "Name"
                        ) in "SubjectUserName" and data.text != None:
                            username = data.text.split("@")[0]
                            if username[-1:] not in "$":
                                username = username.lower() + "@"
                            else:
                                username = "******"
                        dcsync_count[username] = dcsync_count.get(username,
                                                                  0) + 1
                        if dcsync_count[username] == 3:
                            dcsync[username] = etime.strftime(
                                "%Y-%m-%d %H:%M:%S")
                            dcsync_count[username] = 0
                elif eventid in [5137, 5141]:
                    # NOTE(review): the timestamp check also runs once per Data
                    # element. The first element stores the second-resolution
                    # timestamp; any later element of the same event finds it
                    # and records a DCShadow entry. Confirm this is intended.
                    for data in event_data:
                        if data.get(
                                "Name"
                        ) in "SubjectUserName" and data.text != None:
                            username = data.text.split("@")[0]
                            if username[-1:] not in "$":
                                username = username.lower() + "@"
                            else:
                                username = "******"
                        if etime.strftime(
                                "%Y-%m-%d %H:%M:%S") in dcshadow_check:
                            dcshadow[username] = etime.strftime(
                                "%Y-%m-%d %H:%M:%S")
                        else:
                            dcshadow_check.append(
                                etime.strftime("%Y-%m-%d %H:%M:%S"))
                else:
                    # All remaining ids in EVENT_ID are handled as logon-type
                    # events: extract source address/host, user, domain, SID,
                    # logon type, status and authentication package.
                    for data in event_data:
                        if data.get("Name") in ["IpAddress", "Workstation"
                                                ] and data.text != None:
                            # Strip the IPv4-mapped IPv6 prefix and backslashes.
                            ipaddress = data.text.split("@")[0]
                            ipaddress = ipaddress.lower().replace(
                                "::ffff:", "")
                            ipaddress = ipaddress.replace("\\", "")

                        if data.get(
                                "Name"
                        ) == "WorkstationName" and data.text != None:
                            hostname = data.text.split("@")[0]
                            hostname = hostname.lower().replace("::ffff:", "")
                            hostname = hostname.replace("\\", "")

                        if data.get(
                                "Name"
                        ) in "TargetUserName" and data.text != None:
                            username = data.text.split("@")[0]
                            if username[-1:] not in "$":
                                username = username.lower() + "@"
                            else:
                                username = "******"

                        if data.get(
                                "Name"
                        ) in "TargetDomainName" and data.text != None:
                            domain = data.text

                        # NOTE(review): `data.text[0:2] in "S-1"` accepts any
                        # two-character prefix that is a substring of "S-1"
                        # ("S-" or "-1").
                        if data.get("Name") in [
                                "TargetUserSid", "TargetSid"
                        ] and data.text != None and data.text[0:2] in "S-1":
                            sid = data.text

                        # NOTE(review): no None guard here - int(None) raises
                        # TypeError if a LogonType element has no text.
                        if data.get("Name") in "LogonType":
                            logintype = int(data.text)

                        if data.get("Name") in "Status":
                            status = data.text

                        if data.get("Name") in "AuthenticationPackageName":
                            authname = data.text

                    # Keep the event only if it has a user and a non-loopback
                    # source identified by address or host name.
                    if username != "-" and ipaddress != "::1" and ipaddress != "127.0.0.1" and (
                            ipaddress != "-" or hostname != "-"):
                        # Prefer the IP address as the source key; fall back
                        # to the workstation name when no address was logged.
                        if ipaddress != "-":
                            event_series = pd.Series([
                                eventid, ipaddress, username, logintype,
                                status, authname
                            ],
                                                     index=event_set.columns)
                            ml_series = pd.Series([
                                etime.strftime("%Y-%m-%d %H:%M:%S"), username,
                                ipaddress, eventid
                            ],
                                                  index=ml_frame.columns)
                        else:
                            event_series = pd.Series([
                                eventid, hostname, username, logintype, status,
                                authname
                            ],
                                                     index=event_set.columns)
                            ml_series = pd.Series([
                                etime.strftime("%Y-%m-%d %H:%M:%S"), username,
                                hostname, eventid
                            ],
                                                  index=ml_frame.columns)
                        event_set = event_set.append(event_series,
                                                     ignore_index=True)
                        ml_frame = ml_frame.append(ml_series,
                                                   ignore_index=True)
                        # print("%s,%i,%s,%s,%s,%s" % (eventid, ipaddress, username, comment, logintype))
                        count_series = pd.Series([
                            stime.strftime("%Y-%m-%d %H:%M:%S"), eventid,
                            username
                        ],
                                                 index=count_set.columns)
                        count_set = count_set.append(count_series,
                                                     ignore_index=True)
                        # print("%s,%s" % (stime.strftime("%Y-%m-%d %H:%M:%S"), username))

                        if domain != "-":
                            domain_set.append([username, domain])

                        if username not in username_set:
                            username_set.append(username)

                        if domain not in domains and domain != "-":
                            domains.append(domain)

                        if sid != "-":
                            sids[username] = sid

                        # Remember hostname -> IP so host names can be
                        # replaced by addresses after parsing.
                        if hostname != "-" and ipaddress != "-":
                            hosts[hostname] = ipaddress

                        # NOTE(review): this is a substring test against the
                        # literal "NTML" (so "NTLM" does not match), and the
                        # duplicate check looks up authname in a list of
                        # usernames. Looks like a typo for an NTLM check -
                        # confirm.
                        if authname in "NTML" and authname not in ntmlauth:
                            ntmlauth.append(username)

            # Event 1102 is handled for every record, independently of
            # EVENT_ID and of the date filter above.
            if eventid == 1102:
                logtime = node.xpath("/Event/System/TimeCreated")[0].get(
                    "SystemTime")
                try:
                    etime = datetime.datetime.strptime(
                        logtime.split(".")[0],
                        "%Y-%m-%d %H:%M:%S") + datetime.timedelta(hours=tzone)
                except:
                    etime = datetime.datetime.strptime(
                        logtime.split(".")[0],
                        "%Y-%m-%dT%H:%M:%S") + datetime.timedelta(hours=tzone)
                # deletelog is a flat list: three entries (time, user, domain)
                # are appended per 1102 event.
                deletelog.append(etime.strftime("%Y-%m-%d %H:%M:%S"))

                namespace = "http://manifests.microsoft.com/win/2004/08/windows/eventlog"
                user_data = node.xpath(
                    "/Event/UserData/ns:LogFileCleared/ns:SubjectUserName",
                    namespaces={"ns": namespace})
                domain_data = node.xpath(
                    "/Event/UserData/ns:LogFileCleared/ns:SubjectDomainName",
                    namespaces={"ns": namespace})

                # NOTE(review): indexing [0] raises IndexError if the XPath
                # matched nothing.
                if user_data[0].text != None:
                    username = user_data[0].text.split("@")[0]
                    if username[-1:] not in "$":
                        deletelog.append(username.lower())
                    else:
                        deletelog.append("-")
                else:
                    deletelog.append("-")

                if domain_data[0].text != None:
                    deletelog.append(domain_data[0].text)
                else:
                    deletelog.append("-")

    print("\n[*] Load finished.")
    print("[*] Total Event log is %i." % count)

    if not username_set:
        sys.exit(
            "[!] This event log did not include logs to be visualized. Please check the details of the event log."
        )

    # Phase 3: aggregate and analyse.
    # Whole hours between the first and last observed (hour-truncated) events.
    tohours = int((endtime - starttime).total_seconds() / 3600)

    # Replace any cell equal to a known host name with that host's IP address.
    if hosts:
        event_set = event_set.replace(hosts)
    # Collapse identical rows into one row carrying its occurrence count.
    event_set["count"] = event_set.groupby([
        "eventid", "ipaddress", "username", "logintype", "status", "authname"
    ])["eventid"].transform("count")
    event_set = event_set.drop_duplicates()
    count_set["count"] = count_set.groupby(["dates", "eventid", "username"
                                            ])["dates"].transform("count")
    count_set = count_set.drop_duplicates()
    # De-duplicate the [username, domain] pairs (order is not preserved).
    domain_set_uniq = list(map(list, set(map(tuple, domain_set))))

    # Learning event logs using Hidden Markov Model
    if hosts:
        ml_frame = ml_frame.replace(hosts)
    ml_frame = ml_frame.sort_values(by="date")
    if args.learn:
        print("[*] Learning event logs using Hidden Markov Model.")
        # The third argument is the start time truncated to midnight.
        learnhmm(ml_frame, username_set,
                 datetime.datetime(*starttime.timetuple()[:3]))

    # Calculate ChangeFinder
    print("[*] Calculate ChangeFinder.")
    timelines, detects, detect_cf = adetection(count_set, username_set,
                                               starttime, tohours)

    # Calculate Hidden Markov Model
    print("[*] Calculate Hidden Markov Model.")
    detect_hmm = decodehmm(ml_frame, username_set,
                           datetime.datetime(*starttime.timetuple()[:3]))

    # Calculate PageRank
    print("[*] Calculate PageRank.")
    ranks = pagerank(event_set, admins, detect_hmm, detect_cf, ntmlauth)

    # Create node
    print("[*] Creating a graph data.")

    # Phase 4: queue every statement on one transaction and commit at the end.
    try:
        graph_http = "http://" + NEO4J_USER + ":" + NEO4J_PASSWORD + "@" + NEO4J_SERVER + ":" + NEO4J_PORT + "/db/data/"
        GRAPH = Graph(graph_http)
    except:
        sys.exit("[!] Can't connect Neo4j Database.")

    tx = GRAPH.begin()
    # Reverse lookup IP -> host name, used for the address nodes.
    hosts_inv = {v: k for k, v in hosts.items()}
    for ipaddress in event_set["ipaddress"].drop_duplicates():
        if ipaddress in hosts_inv:
            hostname = hosts_inv[ipaddress]
        else:
            hostname = ipaddress
        tx.append(statement_ip, {
            "IP": ipaddress,
            "rank": ranks[ipaddress],
            "hostname": hostname
        })

    # One user statement per name in username_set.
    # Assumes adetection() returns six timeline rows per user (total, 4624,
    # 4625, 4768, 4769, 4776) and one detects row per user, in username_set
    # order - TODO confirm against adetection().
    i = 0
    for username in username_set:
        if username in sids:
            sid = sids[username]
        else:
            sid = "-"
        if username in admins:
            rights = "system"
        else:
            rights = "user"
        # Build the free-text status from everything recorded for this user.
        ustatus = ""
        if username in addusers:
            ustatus += "Created(" + addusers[username] + ") "
        if username in delusers:
            ustatus += "Deleted(" + delusers[username] + ") "
        if sid in addgroups:
            ustatus += addgroups[sid]
        if sid in removegroups:
            ustatus += removegroups[sid]
        if username in dcsync:
            ustatus += "DCSync(" + dcsync[username] + ") "
        if username in dcshadow:
            ustatus += "DCShadow(" + dcshadow[username] + ") "
        if not ustatus:
            ustatus = "-"
        # username[:-1] drops the trailing "@" added during normalisation.
        tx.append(
            statement_user, {
                "user": username[:-1],
                "rank": ranks[username],
                "rights": rights,
                "sid": sid,
                "status": ustatus,
                "counts": ",".join(map(str, timelines[i * 6])),
                "counts4624": ",".join(map(str, timelines[i * 6 + 1])),
                "counts4625": ",".join(map(str, timelines[i * 6 + 2])),
                "counts4768": ",".join(map(str, timelines[i * 6 + 3])),
                "counts4769": ",".join(map(str, timelines[i * 6 + 4])),
                "counts4776": ",".join(map(str, timelines[i * 6 + 5])),
                "detect": ",".join(map(str, detects[i]))
            })
        i += 1

    for domain in domains:
        tx.append(statement_domain, {"domain": domain})

    # One statement per aggregated (event, address, user, ...) row.
    for _, events in event_set.iterrows():
        tx.append(
            statement_r, {
                "user": events["username"][:-1],
                "IP": events["ipaddress"],
                "id": events["eventid"],
                "logintype": events["logintype"],
                "status": events["status"],
                "count": events["count"],
                "authname": events["authname"]
            })

    for username, domain in domain_set_uniq:
        tx.append(statement_dr, {"user": username[:-1], "domain": domain})

    # Observed time range, both ends truncated to the hour.
    tx.append(
        statement_date, {
            "Daterange":
            "Daterange",
            "start":
            datetime.datetime(
                *starttime.timetuple()[:4]).strftime("%Y-%m-%d %H:%M:%S"),
            "end":
            datetime.datetime(
                *endtime.timetuple()[:4]).strftime("%Y-%m-%d %H:%M:%S")
        })

    # Only the first recorded log-clear event (first three entries) is written.
    if len(deletelog):
        tx.append(
            statement_del, {
                "deletetime": deletelog[0],
                "user": deletelog[1],
                "domain": deletelog[2]
            })

    if len(policylist):
        # NOTE(review): `id` shadows the builtin for the rest of the function.
        id = 0
        for policy in policylist:
            # Translate category id and subcategory GUID to names when the
            # lookup tables know them; otherwise keep the raw value.
            if policy[2] in CATEGORY_IDs:
                category = CATEGORY_IDs[policy[2]]
            else:
                category = policy[2]
            if policy[3] in AUDITING_CONSTANTS:
                sub = AUDITING_CONSTANTS[policy[3]]
            else:
                sub = policy[3]
            username = policy[1]
            tx.append(
                statement_pl, {
                    "id": id,
                    "changetime": policy[0],
                    "category": category,
                    "sub": sub
                })
            tx.append(statement_pr, {"user": username[:-1], "id": id})
            id += 1

    tx.process()
    tx.commit()
    print("[*] Creation of a graph data finished.")
Пример #35
0
   # html_rel = Relationship(new_node,"HTML text",html_text)
   # gp.create(html_rel)
    gp.commit()
def get_the_available_crawlers():
    """Return a new list with the fixed crawler names CRAWLER-2 to CRAWLER-4."""
    return ["CRAWLER-2", "CRAWLER-3", "CRAWLER-4"]




# REST endpoint of the Neo4j server running on the host named by `database`.
graph_database_location = "http://"+database+":7474/db/data/"
graph = Graph(graph_database_location, user='******', password='******') # connect to the graph database at that endpoint
if delete_graph_history == "yes":
    graph.delete_all() # Delete all the previous made nodes and relationship
    print("DATABASE DELETED !")
# Transaction shared by the code that follows.
gp = graph.begin()

coordinates = [] # create the list for coordinates
coordinates = generate_coordinates(width, height, coordinates)   # generates coordinates based on the diff and the resolution

coordinates = generate_random_coordinates(coordinates)  # already generated coordinates are shuffled randomly

chrome_options = Options()
# Raw strings keep the Windows backslashes literal; "\p" and "\c" are not valid
# escape sequences, so the non-raw spelling only worked by accident and warns
# on current Python versions. The resulting path values are unchanged.
chrome_options.add_extension(r".\process_monitor.crx") # Adding the extension to chrome
# chrome_options.add_extension("C:\\Users\crawler\Desktop\Crawler\process_monitor.crx")
chromium_path = r".\chrome-win32\chrome.exe" # Use the portable chromium browser
# If chromium browser is not required then by removing the above chromium path, it will start using the default one
# The default will be developer google chrome.
# Only Dev channel google chrome can support the extension used here. This extension used a particular API.
# The API used is "chrome.processes" and it is available only in the chrome dev-channel and chromium browser
chrome_options.binary_location = chromium_path
Пример #36
0
class Neo4j:
    """Wrapper around a py2neo ``Graph`` that runs Cypher and prints schema info.

    Schema lookups (labels, relationship types, property keys, constraints,
    indexes) are cached on the instance after the first non-empty result;
    ``refresh()`` clears and reloads all of them.
    """

    # Schema caches; None means "not loaded yet".
    labels = None
    relationship_types = None
    property_keys = None
    constraints = None
    indexes = None

    # Class-level defaults only. __init__ copies this into a per-instance dict
    # so update_parameters() on one instance does not affect the others.
    parameters = {}

    def __init__(self, host, port, username=None, password=None, ssl=False, timeout=None, bolt=None):
        """Connect to the server and read its kernel version.

        Args:
            host: server host name.
            port: HTTP(S) port of the server.
            username: user name passed to ``Graph``.
            password: password passed to ``Graph``.
            ssl: use ``https`` and a secure connection when true.
            timeout: if given, assigned to ``http.socket_timeout``.
            bolt: forwarded unchanged to ``Graph``.

        Raises:
            AuthError: the server rejected the credentials.
            ConnectionError: the server could not be reached.
        """
        if timeout is not None:
            http.socket_timeout = timeout

        # Start from any class-level defaults, but own the dict from here on.
        self.parameters = dict(type(self).parameters)

        host_port = "{host}:{port}".format(host=host, port=port)
        uri = "{scheme}://{host_port}/db/data/".format(scheme="https" if ssl else "http", host_port=host_port)

        self.graph = Graph(uri, user=username, password=password, bolt=bolt, secure=ssl)

        # Reading the kernel version is the first round trip to the server, so
        # authentication and connectivity problems surface here.
        try:
            self.neo4j_version = self.graph.dbms.kernel_version
        except Unauthorized:
            raise AuthError(uri)
        except SocketError:
            raise ConnectionError(uri)

    def cypher(self, statement):
        """Run one Cypher statement with the stored parameters.

        Returns:
            dict with ``headers`` (column names), ``rows`` (list of value
            lists), ``duration`` (from ``duration_in_ms``) and ``error``:
            ``False`` on success, ``""`` after a KeyboardInterrupt (the
            transaction is rolled back), or the exception that was raised.
        """
        error = False
        headers = []
        rows = []

        start = datetime.now()
        tx = self.graph.begin()

        try:
            result = tx.run(statement, self.parameters)
            headers = list(result.keys())
            rows = [[x[header] for header in headers] for x in result]
            tx.commit()
        except KeyboardInterrupt:
            tx.rollback()
            error = ""
        except Exception as e:
            error = e

        end = datetime.now()

        return {
            "headers": headers,
            "rows": rows,
            "duration": duration_in_ms(start, end),
            "error": error
        }

    def get_labels(self):
        """Return the sorted node labels, loading them on first use."""
        if not self.labels:
            self.labels = sorted(self.graph.node_labels)
        return self.labels

    def get_relationship_types(self):
        """Return the sorted relationship types, loading them on first use."""
        if not self.relationship_types:
            self.relationship_types = sorted(self.graph.relationship_types)
        return self.relationship_types

    def get_property_keys(self):
        """Return the sorted property keys, loading them on first use."""
        if not self.property_keys:
            self.property_keys = sorted(remote(self.graph).resolve("propertykeys").get().content)
        return self.property_keys

    def get_constraints(self):
        """Return the constraints ordered by ``sort_dict_by_key(..., "label")``."""
        if not self.constraints:
            data = remote(self.graph).resolve("schema/constraint").get().content
            self.constraints = sort_dict_by_key(data, "label")
        return self.constraints

    def get_indexes(self):
        """Return the indexes ordered by ``sort_dict_by_key(..., "label")``."""
        if not self.indexes:
            data = remote(self.graph).resolve("schema/index").get().content
            self.indexes = sort_dict_by_key(data, "label")
        return self.indexes

    def update_parameters(self, key, value):
        """Set query parameter ``key`` to ``value`` for later ``cypher()`` calls."""
        self.parameters[key] = value

    def refresh(self):
        """Drop every cached schema list and reload all of them."""
        self.labels = None
        self.relationship_types = None
        self.property_keys = None
        self.indexes = None
        self.constraints = None
        self.get_labels()
        self.get_relationship_types()
        self.get_property_keys()
        self.get_indexes()
        self.get_constraints()

    def print_labels(self):
        """Print the node labels as a one-column table."""
        headers = ["Labels"]
        rows = [[x] for x in self.get_labels()]

        print(pretty_table(headers, rows))

    def print_relationship_types(self):
        """Print the relationship types as a one-column table."""
        headers = ["Relationship Types"]
        rows = [[x] for x in self.get_relationship_types()]

        print(pretty_table(headers, rows))

    def print_constraints(self):
        """Print the formatted constraints as a one-column table."""
        headers = ["Constraints"]
        constraints = self.get_constraints()
        rows = [[x] for x in self.format_constraints_indexes(constraints)]

        print(pretty_table(headers, rows))

    def print_indexes(self):
        """Print the formatted indexes as a one-column table."""
        headers = ["Indexes"]
        indexes = self.get_indexes()
        rows = [[x] for x in self.format_constraints_indexes(indexes)]

        print(pretty_table(headers, rows))

    def format_constraints_indexes(self, values):
        """Format each entry as ``:Label(key1,key2)``.

        Args:
            values: iterable of mappings with ``"label"`` and
                ``"property_keys"`` entries.
        """
        return [":{}({})".format(value["label"], ",".join(value["property_keys"])) for value in values]

    def print_schema(self):
        """Print labels, relationship types, constraints and indexes side by side."""
        headers = ["Labels", "Relationship Types", "Constraints", "Indexes"]

        # Work on copies: the columns are padded in place below and the cached
        # lists must stay untouched.
        columns = [self.get_labels()[:]]
        columns.append(self.get_relationship_types()[:])
        columns.append(self.format_constraints_indexes(self.get_constraints()[:]))
        columns.append(self.format_constraints_indexes(self.get_indexes()[:]))

        # Pad every column with "" to the longest one so rows can be built by index.
        max_length = len(max(columns, key=len))
        for column in columns:
            column.extend([""] * (max_length - len(column)))
        rows = [[x[i] for x in columns] for i in range(max_length)]

        print(pretty_table(headers, rows))

    def print_profile(self, profile):
        """Print the planner summary and one table row per operator of a query profile.

        Operators are listed in the reverse of the order ``walk(profile)``
        returns them.
        """
        planner = profile.arguments["planner"]
        version = profile.arguments["version"]
        runtime = profile.arguments["runtime"]

        print("")
        print("Planner: {}".format(planner))
        print("Version: {}".format(version))
        print("Runtime: {}".format(runtime))
        print("")

        headers = ["Operator", "Estimated Rows", "Rows", "DB Hits", "Variables"]
        rows = []

        for n in reversed(walk(profile)):
            operator = n.operator_type
            estimated_rows = int(n.arguments["EstimatedRows"])
            rows_ = n.arguments["Rows"]
            db_hits = n.arguments["DbHits"]
            variables = n.identifiers

            rows.append([operator, estimated_rows, rows_, db_hits, variables])

        print(pretty_table(headers, rows))
Пример #37
0
from py2neo import Graph, Node, Relationship

# Connect with py2neo's default settings and open a transaction.
g = Graph()
tx = g.begin()

# Build the two Person nodes and the relationship between them locally first.
a = Node("Person", name="Alice")
b = Node("Person", name="Bob")
ab = Relationship(a, "KNOWS", b)

# Queue Alice, then the KNOWS relationship, and commit both together.
tx.create(a)
tx.create(ab)
tx.commit()

# Ask the graph whether the relationship now exists (result is discarded).
g.exists(ab)


"""
Sample Query
>>> from py2neo import Graph
>>> g = Graph()
>>> g.run("MATCH (a) WHERE a.name={x} RETURN a.name", x="Bob").evaluate()
u'Bob'
>>>
"""
Пример #38
0
    def handle(self, *args, **options):  # pylint: disable=unused-argument
        """
        Iterates through each course, serializes them into graphs, and saves
        those graphs to neo4j.

        Connection settings are read from ``options``: ``host``, ``port``,
        ``user`` and ``password``.

        Each course is exported in its own transaction: the course's existing
        ``item`` nodes are deleted and the freshly serialized nodes and
        relationships are added. If anything fails, the error is logged, that
        transaction is rolled back, and the loop moves on to the next course.
        """
        host = options['host']
        port = options['port']
        neo4j_user = options['user']
        neo4j_password = options['password']

        authenticate(
            "{host}:{port}".format(host=host, port=port),
            neo4j_user,
            neo4j_password,
        )

        graph = Graph(
            bolt=True,
            password=neo4j_password,
            user=neo4j_user,
            https_port=port,
            host=host,
            secure=True
        )

        mss = ModuleStoreSerializer()

        total_number_of_courses = len(mss.all_courses)

        for index, course in enumerate(mss.all_courses):
            # first, clear the request cache to prevent memory leaks
            RequestCache.clear_request_cache()

            log.info(
                "Now exporting %s to neo4j: course %d of %d total courses",
                course.id,
                index + 1,
                total_number_of_courses
            )
            nodes, relationships = mss.serialize_course(course.id)
            log.info(
                "%d nodes and %d relationships in %s",
                len(nodes),
                len(relationships),
                course.id
            )

            transaction = graph.begin()
            try:
                # first, delete existing course
                # The course key is passed as a bound parameter rather than
                # formatted into the statement, so quoting in the key cannot
                # alter the query.
                transaction.run(
                    "MATCH (n:item) WHERE n.course_key={course_key} DETACH DELETE n",
                    course_key=six.text_type(course.id),
                )

                # now, re-add it
                self.add_to_transaction(nodes, transaction)
                self.add_to_transaction(relationships, transaction)
                transaction.commit()

            except Exception:  # pylint: disable=broad-except
                log.exception(
                    "Error trying to dump course %s to neo4j, rolling back",
                    six.text_type(course.id)
                )
                transaction.rollback()
Пример #39
0
UNWIND {categories} AS category
MERGE (c:Category {name: category})
MERGE (b)-[:IS_IN]->(c)
'''

# Links one business to one category, creating the category node if needed.
merge_category_query = '''
MATCH (b:Business {id: {business_id}})
MERGE (c:Category {name: {category}})
CREATE UNIQUE (c)<-[:IS_IN]-(b)
'''



# Load the business file one JSON document per line, committing in batches
# of 10000 statements.
print("Beginning business batch")
with open('data/yelp_academic_dataset_business.json', 'r') as f:
	tx = db.begin()
	count = 0
	for line in f:
		b = json.loads(line)
		tx.run(create_business_query, b)
		count += 1
		if count < 10000:
			continue
		# Batch is full: commit it and start a fresh transaction.
		tx.commit()
		tx = db.begin()
		print("Committing transaction")
		count = 0
	# Commit whatever is left in the final, partially filled batch.
	if count > 0:
		tx.commit()
		print("Committing transaction")


## Create spatial layer:
Пример #40
0
import requests
from py2neo import Graph, Node
import os
import pandas as pd
from string_converter import remove_non_alphaNumerics as remove_marks
import numpy as np

if __name__ == "__main__":
    # The Neo4j password comes from the NEO4J_PASS environment variable
    # (None when it is unset).
    pw = os.environ.get('NEO4J_PASS')
    g = Graph("http://localhost:7474/", password=pw)
    tx = g.begin()

    #===================== RETURN GenericDrug object: list of dics, key: rxcui, id =====================#
    q1 = '''
    MATCH (gd:GenericDrug) RETURN id(gd), gd.rxcui
    '''
    gd_obj = g.run(q1)

    # One dict per GenericDrug record. The loop variable is named `record`
    # so the builtin `object` is no longer shadowed at module scope.
    gd_lst = [
        {'id': record['id(gd)'], 'rxcui': record['gd.rxcui']}
        for record in gd_obj
    ]

    #===================== Create relation, Iterate genericDrug (faster, about 15276 interations)====================#
    q3 = '''
       MATCH (pc:Prescription) where pc.rxcui = {gd_rxcui}
       MATCH (gd:GenericDrug) where id(gd) = {id_gd}
       CREATE (pc)-[:PRESCRIBE]->(gd)
       '''
Пример #41
0
from py2neo import Graph, Node, Relationship
import sys
from time import sleep
# REST endpoint of the Neo4j server this script polls.
graph_database_location = "http://192.168.100.53:7474/db/data/"
graph = Graph(graph_database_location, user='******', password='******')

tx = graph.begin()

# Returns the Crawling_Complete relationship of the Main_Tab node whose
# Crawler property is "CRAWLER-1", if there is one.
statement = 'Match (a:Main_Tab)-[c:Crawling_Complete]->(b:Completed) WHERE ((a.Crawler="CRAWLER-1")) RETURN c'
count = []

# When started as CRAWLER-1 there is nothing to wait for. Any other crawler
# polls every 30 seconds until the query returns at least one row.
flag_detected = 1 if sys.argv[1] == "CRAWLER-1" else 0
while not flag_detected:
    print("Okay I am gonna sleep for 30 seconds and Check again")
    sleep(30)

    cursor = tx.run(statement).data()
    print(cursor)
    # Keep the first value of every returned row.
    for each in cursor:
        x = list(each.values())
        count.append(x[0])
    if count:
        flag_detected = 1

if( flag_detected == 1):
    print("Detected Completion")