Пример #1
0
def main():
    """Count diffusion instances for every user attached to the FILTER_1 tag.

    For each status of each tagged user, the English statuses of the user's
    friends from the five days before it and of the user's followers from the
    five days after it are fetched. When both sets are non-empty they are
    handed, together with the status itself, to
    ``identify_transnational_diffusion``; every truthy result counts as one
    diffusion instance. Per-user and overall totals are printed.
    """
    # five days in unix time
    time_delta = 60 * 60 * 24 * 5

    # Shared template for the friend/follower look-ups; only the relationship
    # type and the date window differ between the two.
    connection_query = (
        ' MATCH (user:Node {{screen_name:"{name}"}})'
        ' MATCH (user)-[:{relation}]->(connections)'
        ' MATCH (connections)-[:STATUS]->(statuses)'
        ' MATCH (statuses {{lang:"en"}})'
        ' MATCH (statuses)<-[:STATUS]-(nodes)'
        ' WHERE statuses.date > {min_date} and statuses.date < {max_date}'
        ' RETURN statuses.text, statuses.date, nodes.screen_name'
        ' ORDER BY statuses.date DESC')

    def connection_statuses(name, relation, min_date, max_date):
        rows, _ = db.cypher_query(connection_query.format(
            name=name,
            relation=relation,
            min_date=min_date,
            max_date=max_date))
        return rows

    tag = Tag.nodes.get(name=Tag.FILTER_1)
    total_diffusion_instances = 0
    for node in tag.users:
        print('processing {}'.format(node.screen_name))

        # get tweets of transnational user
        own_query = (
            ' MATCH (user:Node {{screen_name:"{}"}})'
            ' MATCH (user)-[:STATUS]->(statuses)'
            ' RETURN statuses.text, statuses.date, user.screen_name'
        ).format(node.screen_name)
        statuses, _ = db.cypher_query(own_query)

        diffusion_instances = 0
        for index, status in enumerate(statuses):
            print('{}, {}/{}'.format(node.screen_name, index, len(statuses)), end='\r')

            # friend statuses posted before the status
            friend_statuses = connection_statuses(
                node.screen_name, 'FRIEND',
                status[1] - time_delta, status[1])

            # follower statuses posted after the status
            follower_statuses = connection_statuses(
                node.screen_name, 'FOLLOWER',
                status[1], status[1] + time_delta)

            if not (friend_statuses and follower_statuses):
                continue

            # cluster and identify diffusion
            timeline = friend_statuses + [status] + follower_statuses
            if identify_transnational_diffusion(node, len(friend_statuses),
                                                timeline, output=False):
                diffusion_instances += 1

        total_diffusion_instances += diffusion_instances
        print('{} diffusion instances: {}'.format(node.screen_name, diffusion_instances))
    print('Total Diffusion Instances: {}'.format(total_diffusion_instances))
Пример #2
0
def delete_data():
    """Remove every node, together with its relationships, from the graph."""
    print('Delete all nodes and relationships...')
    db.cypher_query('MATCH (n) DETACH DELETE n')
Пример #3
0
def changeDp(user, photo):
    """Replace the user's ``Dp`` relationship with one to the given photo.

    :param user: ``name`` of the ``User`` node.
    :param photo: ``name`` of the ``Photos`` node to connect.
    """
    # Both nodes are looked up before the old relationship is deleted.
    user_node = User.nodes.get(name=user)
    photo_node = Photos.nodes.get(name=photo)

    db.cypher_query(
        "match (a:User {name:{no_node}})-[b:Dp]->(c) delete b",
        {"no_node": user},
    )
    user_node.currentdp.connect(photo_node)
Пример #4
0
 def handle_noargs(self, **options):
     """Delete nodes and relationships, then re-run the two setup commands.

     The delete query is capped at 100000 node/relationship rows per run.
     """
     wipe_query = '''
         MATCH (n)\
         OPTIONAL MATCH (n)-[r]-()\
         WITH n,r LIMIT 100000 DELETE n,r;\
         '''
     db.cypher_query(wipe_query)

     for command in ("setup_loc_environ", "setup_date_environ"):
         subprocess.call(["python", "manage.py", command])
Пример #5
0
    def clean_up():
        """Delete graph content and make sure the two root nodes exist.

        The delete query is capped at 100000 node/relationship rows. After it
        runs, a ``RootLocation`` and a ``RootDate`` node are each saved if no
        node of that class is found.
        """
        wipe_query = '''
                  MATCH (n)\
                  OPTIONAL MATCH (n)-[r]-()\
                  WITH n,r LIMIT 100000 DELETE n,r;\
                  '''
        db.cypher_query(wipe_query)

        for root_cls in (RootLocation, RootDate):
            if not list(root_cls.nodes.all()):
                root_cls().save()
Пример #6
0
def isRequest(current_user, request_user):
    """Return True when ``current_user`` has a ``Request`` relationship to ``request_user``."""
    rows, _columns = db.cypher_query(
        "Match (a:User {name:{name1}})-[:Request]->(b:User {name:{name2}}) return a,b",
        {"name1": current_user, "name2": request_user},
    )
    return bool(rows)
def get_top_k_cited_papers(request, name, year, k):
    """Return the ``k`` most cited papers of a journal and year as JSON.

    NOTE(review): ``name``, ``year`` and ``k`` are interpolated directly into
    the Cypher text; confirm callers only pass trusted values.
    """
    query = (
        "match (n1)-[:CITED]->(n2) "
        "where n2.journal='%s' and n2.year=%s "
        "return n2,count(n2) as count "
        "order by count desc "
        "limit %s"
    ) % (name, year, k)
    rows, _meta = db.cypher_query(query)

    articles = [Article.inflate(row[0]) for row in rows]
    papers = [article.toDict() for article in articles]
    return JsonResponse({"papers": papers})
Пример #8
0
def getDp(user):
    """Return the photo the user's ``Dp`` relationship points to.

    Returns an empty string when the query yields no rows; when it yields
    several, the last row wins.
    """
    rows, _columns = db.cypher_query(
        "Match (a:User {name:{no_name}})-[:Dp]->(c) return c",
        {"no_name": user},
    )
    photo = ""
    for row in rows:
        photo = Photos.inflate(row[0])
    return photo
Пример #9
0
    def remove_weak_pictures(self):
        """Delete every Picture node that has no connection to an Interest."""
        self.stdout.write("Removing all pictures that are not RELATED_TO any interest")

        _results, _meta = db.cypher_query(
            "MATCH (p:Picture) OPTIONAL MATCH p--(i:Interest) WITH p, i WHERE i IS NULL DETACH DELETE p"
        )
Пример #10
0
 def cipher_query(self, query):
     """Execute a raw Cypher query through neomodel and return its result rows.

     :param query: Cypher statement to run, without parameters.
     :return: the result rows from ``db.cypher_query`` (metadata is dropped).
     :raises Exception: when the query fails; the message contains the query
         text and the original error.
     """
     from neomodel import db
     try:
         results, _meta = db.cypher_query(query)
     except Exception as e:
         # A bare string cannot be raised; wrap the message in an exception
         # so the failure details actually reach the caller.
         raise Exception(
             "Failed to execute Cipher Query: " + query + "\n" + str(e))
     return results
Пример #11
0
    def remove_weak_interests(self):
        """Delete every Interest node with fewer than two related Picture nodes."""
        self.stdout.write("Removing all interests that have less than 2 pictures RELATED_TO them")

        _results, _meta = db.cypher_query(
            "MATCH (i:Interest) OPTIONAL MATCH i<-[r:RELATED_TO]-(p:Picture) WITH i, COUNT(p) AS rel WHERE rel < 2 OR rel IS NULL DETACH DELETE i"
        )
 def remove_duplicate_quests(self):
     """Delete Quest nodes that are not attached to a Pleb, PublicOfficial or Mission.

     Quests are read in pages of 25; each one is deleted, along with its
     relationships, only if it has no relationship to any of those three
     labels. When paging is exhausted ``self.cache_key`` is set in the cache.

     NOTE(review): the offset advances by 24 while the page size is 25, so
     consecutive pages overlap by one row -- confirm whether that is intended.
     """
     offset = 0
     while True:
         page_query = (
             'MATCH (q:Quest) '
             'RETURN DISTINCT q '
             'SKIP %s LIMIT 25'
         ) % offset
         offset += 24
         page, _ = db.cypher_query(page_query)
         if not page.one:
             break

         quests = [Quest.inflate(row[0]) for row in page]
         for quest in quests:
             delete_query = (
                 'MATCH (q:Quest {object_uuid:"%s"}) WHERE NOT '
                 '(q)-[]-(:Pleb) AND NOT (q)-[]-(:PublicOfficial) '
                 'AND NOT (q)-[]-(:Mission) WITH q '
                 'OPTIONAL MATCH (q)-[r]-() '
                 'DELETE q, r'
             ) % (quest.object_uuid)
             _deleted, _ = db.cypher_query(delete_query)
     cache.set(self.cache_key, True)
Пример #13
0
 def test_set_quest_about(self):
     """The 'about' onboarding task is completed once the quest has an about text."""
     self.quest.about = "some short summary"
     self.quest.save()
     setup_onboarding(self.quest, self.mission)

     task_query = (
         'MATCH (a:Mission {object_uuid: "%s"})-[:MUST_COMPLETE]->'
         '(task:OnboardingTask {title: "%s"}) RETURN task'
     ) % (self.mission.object_uuid, settings.QUEST_ABOUT_TITLE)
     rows, _ = db.cypher_query(task_query)
     self.assertTrue(rows.one['completed'])
Пример #14
0
 def test_set_bank_setup(self):
     """The bank-setup onboarding task is completed once the account is verified."""
     self.quest.account_verified = "verified"
     self.quest.save()
     setup_onboarding(self.quest, self.mission)

     task_query = (
         'MATCH (a:Mission {object_uuid: "%s"})-[:MUST_COMPLETE]->'
         '(task:OnboardingTask {title: "%s"}) RETURN task'
     ) % (self.mission.object_uuid, settings.BANK_SETUP_TITLE)
     rows, _ = db.cypher_query(task_query)
     self.assertTrue(rows.one['completed'])
Пример #15
0
 def test_set_wallpaper(self):
     """The wallpaper onboarding task is completed once the quest has a wallpaper."""
     self.quest.wallpaper_pic = "something.png"
     self.quest.save()
     setup_onboarding(self.quest, self.mission)

     task_query = (
         'MATCH (a:Mission {object_uuid: "%s"})-[:MUST_COMPLETE]->'
         '(task:OnboardingTask {title: "%s"}) RETURN task'
     ) % (self.mission.object_uuid, settings.QUEST_WALLPAPER_TITLE)
     rows, _ = db.cypher_query(task_query)
     self.assertTrue(rows.one['completed'])
Пример #16
0
 def endorsed(self, request, owner_username):
     """Return a paginated response of the missions endorsed by a quest.

     :param request: current request, passed to the serializer context.
     :param owner_username: ``owner_username`` of the Quest whose
         ``ENDORSES`` relationships are followed.
     :return: the paginated response built from the serialized missions.
     """
     # The username placeholder must be a real ``%s``; without one the
     # ``%`` operator raises TypeError before the query is ever sent.
     query = 'MATCH (q:Quest {owner_username:"%s"})-' \
             '[:ENDORSES]->(m:Mission) RETURN m' % owner_username
     res, _ = db.cypher_query(query)
     page = self.paginate_queryset(
         [Mission.inflate(mission[0]) for mission in res])
     serializer = self.serializer_class(page, many=True,
                                        context={'request': request})
     return self.get_paginated_response(serializer.data)
Пример #17
0
def delete_by_id(pk):
    """Detach-delete the ``Cipher`` node whose internal id is ``pk`` and print the query output."""
    query = '''
            MATCH (n:Cipher)
            WHERE ID(n) = {id}
            DETACH DELETE n
            '''
    results, meta = db.cypher_query(query, {'id': pk})
    print(results)
    print(meta)
def get_top_restaurant(uname):
    """Return up to two restaurant ids, picked at random, that the user rated highest.

    :param uname: ``name`` of the ``User`` node.
    :return: result rows, each holding one ``res.rid`` value.
    """
    query = (
        "MATCH (U:User {name:$uname})-[r:RATED]->(res:Restaurant) with max(r.rating) as MAX_RATING "
        "    MATCH (U:User {name:$uname})-[r:RATED]->(res:Restaurant) where r.rating = MAX_RATING return res.rid order by rand() limit 2 "
    )
    rows, _meta = db.cypher_query(query, {'uname': uname})
    return rows
Пример #19
0
def getUser() -> User:
    """Let the operator pick one user other than the authenticated one.

    All users whose e-mail differs from the authenticated user's are loaded,
    their names are offered through ``selectOptionInList``, and the user at
    the chosen index is returned.
    """
    query_params = {'authUser': getAuth().email}
    rows, _columns = db.cypher_query(
        "MATCH (u:User) WHERE u.email <> {authUser} return u",
        params=query_params)

    candidates = [User.inflate(row[0]) for row in rows]
    names = [candidate.name for candidate in candidates]
    chosen_index = selectOptionInList(
        "Escoje el numero de usuario que deseas ver", names)
    return candidates[chosen_index]
Пример #20
0
def table_view(query):
    """Run a Cypher query, print the result as a pandas table and return it.

    ``Node`` cells are replaced in place by their ``repr`` text before
    printing, so the returned rows contain those strings as well.

    :return: ``(results, columns)`` as produced by ``db.cypher_query``.
    """
    import pandas as pd

    rows, columns = db.cypher_query(query)
    for row in rows:
        for position, cell in enumerate(row):
            if not isinstance(cell, Node):
                continue
            row[position] = cell.__repr__()

    frame = pd.DataFrame(rows, columns=columns)
    print(frame)
    return rows, columns
Пример #21
0
    def clear_unseen(cls, username):
        """
        Mark every unseen notification of the given user as seen and stamp
        it with the current time, expressed as seconds since the Unix epoch
        in UTC.

        Doesn't return anything because if the query fails a Cypher Exception
        is thrown and a 500 error will propagate out.
        :param username: username of the Pleb whose notifications are updated
        :return: None
        """
        value = get_current_time().astimezone(pytz.utc)
        epoch_date = datetime(1970, 1, 1, tzinfo=pytz.utc)
        time_seen = float((value - epoch_date).total_seconds())
        # Two placeholders are needed for the two values; with the username
        # placeholder missing the ``%`` operator raises TypeError.
        query = 'MATCH (a:Pleb {username: "%s"})<-[:NOTIFICATION_TO]-' \
                '(n:Notification) WHERE n.seen=False' \
                ' SET n.seen = True, ' \
                'n.time_seen = %s' % (username, time_seen)
        db.cypher_query(query)
Пример #22
0
 def coupon(self):
     """Return the query rows for the node whose id is ``self.coupon_id``.

     Returns ``None`` when ``coupon_id`` is falsy or ``Coupon.DoesNotExist``
     is raised.
     """
     if not self.coupon_id:
         return None
     try:
         rows, _meta = db.cypher_query(
             f"match (a) where ID(a) = {self.coupon_id} return a")
         return rows
     except Coupon.DoesNotExist:
         return None
Пример #23
0
def create_vote_relationship(content_id, voter_username, vote_active,
                             vote_type):
    """Create or update the ``PLEB_VOTES`` relationship between a pleb and content.

    The first query creates the relationship if needed and sets its
    ``active`` and ``vote_type`` properties. If it raises, a second query
    updates the properties on the already existing relationship.

    :param content_id: ``object_uuid`` of the ``VotableContent`` node.
    :param voter_username: ``username`` of the voting ``Pleb``.
    :param vote_active: value written to ``vote.active``.
    :param vote_type: value written to ``vote.vote_type``.
    :return: result rows of whichever query succeeded.
    """
    # Each template needs four placeholders for the four values. With the
    # username placeholder missing, both the primary query and the fallback
    # raise TypeError while formatting, so no vote could ever be stored.
    try:
        query = 'MATCH (v:VotableContent {object_uuid:"%s"}), ' \
                '(p:Pleb {username:"%s"}) ' \
                'CREATE UNIQUE (v)<-[vote:PLEB_VOTES]-(p) ' \
                'WITH v, vote, p SET vote.active=%s, ' \
                'vote.vote_type=%s RETURN v' % (
                    content_id, voter_username, vote_active, vote_type)
        res, _ = db.cypher_query(query)
    except (ConstraintViolation, Exception):
        # Deliberately broad: any failure falls back to updating the
        # existing relationship.
        query = 'MATCH (v:VotableContent {object_uuid:"%s"})' \
                '<-[vote:PLEB_VOTES]-(p:Pleb {username:"%s"}) ' \
                'SET vote.active=%s, vote.vote_type=%s RETURN v' % (
                    content_id, voter_username, vote_active, vote_type)
        res, _ = db.cypher_query(query)

    return res
Пример #24
0
    def get_story_nodes(self, item, reverse=False, limit=False):
        """
        Create a raw cypher query for story of an artifact and query neo4j with it.

        The query starts at ``item`` and passes ``apoc.path.expandConfig`` a
        ``sequence`` made of the item's label followed by the relationship and
        node labels obtained by walking ``self.story_flow`` until it returns a
        falsy value.

        :param node item: a Neo4j node whose story is requested by the user
        :kwarg bool reverse: specifies the direction to proceed from current node
            corresponding to the story_flow
        :kwarg bool limit: specifies if LIMIT keyword should be added to the created cypher query
        :return: story paths for a particular artifact; an empty list when no query was built
        :rtype: list
        :raises ValidationError: if the item's label is not in ``self.story_flow_list``
        """
        query = ''

        # Choose which keys of the story_flow info are followed.
        if reverse is True:
            rel_label = 'backward_relationship'
            node_label = 'backward_label'
        else:
            rel_label = 'forward_relationship'
            node_label = 'forward_label'

        curr_node_label = item.__label__
        if curr_node_label not in self.story_flow_list:
            raise ValidationError(
                'The story is not available for this kind of resource')

        # Walk the flow one label at a time, appending each hop to the sequence.
        while True:
            curr_node_info = self.story_flow(curr_node_label)
            if not curr_node_info:
                break

            # On the starting label, open the query and the quoted
            # sequence string; it is closed after the loop.
            if curr_node_label == item.__label__:
                query = """\
                    MATCH ({var}:{label}) WHERE id({var})= {node_id}
                    CALL apoc.path.expandConfig({var}, {{sequence:\'{label}
                    """.format(var=curr_node_label.lower(),
                               label=curr_node_label,
                               node_id=item.id)

            query += ', {0}, {1}'.format(curr_node_info[rel_label],
                                         curr_node_info[node_label])

            curr_node_label = curr_node_info[node_label]

        # Close the sequence string and return paths, longest first.
        if query:
            query += """\
                \', minLevel:1}) YIELD path
                RETURN path
                ORDER BY length(path) DESC
                """

        if query and limit:
            query += ' LIMIT 1'

        results = []
        if query:
            results, _ = db.cypher_query(query)
        return results
Пример #25
0
 def get_student_on_semester_for_fieldofstudy(self, student, fieldofstudy):
     """Return ``on_semester`` of the student's STUDIES relationship to a field of study.

     The field of study is matched on name, faculty and start years. Returns
     ``None`` when no row matches, otherwise the value from the first row.
     """
     query = ''.join([
         'MATCH (s:Student)-[r:STUDIES]->(f:FieldOfStudy) WHERE (s.index_number=',
         str(student.index_number),
         ' and f.name="', fieldofstudy.name,
         '" and f.faculty="', fieldofstudy.faculty,
         '" and f.start_years="', fieldofstudy.start_years,
         '") RETURN r.on_semester',
     ])
     rows, _meta = db.cypher_query(query)
     semesters = [row[0] for row in rows]
     return semesters[0] if semesters else None
Пример #26
0
def get_node_by_id(cls, id):
    """Load the node with the given internal id and inflate it as ``cls``.

    :raises cls.DoesNotExist: when the query returns no rows.
    """
    label_clause = ''.join(label_string(cls.inherited_labels()))
    query = 'MATCH (n{}) WHERE ID(n)={{id}} RETURN n'.format(label_clause)
    rows = db.cypher_query(query, {'id': id})[0]
    if not rows:
        raise cls.DoesNotExist('No node found with given ID')
    return cls.inflate(rows[0]['n'])
Пример #27
0
 def get_rating_keyword_describes_course(self, keyword_course, course):
     """Return the rating on the DESCRIBES relationship from a keyword to a course.

     Returns ``False`` when no relationship matches, otherwise the first
     distinct rating.
     """
     query = ''.join([
         'MATCH (k:Keyword)-[r:DESCRIBES]->(c:Course) WHERE (k.word="',
         keyword_course.word,
         '" AND c.name="', course.name,
         '") RETURN DISTINCT r.rating',
     ])
     rows, _meta = db.cypher_query(query)
     ratings = [row[0] for row in rows]
     return ratings[0] if ratings else False
Пример #28
0
 def get_all_fields_of_study_for_student(self, student):
     """Return the fields of study the student STUDIES, or ``None`` if there are none."""
     query = ''.join([
         'MATCH (s:Student)-[r:STUDIES]->(f:FieldOfStudy) WHERE s.index_number=',
         str(student.index_number),
         ' RETURN f',
     ])
     rows, _meta = db.cypher_query(query)
     fields = [FieldOfStudy.inflate(row[0]) for row in rows]
     return fields or None
Пример #29
0
 def get_past_professors(self, student):
     """Return the distinct professors teaching courses of the student's field of study."""
     field = self.get_field_of_study_for_student(student)
     query = ''.join([
         'MATCH (p:Professor)-[r:TEACHES]->(c:Course)-[r2:HAS]->(f:FieldOfStudy) WHERE (f.name="',
         field.name,
         '" AND f.start_years="', field.start_years,
         '" AND f.faculty="', field.faculty,
         '" )  RETURN DISTINCT p',
     ])
     rows, _meta = db.cypher_query(query)
     return [Professor.inflate(row[0]) for row in rows]
Пример #30
0
 def get_course_by_field_of_study(self, name, field_of_study):
     """Return the course with the given name in a field of study, or ``None``.

     The field of study is matched on name, start years and faculty; when
     several courses match, the first one is returned.
     """
     query = ''.join([
         'MATCH (c:Course)-[r:HAS]->(f:FieldOfStudy) WHERE (c.name="',
         name,
         '" AND f.name="', field_of_study.name,
         '" AND f.start_years="', field_of_study.start_years,
         '" and f.faculty="', field_of_study.faculty,
         '") RETURN c',
     ])
     rows, _meta = db.cypher_query(query)
     courses = [Course.inflate(row[0]) for row in rows]
     return courses[0] if courses else None
Пример #31
0
def existPhoto(photo, user):
    """Return True when a ``Photos`` node exists for the user's URL plus ``photo``.

    The node name searched for is ``getUrl(user) + photo``; it is printed
    before the lookup.
    """
    url = getUrl(user) + photo
    print(url)
    rows, _columns = db.cypher_query(
        "MATCH (a:Photos{name: {no_name}}) return a",
        {"no_name": url},
    )
    return bool(rows)
Пример #32
0
def existUser(user):
    """Return True when a ``User`` node with the given name inflates to a truthy object."""
    rows, _columns = db.cypher_query(
        "MATCH (a:User{name: {no_name}}) return a",
        {"no_name": user},
    )
    found = []
    for row in rows:
        found = User.inflate(row[0])
    return bool(found)
Пример #33
0
def generic_update_rel(rel_class, request, labels, params, node_id):
    """Update the properties of one relationship from submitted parameters.

    ``labels`` is a Cypher pattern; its relationship bracket is given the
    variable ``r`` so the relationship with internal id ``node_id`` can be
    fetched. The relationship is inflated with ``rel_class``, validated
    through a form built from ``params`` and saved when the form is valid.

    :return: the saved relationship, or ``None`` when the form is invalid.
    """
    pattern = labels.replace(')-[', ')-[r')
    rows, _meta = db.cypher_query(
        'MATCH ({}) WHERE ID(r)={{this}} RETURN r'.format(pattern),
        {'this': node_id},
    )
    rel = rel_class.inflate(rows[0][0])

    form = form_for_node_properties(rel, params.keys(), params)
    if not form.is_valid():
        return None

    set_node_properties_from_params(rel, form.cleaned_data)
    rel.save()
    return rel
Пример #34
0
 def cypher(self, query):
     """Execute a raw Cypher query through neomodel and return its result rows.

     :param query: Cypher statement to run, without parameters.
     :return: the result rows from ``db.cypher_query`` (metadata is dropped).
     :raises Exception: when the query fails; the message contains the query
         text and the original error.
     """
     from neomodel import db
     try:
         results, meta = db.cypher_query(query)
     except Exception as e:
         # Failures always surface as an exception; there is no
         # ``False`` return path.
         raise Exception(
             "Failed to execute Cypher Query: %s\n%s" % (query, str(e)))
     # logger.debug("Graph query.\nResults: %s\nMeta: %s" % (results, meta))
     return results
Пример #35
0
 def published_links(self):
     """Return the distinct published ``Link`` nodes reachable from this person.

     A link counts as published when its ``publish_date`` is not null.
     """
     query = """
         MATCH (n:Link)-[:ABOUT]->()<-[*]-(p:Person)
         WHERE ID(p)={id} AND n.publish_date IS NOT NULL
         RETURN DISTINCT n
         """
     rows = db.cypher_query(query, {'id': self._id})[0]
     return [Link.inflate(row['n']) for row in rows]
Пример #36
0
 def published_experiences(self):
     """Return the distinct published ``Experience`` nodes reachable from this person.

     An experience counts as published when its ``publish_date`` is not null.
     """
     query = """
         MATCH (n:Experience)-[:WITH]->()<-[*]-(p:Person)
         WHERE ID(p)={id} AND n.publish_date IS NOT NULL
         RETURN DISTINCT n
         """
     rows = db.cypher_query(query, {'id': self._id})[0]
     return [Experience.inflate(row['n']) for row in rows]
Пример #37
0
    def get_or_create(cls, to_person, from_person, word):
        """Merge a ``HEARD`` relationship from a person to a word and return it.

        The relationship runs from ``to_person`` to ``word`` and carries
        ``from_person``'s address as its ``name``. The query, its rows and
        the inflated relationships are printed along the way.

        :return: the first inflated ``Heard`` relationship.
        """
        query = """
            MATCH (p:Person { address:'%s' }),(w:Word { value:'%s' })
            MERGE (p)-[r:HEARD {name:'%s'}]->(w)
            RETURN r
        """ % (to_person.address, word.value, from_person.address)
        print(query)

        rows, _meta = db.cypher_query(query)
        print(rows)

        heards = [Heard.inflate(row[0]) for row in rows]
        print('heards: {}'.format(heards))
        return heards[0]
Пример #38
0
 def all_roles(self):
     """Return every ``Role`` reachable from this topic over one or more RELATED_TO hops."""
     query = """
         MATCH (r:Role), (t:Topic)
         WHERE ID(t) = {id}
           AND (t)-[:RELATED_TO*1..]->(r)
         RETURN r
         """
     rows = db.cypher_query(query, {'id': self._id})[0]
     return [Role.inflate(row['r']) for row in rows]
Пример #39
0
 def all_projects(self):
     """Return every ``Project`` reachable from this topic over one or more RELATED_TO hops."""
     query = """
         MATCH (p:Project), (t:Topic)
         WHERE ID(t) = {id}
           AND (t)-[:RELATED_TO*1..]->(p)
         RETURN p
         """
     rows = db.cypher_query(query, {'id': self._id})[0]
     return [Project.inflate(row['p']) for row in rows]
Пример #40
0
def queryPubsOfAuthorOverTime_(name):
    """Count an author's publications per year.

    :param name: author name matched against ``n1.name``.
    :return: list of ``{"name": year, "frequency": count}`` dicts, one per
        year in which the author has at least one article.
    """
    query = "match (n1)-[:AUTHORED]->(n2) where n1.name='%s' return n2" % name
    results, meta = db.cypher_query(query)

    # ``dict.get`` and ``items`` work on Python 2 and 3 alike, unlike
    # ``has_key`` and ``iteritems``.
    count_per_year = {}
    for row in results:
        year = Article.inflate(row[0]).year
        count_per_year[year] = count_per_year.get(year, 0) + 1

    return [{"name": year, "frequency": count}
            for year, count in count_per_year.items()]
    def resource_collection_response(cls, offset=0, limit=20):
        """Build the paginated collection response for this resource class.

        The response body holds the resource objects of one page under
        ``data`` and the pagination links ``self``, ``first``, ``prev``,
        ``next`` and ``last`` under ``links``.

        :param offset: number of nodes to skip; anything ``int()`` accepts.
        :param limit: page size; anything ``int()`` accepts.
        :return: the response object with status ``OK`` and the module's
            ``CONTENT_TYPE`` header.
        :raises ValueError: if ``offset`` or ``limit`` is not an integer value.
        :raises ZeroDivisionError: if ``limit`` is 0.
        """
        # Convert once, so the query text and every link use real integers.
        offset = int(offset)
        limit = int(limit)
        class_link = cls.get_class_link()

        def page_link(page_offset):
            return "{class_link}?page[offset]={offset}&page[limit]={limit}".format(
                class_link=class_link,
                offset=page_offset,
                limit=limit
            )

        query = "MATCH (n) WHERE n:{label} RETURN n ORDER BY n.id SKIP {offset} LIMIT {limit}".format(
            label=cls.__name__,
            offset=offset,
            limit=limit)
        results, meta = db.cypher_query(query)

        # Evaluated once; presumably each len() is a count against the
        # database -- verify against the node set implementation.
        total = len(cls.nodes)

        links = dict()
        links['self'] = page_link(offset)
        links['first'] = page_link(0)
        if offset > 0:
            # A previous page exists whenever we are past the start, also
            # when it begins at offset 0; never link to a negative offset.
            links['prev'] = page_link(max(offset - limit, 0))
        if total > offset + limit:
            links['next'] = page_link(offset + limit)
        # Start of the last non-empty page; when the total is an exact
        # multiple of the limit this is one full page before the end.
        links['last'] = page_link(max(total - 1, 0) // limit * limit)

        data = dict()
        data['data'] = [cls.inflate(row[0]).get_resource_object() for row in results]
        data['links'] = links

        r = make_response(jsonify(data))
        r.status_code = http_error_codes.OK
        r.headers['Content-Type'] = CONTENT_TYPE
        return r
Пример #42
0
 def all_experiences(self):
     """Return every ``Experience`` linked to this topic in either direction.

     An experience qualifies when it is WITH the topic, or when the topic
     reaches it over one or more RELATED_TO hops.
     """
     query = """
         MATCH (e:Experience), (t:Topic)
         WHERE ID(t) = {id}
           AND ((e)-[:WITH]->(t) OR (t)-[:RELATED_TO*1..]->(e))
         RETURN e
         """
     rows = db.cypher_query(query, {'id': self._id})[0]
     return [Experience.inflate(row['e']) for row in rows]
Пример #43
0
def test_independent_property_name_get_or_create():
    """``get_or_create`` stores ``name_`` under the database property ``name``."""
    class TestNode(StructuredNode):
        uid = UniqueIdProperty()
        name_ = StringProperty(db_property="name", required=True)

    props = {'uid': 123, 'name_': 'jim'}

    # first call creates the node, second call must retrieve the same one
    TestNode.get_or_create(dict(props))
    node = TestNode.get_or_create(dict(props))[0]

    # inspect the stored property names at the driver level
    rows, _meta = db.cypher_query("MATCH (n:TestNode) RETURN n")
    stored = _get_node_properties(rows[0][0])
    assert stored['name'] == "jim"
    assert 'name_' not in stored

    # clean up
    node.delete()
Пример #44
0
def test_independent_property_name():
    """A property with ``db_property`` is stored under that name but accessed by its Python name."""
    class TestNode(StructuredNode):
        name_ = StringProperty(db_property="name")

    node = TestNode()
    node.name_ = "jim"
    node.save()

    # inspect the stored property names at the driver level
    rows, _meta = db.cypher_query("MATCH (n:TestNode) RETURN n")
    assert rows[0][0].properties['name'] == "jim"
    assert 'name_' not in rows[0][0].properties

    # the Python side only knows the attribute name
    assert not hasattr(node, 'name')
    assert hasattr(node, 'name_')
    assert TestNode.nodes.filter(name_="jim").all()[0].name_ == node.name_
    assert TestNode.nodes.get(name_="jim").name_ == node.name_

    # clean up
    node.delete()
Пример #45
0
def queryPublicationsBetweenYears_(startYear, endYear):
    """Return the per-journal, per-year article counts for a range of years.

    :param startYear: first year of the range (inclusive).
    :param endYear: last year of the range (inclusive).
    :return: list with one dict per journal: ``name`` (the journal),
        ``articles`` (a list of ``[year, count]`` pairs) and ``total``
        (the sum of the counts).
    """
    query = "match (n:Article) where n.year >= %s and n.year <= %s return n order by n.year" % (startYear, endYear)
    results, meta = db.cypher_query(query)

    # ``setdefault``, ``get``, ``items`` and ``values`` work on Python 2 and
    # 3 alike, unlike ``has_key``, ``iteritems`` and ``itervalues``.
    journals = {}
    for row in results:
        article = Article.inflate(row[0])
        year_counts = journals.setdefault(article.journal, {})
        year_counts[article.year] = year_counts.get(article.year, 0) + 1

    return [
        {
            "name": journal,
            "articles": [[year, count] for year, count in year_counts.items()],
            "total": sum(year_counts.values()),
        }
        for journal, year_counts in journals.items()
    ]
Пример #46
0
 def all_roles_and_dates(self):
     """Return this person's roles with their start and end dates.

     Rows are ordered by start date, then end date, both descending. A
     missing start or end date is replaced by today's date.

     :return: list of ``(role, start_date, end_date)`` tuples.
     """
     query = """
         MATCH
             (person:Person)-[rel:PERFORMED]->(role:Role)
         WHERE
             id(person) = {id}
         RETURN
             role,
             rel.start_date as start_date,
             rel.end_date as end_date
         ORDER BY
             rel.start_date DESC, rel.end_date DESC
         """
     rows = db.cypher_query(query, {'id': self._id})[0]

     history = []
     for row in rows:
         role = Role.inflate(row['role'])
         if row['start_date']:
             start = parse8601(row['start_date'])
         else:
             start = datetime.date.today()
         if row['end_date']:
             end = parse8601(row['end_date'])
         else:
             end = datetime.date.today()
         history.append((role, start, end))
     return history
Пример #47
0
def select_words(person_name):
    """Activate or deactivate ``Word`` nodes based on how often a person heard them.

    Every word connected through a ``HEARD`` relationship to the person
    (as address or as relationship name) is fetched with its relationship
    count. Words counted fewer than 20 times, or more than the mean of the
    remaining counts plus six standard deviations of all counts, are marked
    for removal. The ``active`` flag of each fetched word's node is then set
    to whether the word survived, and saved when it changed.

    :param person_name: address / relationship name identifying the person.
    :return: the unfiltered ``[word, count]`` rows from the query.
    """
    # thresholds for the low and high cut-off
    min_freq = 20
    stdev_weight = 6

    words, query_items = db.cypher_query(
        "match (w:Word)-[h:HEARD]-(p:Person) where p.address = '{0}' "
        "OR h.name = '{0}' return w.value, "
        "count(h.frequency)".format(person_name))
    print(words)

    words_to_remove = []
    kept_freqs = []
    all_freqs = []

    # low cut-off: words heard too rarely
    for row in words:
        value, freq = row[0], row[1]
        all_freqs.append(freq)
        if freq < min_freq:
            words_to_remove.append(value)
            logging.info('Removing word, freq < {}: {}'.format(min_freq, value))
            continue
        kept_freqs.append(freq)
    print(words)

    # spread of all counts, centre of the counts that passed the low cut-off
    stdev = statistics.stdev(all_freqs)
    mean = statistics.mean(kept_freqs)
    logging.info('stdev:{} and mean:{}'.format(stdev, mean))

    # high cut-off: words heard too often
    max_freq = mean + stdev_weight * stdev
    for row in words:
        if row[1] > max_freq:
            words_to_remove.append(row[0])
            logging.info('Removing word, freq > {}: {}'.format(max_freq, row[0]))

    print('TO REMOVE, {} words: {}'.format(
        len(words_to_remove), words_to_remove))
    print('ALL, {} words: {}'.format(
        len(words), words))

    fetched_values = [row[0] for row in words]

    # Sync the ``active`` flag of every fetched word with the outcome.
    nodes_by_value = {node.value: node for node in Word.nodes.all()}
    for value, word_node in nodes_by_value.items():
        if value not in fetched_values:
            continue

        state = value not in words_to_remove
        logging.debug('Word: {}, New: {}, Old: {}'.format(
            value, state, word_node.active
        ))
        if word_node.active != state:
            word_node.active = state
            word_node.save()

    logging.debug('Removed words: {}'.format(words_to_remove))
    logging.debug('Important Words: {}'.format(words))

    return words
Пример #48
0
def _write_relation_matrix(filename, people, matrix, relation):
    """Write one word matrix and its relation column to a space-delimited file."""
    # newline='' lets the csv module control line endings itself.
    with open(filename, 'w', newline='') as matrix_file:
        writer = csv.writer(matrix_file, delimiter=' ',
                            quotechar='|', quoting=csv.QUOTE_MINIMAL)
        writer.writerow(["%s " % person for person in people])
        writer.writerow(["%s " % entry for entry in matrix])
        for i, label in enumerate(relation):
            row = [matrix[entry][i] for entry in matrix]
            row.insert(0, label)
            writer.writerow(row)


def build_training_and_testing_sets(person_name):
    """Build training and testing matrices for a person and write them to disk.

    ``select_words`` is run first. The active words the person received and
    sent are then fetched, merged and de-duplicated, and the distinct
    counterpart people are split 70/30 into a training and a testing group.
    The matrices built for the two groups are written to ``<key>.TRAIN`` and
    ``<key>.TEST``, where ``<key>`` is the text between ``<`` and ``>`` in
    ``person_name``.

    :param person_name: address identifying the person; must contain a
        ``<...>`` section.
    :raises AttributeError: if ``person_name`` has no ``<...>`` section.
    """
    percent_training = 0.7
    select_words(person_name)

    heard_recv, query_items = db.cypher_query(
        'match (w:Word)-[h:HEARD]-(p:Person) where w.active = True and '
        'p.address=\'{}\' return w.value, '
        'h.frequency, h.name'.format(person_name)
    )

    heard_sent, query_items = db.cypher_query(
        'match (w:Word)-[h:HEARD]-(p:Person) where w.active = True and '
        'h.name=\'{}\' return w.value, '
        'h.frequency, p.address'.format(person_name)
    )

    logging.info('All words, sent only: {}'.format(len(heard_sent)))
    logging.info('All words, received only: {}'.format(len(heard_recv)))

    # Merge operation
    heard_words = heard_recv + heard_sent
    heard_words.sort()
    logging.info('All words, combined: {}'.format(len(heard_words)))

    # Compare each row with its predecessor in sorted order. Starting at 1
    # avoids comparing the first row with the last one (index -1) and makes
    # sure the final adjacent pair is checked as well.
    for i in range(1, len(heard_words)):
        if heard_words[i] == heard_words[i-1]:
            heard_words[i][1] += heard_words[i-1][1]
            heard_words[i-1][1] = heard_words[i][1]
            logging.debug('Merge: {}, {}'.format(heard_words[i], heard_words[i-1]))

    # Deduplicate list, frequencies already added
    heard_words = list(set(tuple(word) for word in heard_words))

    logging.info('Unique words: {}'.format(len({w[0] for w in heard_words})))

    words, freq, people = list(zip(*heard_words))
    distinct_people = list(set(people))
    logging.debug('Distict people: {}'.format(distinct_people))

    # find test and training matrices
    training_inx = round(len(distinct_people)*percent_training)
    training_people = distinct_people[:training_inx]
    testing_people = distinct_people[training_inx:]

    logging.info('Chose {} people for training, {} for testing.'.format(
        training_inx,
        len(distinct_people) - training_inx
    ))
    logging.debug('Training people: {}'.format(training_people))
    logging.debug('Testing people: {}'.format(testing_people))

    training_dict, training_relation = build_training_matrix(
        words, freq, people,
        training_people
    )

    testing_dict, testing_relation = build_testing_matrix(
        words, freq, people,
        testing_people
    )

    # Both output files share the key taken from between '<' and '>'.
    file_key = re.search('<(.*)>', person_name).group(1)

    # output training matrix to file
    _write_relation_matrix('{}.TRAIN'.format(file_key),
                           training_people, training_dict, training_relation)

    # output testing matrix to file; its rows come from the testing matrix,
    # not the training one
    _write_relation_matrix('{}.TEST'.format(file_key),
                           testing_people, testing_dict, testing_relation)