Пример #1
0
 def update_group_posts(self, crawler, gid):
     """Refresh the stored open posts of a group from freshly crawled data.

     Loads the group's posts that have ``closed=False``, re-fetches the
     group's posts from the crawler starting at the earliest stored post
     time, deletes stored posts the crawler did not return, and updates the
     remaining posts together with their comments and likes.

     Args:
         crawler: object providing ``get_posts_from_group`` and
             ``get_comments_and_likes_for_posts``.
         gid: group id; it is concatenated with "-" before being handed to
             the crawler, so it has to be a string.

     Returns:
         None. A FailedRequestError from the crawler is reported on stdout
         and aborts the update; posts deleted before a failed
         comments/likes request stay deleted.
     """
     active_posts = list(Post.objects.filter(closed=False, group=gid))
     if not active_posts:
         # Nothing is stored that could be deleted or updated, so skip the
         # crawler round-trips. This also avoids handing an empty list to
         # get_earliest_post_time (its behaviour for that input is not
         # visible from here).
         return
     # Start 10 seconds before the earliest stored post: post.date read back
     # from the database has occasionally been off by 1-2 seconds.
     timestamp = time.mktime(get_earliest_post_time(active_posts).timetuple()) - 10
     group_ref = "-" + gid
     try:
         # Materialise inside the try so a failure raised while iterating
         # the crawler result is handled as well.
         vk_posts = list(crawler.get_posts_from_group(group_ref, timestamp))
     except FailedRequestError:
         # Single-argument print(...) behaves the same on Python 2 and 3.
         print("failed to fetch posts for group " + str(gid))
         return
     # Index the fetched posts by their id as a string, which is the form
     # the stored ``pid`` is compared against.
     found_posts_ids = dict((str(p['id']), p) for p in vk_posts)
     # Cleansing: a stored post missing from the fetched set is dropped.
     posts_to_update = []
     for db_post in active_posts:
         if db_post.pid in found_posts_ids:
             posts_to_update.append((db_post, found_posts_ids[db_post.pid]))
         else:
             print("Deleting probably spam post " + db_post.pid)
             db_post.delete()
     try:
         comments_and_likes = crawler.get_comments_and_likes_for_posts(
             [pair[1] for pair in posts_to_update], group_ref)
     except FailedRequestError:
         print("failed to fetch comments and likes for posts of group " + str(gid))
         return
     for db_post, vk_post in posts_to_update:
         # The crawler result is keyed by the raw (non-stringified) post id.
         activity = comments_and_likes[vk_post['id']]
         comments = activity['comments']
         self.update_post(db_post, vk_post, comments)
         self.update_user_activity_for_post(db_post, comments, activity['likes'])
Пример #2
0
 def scan_auditory_activity(self, crawler, active_posts, gid):
     """Count the users who commented on or liked the given active posts.

     Fetches the group's posts from the crawler starting 10 seconds before
     the earliest of ``active_posts``, keeps those whose id matches a
     stored ``pid``, and tallies one activity per comment and per like for
     every user id. The profiles of all counted users are then passed to
     ``analyze_demogeo`` with the label 'active'.

     Args:
         crawler: object providing ``get_posts_from_group``,
             ``get_comments_and_likes_for_posts`` and ``get_profiles``.
         active_posts: stored posts; each must expose ``pid``.
         gid: group id; it is concatenated with "-" before being handed to
             the crawler, so it has to be a string.

     Returns:
         dict with 'users_1' (number of users with at least one comment or
         like) and 'users_3' (number of users with three or more).
         A FailedRequestError or DatabaseError raised during the demogeo
         analysis is reported on stdout and does not affect the result;
         errors from the earlier crawler calls propagate to the caller.
     """
     # A set keeps the per-post membership test constant-time.
     active_posts_ids = set(p.pid for p in active_posts)
     min_time = time.mktime(get_earliest_post_time(active_posts).timetuple()) - 10
     group_ref = "-" + gid
     active_vk_posts = [
         p for p in crawler.get_posts_from_group(group_ref, min_time)
         if str(p['id']) in active_posts_ids
     ]
     comments_and_likes = crawler.get_comments_and_likes_for_posts(active_vk_posts, group_ref).values()

     # Tally activities per user id (as a string): every comment first,
     # then every like, matching the order the ids were counted in before.
     uids = {}
     for entry in comments_and_likes:
         for comment in entry['comments']:
             uid = str(comment['uid'])
             uids[uid] = uids.get(uid, 0) + 1
     for entry in comments_and_likes:
         for liker in entry['likes']:
             uid = str(liker)
             uids[uid] = uids.get(uid, 0) + 1

     result = {
         'users_1': len(uids),
         'users_3': sum(1 for count in uids.values() if count >= 3),
     }
     try:
         profiles = list(crawler.get_profiles(list(uids)))
         self.analyze_demogeo(gid, profiles, 'active', crawler)
     except (FailedRequestError, DatabaseError) as e:
         # Best effort: the counts are still returned when this step fails.
         # Single-argument print(...) behaves the same on Python 2 and 3.
         print("failed to analyze demogeo for active auditory of group " + str(gid) + ": " + str(e))
     return result