Пример #1
0
    def getProcessed(self):
        """Summarise Patriots- and Giants-related home-stream activity as JSON.

        Fetches posts with ``self.g.getHome(1000, False)``, orders them by
        ``self._fieldCount``, separates photos from status/link/checkin
        posts, and keyword-searches each group for both teams.

        Side effects:
            Inserts one document into each of ``self.db.users``,
            ``self.db.tokens`` and ``self.db.feed``.

        Returns:
            str: ``json.dumps`` of a dict with the keys ``'patriots'`` and
            ``'giants'`` (each holding ``'statuses'``, ``'photos'``,
            ``'users'``, ``'like_count'`` and ``'comment_count'``) and
            ``'active_friends'`` (``{'count': <number of distinct ids>}``).
        """
        all_posts = self.g.getHome(1000, False)

        # Team terms followed by star names / generic terms.
        # NOTE(review): the leading space in ' pats' / ' patriots' presumably
        # avoids matching inside longer words -- confirm against searchPosts.
        pat_keywords = [
            ' pats', ' patriots',
            'brady', 'gronkowski', 'ochocinco', 'belichick', 'super bowl',
            'football',
        ]
        giant_keywords = ['giants', 'manning', 'coughlin', 'superbowl']

        # Order by _fieldCount's default field (the original comment calls
        # this "sort by likes").
        all_posts = sorted(all_posts, key=lambda post: self._fieldCount(post))

        # Segregate by type.
        pr = ap(all_posts)
        photos = pr.getByKeyValue('type', 'photo')
        posts = pr.getByKeyValue('type', ['status', 'link', 'checkin'])

        # Collect the matching posts and photos for each team.
        fields = ['message', 'link', 'name', 'caption', 'description']

        pr_posts = ap(posts)
        giant_posts = pr_posts.searchPosts(fields, giant_keywords, True)
        pat_posts = pr_posts.searchPosts(fields, pat_keywords, True)

        pr_photos = ap(photos)
        giant_photos = pr_photos.searchPosts(fields, giant_keywords)
        pat_photos = pr_photos.searchPosts(fields, pat_keywords)

        giant_users = ap(giant_posts + giant_photos).groupByUid(False)
        pat_users = ap(pat_posts + pat_photos).groupByUid(False)

        def team_summary(statuses, team_photos, users):
            """Bundle one team's matches with their like/comment totals."""
            return {
                'statuses': statuses,
                'photos': team_photos,
                'users': users,
                'like_count': (ap(team_photos).countLikes() +
                               ap(statuses).countLikes()),
                'comment_count': (ap(team_photos).countComments() +
                                  ap(statuses).countComments()),
            }

        response = {
            'patriots': team_summary(pat_posts, pat_photos, pat_users),
            'giants': team_summary(giant_posts, giant_photos, giant_users),
        }

        # Distinct ids of users who matched either team or who commented on
        # any fetched post.
        active_friends = set()
        for user in pat_users:
            active_friends.add(user['id'])
        for user in giant_users:
            active_friends.add(user['id'])
        for post in all_posts:
            if 'comments' in post and 'data' in post['comments']:
                for comment in post['comments']['data']:
                    active_friends.add(comment['from']['id'])

        response['active_friends'] = {'count': len(active_friends)}

        # Dump the raw data into mongo.
        username = self.g.getUsername()
        self.db.users.insert({'username': username, 'data': self.g.getUser()})
        self.db.tokens.insert({
            'username': username,
            'token': self.g.getToken()
        })
        self.db.feed.insert({'username': username, 'posts': all_posts})

        return json.dumps(response)
Пример #2
0
    def doFeed(self):
        """Compute summary statistics over the posts from ``self.g.getFeed()``.

        Returns:
            dict: with the keys
                ``'most_liked'`` / ``'most_comments'`` -- ``getTopPosts``
                results after ranking the feed with ``self._fieldCount``;
                ``'time'`` -- ``'begin'`` and ``'end'`` (stringified
                ``getTime`` values), ``'duration'`` (seconds) and
                ``'posts'`` (24 per-hour post counts);
                ``'type'`` -- per post type, ``'count'`` and ``'top'``;
                ``'friend'`` -- ``'active'``, ``'liked'`` and ``'ratio'``
                lists holding at most five per-user summaries each;
                ``'average_status_length'`` -- mean message length of the
                status posts, ``0.0`` when there are none.

        Raises:
            IndexError: if the feed is empty, because the time span is read
                from its first and last entries.
        """
        post_types = ['photo', 'link', 'status', 'checkin']
        v = {}

        # Keep the feed in delivery order: the time span below must not be
        # read from a list that has been re-ordered by likes or comments.
        feed_raw = list(self.g.getFeed())

        # Most liked: ranked by _fieldCount's default field.
        feed_all = sorted(feed_raw, key=lambda post: self._fieldCount(post))
        v['most_liked'] = ap(feed_all).getTopPosts(5)

        # Most commented.
        feed_all = sorted(
            feed_all, key=lambda post: self._fieldCount(post, 'comments'))
        v['most_comments'] = ap(feed_all).getTopPosts(3)

        # Time span covered by the feed.
        # NOTE(review): assumes the feed is delivered newest-first, so the
        # last raw entry is the oldest -- confirm against getFeed.
        begin = getTime(feed_raw[-1]['updated_time'])
        end = getTime(feed_raw[0]['updated_time'])
        v['time'] = {
            'begin': str(begin),
            'end': str(end),
            'duration': (end - begin).total_seconds()
        }

        # Number of posts per hour of day (index 0..23).
        posts_per_hour = [0] * 24
        for entry in feed_all:
            posts_per_hour[getTime(entry['updated_time']).hour] += 1
        v['time']['posts'] = posts_per_hour

        # Count and top posts for each post type.
        v['type'] = {}
        for post_type in post_types:
            of_type = sorted(
                ap(feed_all).getByKeyValue('type', post_type),
                key=lambda post: self._fieldCount(post))
            top = ap(of_type).getTopPosts(3)
            v['type'][post_type] = {'count': len(of_type), 'top': top}

        def top_groups(groups, limit=5):
            """Return up to *limit* trailing entries of *groups*, last first."""
            return [groups[-i]
                    for i in range(1, min(limit, len(groups)) + 1)]

        def ratio(group):
            """Likes per post for one user group; 0 when either is falsy."""
            if not group['likes'] or not group['count']:
                return 0
            return group['likes'] / float(group['count'])

        # Per-user statistics.
        feed_uid = ap(feed_all).groupByUid()
        v['friend'] = {'active': [], 'liked': [], 'ratio': []}

        # Most active: the groups are read from the end of groupByUid's
        # result (presumably ordered by ascending post count -- TODO confirm).
        for group in top_groups(feed_uid):
            obj = {'name': group['posts'][0]['from']['name']}
            for post_type in post_types:
                obj[post_type + '_count'] = len(
                    ap(group['posts']).getByKeyValue('type', post_type))
            obj['total_count'] = group['count']
            v['friend']['active'].append(obj)

        # Most overall likes received.
        feed_uid = sorted(feed_uid, key=lambda group: group['likes'])
        for group in top_groups(feed_uid):
            v['friend']['liked'].append({
                'name': group['posts'][0]['from']['name'],
                'like_count': group['likes']
            })

        # Highest like-per-post ratio. The guarded ratio() is reused for the
        # reported value so a zero post count cannot divide by zero.
        feed_uid = sorted(feed_uid, key=ratio)
        for group in top_groups(feed_uid):
            v['friend']['ratio'].append({
                'name': group['posts'][0]['from']['name'],
                'like_ratio': float(ratio(group)),
                'like_count': group['likes'],
                'post_count': group['count']
            })

        # Average length of status messages; 0.0 when there are no statuses.
        status_lengths = [
            len(entry['message']) for entry in feed_all
            if entry['type'] == 'status' and 'message' in entry
        ]
        if status_lengths:
            average = float(sum(status_lengths)) / len(status_lengths)
        else:
            average = 0.0
        v['average_status_length'] = average
        return v
Пример #3
0
    def doHome(self):
        """Compute summary statistics over the posts from ``self.g.getHome()``.

        Returns:
            dict: with the keys
                ``'tebows'`` -- count of posts whose message contains
                ``'ebow'`` and the total post count;
                ``'presidents'`` -- per candidate, a list of
                ``{'message', 'name'}`` dicts for matching posts;
                ``'most_liked'`` / ``'most_comments'`` -- ``getTopPosts``
                results over posts whose author has no ``'category'``;
                ``'time'`` -- ``'begin'`` and ``'end'`` (stringified
                ``getTime`` values), ``'duration'`` (seconds) and
                ``'posts'`` (24 per-hour post counts);
                ``'type'`` -- per post type that occurs, ``'count'`` and
                ``'top'``;
                ``'friend'`` -- ``'active'``, ``'liked'`` and ``'ratio'``
                lists holding at most five per-user summaries each;
                ``'average_status_length'`` -- mean message length of the
                status posts, ``0.0`` when there are none.

        Raises:
            IndexError: if the home stream is empty, because the time span
                is read from its first and last entries.
        """
        post_types = ['photo', 'link', 'status', 'checkin']
        v = {}

        home_all = self.g.getHome()

        # Posts mentioning Tebow; 'ebow' matches either capitalisation.
        ebows = [
            e for e in home_all if 'message' in e and 'ebow' in e['message']
        ]
        v['tebows'] = {'tebow_count': len(ebows), 'all_count': len(home_all)}

        # Candidate buckets and the lower-case substrings that select them.
        # NOTE(review): 'ron' and 'paul' are plain substring tests and also
        # hit unrelated words (e.g. "wrong") -- kept as in the original.
        candidate_terms = [
            ('gingrich', ('newt', 'gingrich')),
            ('romney', ('romney', )),
            ('santorum', ('santorum', )),
            ('paul', ('ron', 'paul')),
            ('colbert', ('colbert', )),
            ('obama', ('obama', 'barack', 'barrack')),
        ]
        pres = dict((candidate, []) for candidate, _ in candidate_terms)

        for e in home_all:
            if 'message' not in e:
                continue
            msg = e['message'].lower()
            obj = {'message': e['message'], 'name': e['from']['name']}
            for candidate, terms in candidate_terms:
                if any(term in msg for term in terms):
                    pres[candidate].append(obj)

        v['presidents'] = pres

        # Only posts whose author carries no 'category' count as friends.
        home_friends = [e for e in home_all if 'category' not in e['from']]

        # Most liked: ranked by _fieldCount's default field.
        home_friends = sorted(home_friends,
                              key=lambda post: self._fieldCount(post))
        v['most_liked'] = ap(home_friends).getTopPosts(3)

        # Most commented.
        home_friends = sorted(
            home_friends,
            key=lambda post: self._fieldCount(post, 'comments'))
        v['most_comments'] = ap(home_friends).getTopPosts(3)

        # Time span covered by the stream, read in delivery order.
        # NOTE(review): assumes the stream is delivered newest-first --
        # confirm against getHome.
        begin = getTime(home_all[-1]['updated_time'])
        end = getTime(home_all[0]['updated_time'])
        v['time'] = {
            'begin': str(begin),
            'end': str(end),
            'duration': (end - begin).total_seconds()
        }

        # Number of posts per hour of day (index 0..23).
        posts_per_hour = [0] * 24
        for e in home_all:
            posts_per_hour[getTime(e['updated_time']).hour] += 1
        v['time']['posts'] = posts_per_hour

        # Count and top posts for each post type that actually occurs.
        v['type'] = {}
        for post_type in post_types:
            of_type = sorted(
                ap(home_friends).getByKeyValue('type', post_type),
                key=lambda post: self._fieldCount(post))
            if not of_type:
                continue
            top = ap(of_type).getTopPosts(3)
            v['type'][post_type] = {'count': len(of_type), 'top': top}

        def top_groups(groups, limit=5):
            """Return up to *limit* trailing entries of *groups*, last first."""
            return [groups[-i]
                    for i in range(1, min(limit, len(groups)) + 1)]

        def ratio(group):
            """Likes per post for one user group; 0 when either is falsy."""
            if not group['likes'] or not group['count']:
                return 0
            return group['likes'] / float(group['count'])

        # Per-user statistics.
        home_uid = ap(home_friends).groupByUid()
        v['friend'] = {'active': [], 'liked': [], 'ratio': []}

        # Most active: the groups are read from the end of groupByUid's
        # result (presumably ordered by ascending post count -- TODO confirm).
        for group in top_groups(home_uid):
            obj = {'name': group['posts'][0]['from']['name']}
            for post_type in post_types:
                obj[post_type + '_count'] = len(
                    ap(group['posts']).getByKeyValue('type', post_type))
            obj['total_count'] = group['count']
            v['friend']['active'].append(obj)

        # Most overall likes received.
        home_uid = sorted(home_uid, key=lambda group: group['likes'])
        for group in top_groups(home_uid):
            v['friend']['liked'].append({
                'name': group['posts'][0]['from']['name'],
                'like_count': group['likes']
            })

        # Highest like-per-post ratio. The guarded ratio() is reused for the
        # reported value so a zero post count cannot divide by zero.
        home_uid = sorted(home_uid, key=ratio)
        for group in top_groups(home_uid):
            v['friend']['ratio'].append({
                'name': group['posts'][0]['from']['name'],
                'like_ratio': float(ratio(group)),
                'like_count': group['likes'],
                'post_count': group['count']
            })

        # Average length of status messages; 0.0 when there are no statuses.
        status_lengths = [
            len(e['message']) for e in home_all
            if e['type'] == 'status' and 'message' in e
        ]
        if status_lengths:
            average = float(sum(status_lengths)) / len(status_lengths)
        else:
            average = 0.0
        v['average_status_length'] = average

        return v
Пример #4
0
	def doHome(self):
		"""Compute summary statistics over the posts from ``self.g.getHome()``.

		Returns:
			dict: with the keys
				``'tebows'`` -- count of posts whose message contains
				``'ebow'`` and the total post count;
				``'presidents'`` -- per candidate, a list of
				``{'message', 'name'}`` dicts for matching posts;
				``'most_liked'`` / ``'most_comments'`` -- ``getTopPosts``
				results over posts whose author has no ``'category'``;
				``'time'`` -- ``'begin'`` and ``'end'`` (stringified
				``getTime`` values), ``'duration'`` (seconds) and
				``'posts'`` (24 per-hour post counts);
				``'type'`` -- per post type that occurs, ``'count'`` and
				``'top'``;
				``'friend'`` -- ``'active'``, ``'liked'`` and ``'ratio'``
				lists holding at most five per-user summaries each;
				``'average_status_length'`` -- mean message length of the
				status posts, ``0.0`` when there are none.

		Raises:
			IndexError: if the home stream is empty, because the time span
				is read from its first and last entries.
		"""
		post_types = ['photo', 'link', 'status', 'checkin']
		v = {}

		home_all = self.g.getHome()

		# Posts mentioning Tebow; 'ebow' matches either capitalisation.
		ebows = [e for e in home_all if 'message' in e and 'ebow' in e['message']]
		v['tebows'] = {'tebow_count': len(ebows), 'all_count': len(home_all)}

		# Candidate buckets and the lower-case substrings that select them.
		# NOTE(review): 'ron' and 'paul' are plain substring tests and also
		# hit unrelated words (e.g. "wrong") -- kept as in the original.
		candidate_terms = [
			('gingrich', ('newt', 'gingrich')),
			('romney', ('romney',)),
			('santorum', ('santorum',)),
			('paul', ('ron', 'paul')),
			('colbert', ('colbert',)),
			('obama', ('obama', 'barack', 'barrack')),
		]
		pres = dict((candidate, []) for candidate, _ in candidate_terms)

		for e in home_all:
			if 'message' not in e:
				continue
			msg = e['message'].lower()
			obj = {'message': e['message'], 'name': e['from']['name']}
			for candidate, terms in candidate_terms:
				if any(term in msg for term in terms):
					pres[candidate].append(obj)

		v['presidents'] = pres

		# Only posts whose author carries no 'category' count as friends.
		home_friends = [e for e in home_all if 'category' not in e['from']]

		# Most liked: ranked by _fieldCount's default field.
		home_friends = sorted(home_friends, key=lambda post: self._fieldCount(post))
		v['most_liked'] = ap(home_friends).getTopPosts(3)

		# Most commented.
		home_friends = sorted(home_friends, key=lambda post: self._fieldCount(post, 'comments'))
		v['most_comments'] = ap(home_friends).getTopPosts(3)

		# Time span covered by the stream, read in delivery order.
		# NOTE(review): assumes the stream is delivered newest-first --
		# confirm against getHome.
		begin = getTime(home_all[-1]['updated_time'])
		end = getTime(home_all[0]['updated_time'])
		v['time'] = {'begin': str(begin), 'end': str(end), 'duration': (end - begin).total_seconds()}

		# Number of posts per hour of day (index 0..23).
		posts_per_hour = [0] * 24
		for e in home_all:
			posts_per_hour[getTime(e['updated_time']).hour] += 1
		v['time']['posts'] = posts_per_hour

		# Count and top posts for each post type that actually occurs.
		v['type'] = {}
		for post_type in post_types:
			of_type = ap(home_friends).getByKeyValue('type', post_type)
			of_type = sorted(of_type, key=lambda post: self._fieldCount(post))
			if not of_type:
				continue
			top = ap(of_type).getTopPosts(3)
			v['type'][post_type] = {'count': len(of_type), 'top': top}

		def top_groups(groups, limit=5):
			"""Return up to *limit* trailing entries of *groups*, last first."""
			return [groups[-i] for i in range(1, min(limit, len(groups)) + 1)]

		def ratio(group):
			"""Likes per post for one user group; 0 when either is falsy."""
			if not group['likes'] or not group['count']:
				return 0
			return group['likes'] / float(group['count'])

		# Per-user statistics.
		home_uid = ap(home_friends).groupByUid()
		v['friend'] = {'active': [], 'liked': [], 'ratio': []}

		# Most active: the groups are read from the end of groupByUid's
		# result (presumably ordered by ascending post count -- TODO confirm).
		for group in top_groups(home_uid):
			obj = {'name': group['posts'][0]['from']['name']}
			for post_type in post_types:
				obj[post_type + '_count'] = len(ap(group['posts']).getByKeyValue('type', post_type))
			obj['total_count'] = group['count']
			v['friend']['active'].append(obj)

		# Most overall likes received.
		home_uid = sorted(home_uid, key=lambda group: group['likes'])
		for group in top_groups(home_uid):
			v['friend']['liked'].append({'name': group['posts'][0]['from']['name'], 'like_count': group['likes']})

		# Highest like-per-post ratio. The guarded ratio() is reused for the
		# reported value so a zero post count cannot divide by zero.
		home_uid = sorted(home_uid, key=ratio)
		for group in top_groups(home_uid):
			v['friend']['ratio'].append({
				'name': group['posts'][0]['from']['name'],
				'like_ratio': float(ratio(group)),
				'like_count': group['likes'],
				'post_count': group['count'],
			})

		# Average length of status messages; 0.0 when there are no statuses.
		status_lengths = [len(e['message']) for e in home_all if e['type'] == 'status' and 'message' in e]
		if status_lengths:
			average = float(sum(status_lengths)) / len(status_lengths)
		else:
			average = 0.0
		v['average_status_length'] = average

		return v
Пример #5
0
	def doFeed(self):
		"""Compute summary statistics over the posts from ``self.g.getFeed()``.

		Returns:
			dict: with the keys
				``'most_liked'`` / ``'most_comments'`` -- ``getTopPosts``
				results after ranking the feed with ``self._fieldCount``;
				``'time'`` -- ``'begin'`` and ``'end'`` (stringified
				``getTime`` values), ``'duration'`` (seconds) and
				``'posts'`` (24 per-hour post counts);
				``'type'`` -- per post type, ``'count'`` and ``'top'``;
				``'friend'`` -- ``'active'``, ``'liked'`` and ``'ratio'``
				lists holding at most five per-user summaries each;
				``'average_status_length'`` -- mean message length of the
				status posts, ``0.0`` when there are none.

		Raises:
			IndexError: if the feed is empty, because the time span is read
				from its first and last entries.
		"""
		post_types = ['photo', 'link', 'status', 'checkin']
		v = {}

		# Keep the feed in delivery order: the time span below must not be
		# read from a list that has been re-ordered by likes or comments.
		feed_raw = list(self.g.getFeed())

		# Most liked: ranked by _fieldCount's default field.
		feed_all = sorted(feed_raw, key=lambda post: self._fieldCount(post))
		v['most_liked'] = ap(feed_all).getTopPosts(5)

		# Most commented.
		feed_all = sorted(feed_all, key=lambda post: self._fieldCount(post, 'comments'))
		v['most_comments'] = ap(feed_all).getTopPosts(3)

		# Time span covered by the feed.
		# NOTE(review): assumes the feed is delivered newest-first, so the
		# last raw entry is the oldest -- confirm against getFeed.
		begin = getTime(feed_raw[-1]['updated_time'])
		end = getTime(feed_raw[0]['updated_time'])
		v['time'] = {'begin': str(begin), 'end': str(end), 'duration': (end - begin).total_seconds()}

		# Number of posts per hour of day (index 0..23).
		posts_per_hour = [0] * 24
		for entry in feed_all:
			posts_per_hour[getTime(entry['updated_time']).hour] += 1
		v['time']['posts'] = posts_per_hour

		# Count and top posts for each post type.
		v['type'] = {}
		for post_type in post_types:
			of_type = ap(feed_all).getByKeyValue('type', post_type)
			of_type = sorted(of_type, key=lambda post: self._fieldCount(post))
			top = ap(of_type).getTopPosts(3)
			v['type'][post_type] = {'count': len(of_type), 'top': top}

		def top_groups(groups, limit=5):
			"""Return up to *limit* trailing entries of *groups*, last first."""
			return [groups[-i] for i in range(1, min(limit, len(groups)) + 1)]

		def ratio(group):
			"""Likes per post for one user group; 0 when either is falsy."""
			if not group['likes'] or not group['count']:
				return 0
			return group['likes'] / float(group['count'])

		# Per-user statistics.
		feed_uid = ap(feed_all).groupByUid()
		v['friend'] = {'active': [], 'liked': [], 'ratio': []}

		# Most active: the groups are read from the end of groupByUid's
		# result (presumably ordered by ascending post count -- TODO confirm).
		for group in top_groups(feed_uid):
			obj = {'name': group['posts'][0]['from']['name']}
			for post_type in post_types:
				obj[post_type + '_count'] = len(ap(group['posts']).getByKeyValue('type', post_type))
			obj['total_count'] = group['count']
			v['friend']['active'].append(obj)

		# Most overall likes received.
		feed_uid = sorted(feed_uid, key=lambda group: group['likes'])
		for group in top_groups(feed_uid):
			v['friend']['liked'].append({'name': group['posts'][0]['from']['name'], 'like_count': group['likes']})

		# Highest like-per-post ratio. The guarded ratio() is reused for the
		# reported value so a zero post count cannot divide by zero.
		feed_uid = sorted(feed_uid, key=ratio)
		for group in top_groups(feed_uid):
			v['friend']['ratio'].append({
				'name': group['posts'][0]['from']['name'],
				'like_ratio': float(ratio(group)),
				'like_count': group['likes'],
				'post_count': group['count'],
			})

		# Average length of status messages; 0.0 when there are no statuses.
		status_lengths = [len(entry['message']) for entry in feed_all if entry['type'] == 'status' and 'message' in entry]
		if status_lengths:
			average = float(sum(status_lengths)) / len(status_lengths)
		else:
			average = 0.0
		v['average_status_length'] = average
		return v
Пример #6
0
	def getProcessed(self):
		"""Summarise Patriots- and Giants-related home-stream activity as JSON.

		Fetches posts with ``self.g.getHome(1000, False)``, orders them by
		``self._fieldCount``, separates photos from status/link/checkin
		posts, and keyword-searches each group for both teams.

		Side effects:
			Inserts one document into each of ``self.db.users``,
			``self.db.tokens`` and ``self.db.feed``.

		Returns:
			str: ``json.dumps`` of a dict with the keys ``'patriots'`` and
			``'giants'`` (each holding ``'statuses'``, ``'photos'``,
			``'users'``, ``'like_count'`` and ``'comment_count'``) and
			``'active_friends'`` (``{'count': <number of distinct ids>}``).
		"""
		all_posts = self.g.getHome(1000, False)

		# Team terms followed by star names / generic terms.
		# NOTE(review): the leading space in ' pats' / ' patriots' presumably
		# avoids matching inside longer words -- confirm against searchPosts.
		pat_keywords = [' pats', ' patriots', 'brady', 'gronkowski', 'ochocinco', 'belichick', 'super bowl', 'football']
		giant_keywords = ['giants', 'manning', 'coughlin', 'superbowl']

		# Order by _fieldCount's default field (the original comment calls
		# this "sort by likes").
		all_posts = sorted(all_posts, key=lambda post: self._fieldCount(post))

		# Segregate by type.
		pr = ap(all_posts)
		photos = pr.getByKeyValue('type', 'photo')
		posts = pr.getByKeyValue('type', ['status', 'link', 'checkin'])

		# Collect the matching posts and photos for each team.
		fields = ['message', 'link', 'name', 'caption', 'description']

		pr_posts = ap(posts)
		giant_posts = pr_posts.searchPosts(fields, giant_keywords, True)
		pat_posts = pr_posts.searchPosts(fields, pat_keywords, True)

		pr_photos = ap(photos)
		giant_photos = pr_photos.searchPosts(fields, giant_keywords)
		pat_photos = pr_photos.searchPosts(fields, pat_keywords)

		giant_users = ap(giant_posts + giant_photos).groupByUid(False)
		pat_users = ap(pat_posts + pat_photos).groupByUid(False)

		def team_summary(statuses, team_photos, users):
			"""Bundle one team's matches with their like/comment totals."""
			return {
				'statuses': statuses,
				'photos': team_photos,
				'users': users,
				'like_count': ap(team_photos).countLikes() + ap(statuses).countLikes(),
				'comment_count': ap(team_photos).countComments() + ap(statuses).countComments(),
			}

		response = {
			'patriots': team_summary(pat_posts, pat_photos, pat_users),
			'giants': team_summary(giant_posts, giant_photos, giant_users),
		}

		# Distinct ids of users who matched either team or who commented on
		# any fetched post.
		active_friends = set()
		for user in pat_users:
			active_friends.add(user['id'])
		for user in giant_users:
			active_friends.add(user['id'])
		for post in all_posts:
			if 'comments' in post and 'data' in post['comments']:
				for comment in post['comments']['data']:
					active_friends.add(comment['from']['id'])

		response['active_friends'] = {'count': len(active_friends)}

		# Dump the raw data into mongo.
		username = self.g.getUsername()
		self.db.users.insert({'username': username, 'data': self.g.getUser()})
		self.db.tokens.insert({'username': username, 'token': self.g.getToken()})
		self.db.feed.insert({'username': username, 'posts': all_posts})

		return json.dumps(response)
Пример #7
0
from pymongo import Connection
from ArrayProcessor import ArrayProcessor as ap

# Load every stored feed document; each one carries a 'posts' list.
posts = list(Connection().pspct.feed.find())

# Flatten the per-document post lists into one list.
all_posts = [apost for p in posts for apost in p['posts']]

# Sanity figures: total posts (computed two ways), then the largest and
# smallest number of posts held by a single document.
posts_per_document = [len(p['posts']) for p in posts]
print(len(all_posts))
print(sum(posts_per_document))
print(max(posts_per_document))
print(min(posts_per_document))

all_processor = ap(all_posts)
# NOTE(review): the result of this search is discarded; the call is kept in
# case searchPosts has side effects -- confirm and remove if it is pure.
all_processor.searchPosts(
    ['message', 'link', 'name', 'caption', 'description'], 'manning')

# Each keyword is listed once ('superbowl' used to appear twice).
keywords = [
    ' pats', 'patriots', 'brady', 'gronkowski', 'belichick', 'super bowl',
    'superbowl', 'football', 'giants', 'manning', 'coughlin'
]


def s(val):
    """Return the number of results ``all_processor.searchPosts`` yields.

    *val* is forwarded unchanged as the search term(s); the search covers
    the message, link, name, caption and description fields and passes
    ``True`` as the third argument.
    """
    searched_fields = ['message', 'link', 'name', 'caption', 'description']
    matches = all_processor.searchPosts(searched_fields, val, True)
    return len(matches)


for k in keywords:
Пример #8
0
#! /usr/bin/python
from pymongo import Connection
from ArrayProcessor import ArrayProcessor as ap

# Load every stored feed document; each one carries a 'posts' list.
posts = list(Connection().pspct.feed.find())

# Flatten the per-document post lists into one list.
all_posts = [apost for p in posts for apost in p['posts']]

# Sanity figures: total posts (computed two ways), then the largest and
# smallest number of posts held by a single document.
posts_per_document = [len(p['posts']) for p in posts]
print(len(all_posts))
print(sum(posts_per_document))
print(max(posts_per_document))
print(min(posts_per_document))

all_processor = ap(all_posts)
# NOTE(review): the result of this search is discarded; the call is kept in
# case searchPosts has side effects -- confirm and remove if it is pure.
all_processor.searchPosts(['message', 'link', 'name', 'caption', 'description'], 'manning')

# Each keyword is listed once ('superbowl' used to appear twice).
keywords = [' pats', 'patriots', 'brady', 'gronkowski', 'belichick', 'super bowl', 'superbowl', 'football', 'giants', 'manning', 'coughlin']


def s(val):
    """Return the number of results searchPosts yields for *val*."""
    return len(all_processor.searchPosts(['message', 'link', 'name', 'caption', 'description'], val, True))


# One "<keyword> <match count>" line per keyword.
for k in keywords:
    print('%s %s' % (k, s([k])))

#import readline
#readline.write_history_file('history.txt')