Exemplo n.º 1
Arquivo: fb.py Projeto: amol9/fbstats
class FB():
	"""Pulls Facebook data through FQL and stores it via DBManager.

	The instance keeps a database handle (``self._db``), a SQL fragment that
	restricts stream rows to a fixed set of post type codes (``self._types``)
	and a budget of remaining FQL calls (``self._fql_limit``).

	NOTE(review): several statements of this class were missing from the
	source listing (``try:``/``else:``/``return`` lines, helper calls and
	closing parentheses). They have been restored from context; every place
	where the restoration is an inference is marked with NOTE(review).
	"""

	def __init__(self):
		"""Open the database and initialise the FQL query defaults."""
		# The file's existence is not checked here; per the original author's
		# note a missing database is left to DBManager to deal with.
		db_path = joinpath(globals.data_dir, globals.db_name)
		self._db = DBManager(db_path)

		# SQL condition selecting the stream post types that are analysed.
		self._types = "(type = 46 OR type = 80 OR type = 128 OR type = 247 OR type = 308 OR type = 60)"
		# Number of FQL calls this instance may still issue (see do_fql).
		self._fql_limit = globals.max_fb_api_calls

	def __del__(self):
		"""Finalizer hook; currently performs no cleanup.

		NOTE(review): the body of this method was missing from the source.
		It presumably released the handle held in ``self._db`` -- confirm
		against DBManager and restore that call.
		"""

	def do_fql(self, query):
		"""Run one FQL query against the Graph API and return the decoded JSON.

		query -- FQL statement text.

		Returns the parsed JSON response; it must contain a 'data' entry.
		Raises FBError when no access token can be obtained, when the call
		budget of this instance is exhausted, or when the request or the
		decoding of its response fails.
		"""
		try:
			fbaccess = FBAccess(self._db)
			self._access_token = fbaccess.token
		except FBAccessException as fbae:
			raise FBError(fbae.message)

		# "<=" so that a budget configured as a negative number also stops.
		if self._fql_limit <= 0:
			log.error('FQL call limit exceeded')
			raise FBError
		self._fql_limit -= 1

		query_url = 'https://graph.facebook.com/fql?' + urllib.urlencode({'q' : query, 'access_token' : self._access_token})
		try:
			response = urllib.urlopen(query_url)
		except Exception as e:
			log.error('FQL request failed: %r'%(e,))
			raise FBError

		try:
			data = response.read()
			jdata = json.loads(data)
			# Also validates the payload: an error response has no 'data' key.
			log.info('record count = %d'%len(jdata['data']))
		except Exception as e:
			log.error('unusable FQL response: %r'%(e,))
			raise FBError
		finally:
			# Release the connection whether or not decoding succeeded.
			response.close()
		return jdata

	def get_friends(self):
		"""Synchronise the local ``user`` table with the current friend list.

		Friends already stored are updated when any fetched field differs,
		unknown friends are inserted, and stored users that are no longer
		returned by Facebook are flagged with ``deleted = 1``.

		Raises FBError (from do_fql) when the FQL call fails.
		"""
		friends_query = """SELECT uid, first_name, middle_name, last_name, sex, relationship_status, pic, 
				significant_other_id, age_range, birthday_date, current_location, friend_count, 
				hometown_location FROM user WHERE uid IN (SELECT uid2 FROM friend WHERE uid1=me()) OR uid=me()"""

		jdata = self.do_fql(friends_query)

		users = self._db.query("SELECT * FROM user WHERE deleted = 0")
		# Index stored users by id once instead of rescanning the list per record.
		users_by_id = dict((u['id'], u) for u in users)
		seen_ids = set()

		for record in jdata['data']:
			# Empty strings are normalised to None before comparing/storing.
			for k in record:
				if record[k] == '':
					record[k] = None
			if record['friend_count'] is not None:
				record['friend_count'] = int(record['friend_count'])
			if record['sex'] is not None:
				# Only the first letter of the sex string is kept.
				record['sex'] = record['sex'][0]
			if record['hometown_location'] is not None:
				record['hometown_location'] = record['hometown_location']['name']
			if record['current_location'] is not None:
				record['current_location'] = record['current_location']['name']
			if record['age_range'] is not None:
				record['age_range'] = str(record['age_range']['min'])

			uid = record['uid']
			user = users_by_id.get(uid)
			if user is not None:
				seen_ids.add(uid)
				update_values = {}

				for key in record:
					# 'uid' is stored under the column name 'id', so it has
					# no same-named column to compare against.
					if key == 'uid':
						continue
					if record[key] != user[key.encode()]:
						update_values[key.encode()] = record[key]

				if update_values:
					self._db.update('user', update_values, "id = \'%s\'"%uid)
					sys.stdout.write('%s\n'%(update_values,))
					sys.stdout.write('u ')
					sys.stdout.flush()
			else:
				# NOTE(review): the tail of this tuple was missing in the
				# source; a trailing 0 for the `deleted` column is assumed --
				# confirm against the user table schema.
				self._db.insert('user', (uid, record['first_name'], record['middle_name'],
					record['last_name'], record['sex'], record['relationship_status'], record['pic'],
					record['significant_other_id'], record['age_range'], record['birthday_date'],
					record['current_location'], record['friend_count'], record['hometown_location'],
					0))
				log.info('new friend: %s %s'%(record['first_name'], record['last_name']))

		# Anything stored but not returned this time is no longer a friend.
		for user in users:
			if user['id'] in seen_ids:
				continue
			self._db.update('user', {'deleted': 1}, "id = \'%s\'"%user['id'])
			log.info('deleted friend: %s %s'%(user['first_name'], user['last_name']))


	def get_stream(self, start_time=None, end_time=None, cont=False):
		"""Fetch stream posts of friends and of the user for a time window.

		start_time, end_time -- window bounds as unix timestamps. Unless both
			are given, the window is the 12 hours before "now", or before
			the timestamp saved in ./end_time when `cont` is set and that
			file exists.
		cont -- when true, the window start is written to ./end_time after
			fetching so the next call continues backwards from there.

		Raises FBError (from do_fql) when an FQL call fails.
		"""
		marker_path = './end_time'

		def db_insert(jdata):
			for record in jdata['data']:
				for k in record:
					if record[k] == '':
						record[k] = None

				self._db.insert('stream', (record['post_id'], record['actor_id'], record['created_time'],
					record['type'], record['like_info']['like_count'], record['comment_info']['comment_count'],
					1 if record['is_popular'] else 0, record['message'], record['share_count'],
					record['permalink'], None, None, None, 0, 0, 0))	#last 3: updated(remove), update_likes, update_comments

				sys.stdout.write('. ')
				sys.stdout.flush()

		if not (start_time and end_time):
			if cont and os.path.exists(marker_path):
				with open(marker_path, 'r') as marker:
					end_time = int(marker.read().strip())
			else:
				end_time = int(time())
			start_time = end_time + int(timedelta(hours=-12).total_seconds())

		stream_query = """SELECT post_id, actor_id, created_time, type, like_info, comment_info, 
				is_popular, message, share_count, permalink FROM stream WHERE source_id """
		time_clause = ' AND created_time > %d AND created_time < %d'%(start_time, end_time)

		# First the friends' posts, then the user's own posts.
		for source_clause in ("IN (SELECT uid2 FROM friend WHERE uid1=me())", "= me()"):
			db_insert(self.do_fql(stream_query + source_clause + time_clause))

		if cont:
			with open(marker_path, 'w') as marker:
				marker.write(str(start_time))

	def update_stream(self):
		"""Refresh like/comment/share counts of stored posts from the last 4 days.

		Posts are re-queried in batches of 20. A stored row is rewritten only
		when its like or comment count changed; `update_likes` and
		`update_comments` are set to 1 for whichever of the two changed.

		Raises FBError (from do_fql) when an FQL call fails.
		"""
		batch_size = 20
		since = int(time()) + timedelta(days=-4).total_seconds()
		post_ids_query = "SELECT post_id, like_count, comment_count, share_count FROM stream WHERE created_time > %d AND %s"\
				%(since, self._types)
		result = self._db.query(post_ids_query)

		# First stored row per post id (duplicates keep the earliest row).
		rows_by_id = {}
		for row in result:
			rows_by_id.setdefault(row['post_id'], row)

		for offset in range(0, len(result), batch_size):
			batch = result[offset:offset + batch_size]
			post_ids = ','.join("'%s'"%row['post_id'] for row in batch)

			stream_query = "SELECT post_id, like_info, comment_info, share_count FROM stream WHERE post_id IN (%s)"%(post_ids)
			jdata = self.do_fql(stream_query)

			for record in jdata['data']:
				post_id = record['post_id']
				like_count = record['like_info']['like_count']
				comment_count = record['comment_info']['comment_count']
				share_count = record['share_count']

				row = rows_by_id.get(post_id)
				if row is None:
					# Facebook returned a post that is not stored locally.
					continue
				likes_changed = row['like_count'] != like_count
				comments_changed = row['comment_count'] != comment_count

				if likes_changed or comments_changed:
					update_query = """UPDATE stream SET like_count = %d, comment_count = %d, share_count = %d, 
							update_likes = %d, update_comments = %d WHERE post_id = '%s'"""\
							%(like_count, comment_count, share_count, 1 if likes_changed else 0, 1 if comments_changed else 0, post_id)
					# The statement was built but never executed in the source.
					self._db.query(update_query)
					sys.stdout.write('. ')
					sys.stdout.flush()

	def get_comments(self):
		"""Fetch comments for up to 20 stored posts that still need them.

		A post qualifies when it has comments and either has none stored yet
		or is flagged `update_comments`. The flag is cleared afterwards.

		NOTE(review): the source body was indented one level deeper than a
		method body, so an enclosing statement (possibly a loop repeating
		this batch) appears to have been lost. One batch per call is
		implemented here -- confirm the intended behaviour.

		Raises FBError (from do_fql) when the FQL call fails.
		"""
		post_ids_query = """SELECT post_id FROM stream WHERE (post_id NOT IN 
				(SELECT DISTINCT post_id FROM comment) OR update_comments = 1) AND comment_count > 0 AND 
				%s LIMIT 20"""%(self._types)
		result = self._db.query(post_ids_query)
		if len(result) == 0:
			return

		post_ids = ','.join("'%s'"%row['post_id'] for row in result)
		comment_query = "SELECT post_fbid, post_id, fromid FROM comment WHERE post_id IN (%s)"%post_ids
		jdata = self.do_fql(comment_query)

		for record in jdata['data']:
			self._db.insert('comment', (record['post_fbid'], record['post_id'], record['fromid']))
			sys.stdout.write('. ')
			sys.stdout.flush()

		self._db.query("UPDATE stream SET update_comments = 0 WHERE post_id in (%s)"%(post_ids))

	def get_likes(self):
		"""Fetch likes for up to 20 stored posts that still need them.

		A post qualifies when it has likes and either has none stored yet or
		is flagged `update_likes`. The flag is cleared afterwards.

		NOTE(review): as in get_comments, the source indentation suggests a
		lost enclosing statement (possibly a loop). One batch per call is
		implemented here -- confirm the intended behaviour.

		Raises FBError (from do_fql) when the FQL call fails.
		"""
		post_ids_query = """SELECT post_id FROM stream WHERE (post_id NOT IN 
				(SELECT DISTINCT post_id FROM like) OR update_likes = 1) AND like_count > 0 AND 
				%s LIMIT 20"""%(self._types)
		result = self._db.query(post_ids_query)
		if len(result) == 0:
			return

		post_ids = ','.join("'%s'"%row['post_id'] for row in result)
		like_query = "SELECT post_id, user_id FROM like WHERE post_id IN (%s)"%post_ids
		jdata = self.do_fql(like_query)

		for record in jdata['data']:
			self._db.insert('like', (record['post_id'], record['user_id']))
			sys.stdout.write('. ')
			sys.stdout.flush()

		self._db.query("UPDATE stream SET update_likes = 0 WHERE post_id in (%s)"%(post_ids))

	def clean_duplicates(self):
		"""Load the duplicate-removal SQL from clean_stream.sql next to this file.

		NOTE(review): the statement that runs the loaded SQL was missing from
		the source, so this method currently only reads the file. Restore the
		execution call once the appropriate DBManager method is confirmed
		(the script may hold more than one statement).
		"""
		delete_dup_query = None
		with open(joinpath(dirname(realpath(__file__)), 'clean_stream.sql'), 'r') as f:
			delete_dup_query = f.read()


	def render_graph(self, start=None, end=None):
		"""Build the interaction graph of likes and comments between users.

		start, end -- optional date strings in the format accepted by
			get_timestamp ('%d%b%Y'); when both are given only posts created
			in that period are counted.

		Each user that appears in an interaction becomes a node and each
		pair of users an edge weighted by their number of interactions.
		Returns the populated Graph.

		NOTE(review): the statements that ran the temp-table queries and
		whatever rendered the graph were missing from the source. The
		queries are executed through self._db.query here and the graph is
		returned to the caller -- confirm and restore the rendering call.
		"""
		filter_self_likes = True
		filter_self_comments = True

		time_period_clause = ""
		if start and end:
			time_period_clause = "AND s.created_time BETWEEN %d AND %d"%(self.get_timestamp(start), self.get_timestamp(end))

		likejoin_query = """CREATE TEMP TABLE likejoin AS 
				SELECT s.source_id AS user1, l.user_id AS user2 FROM stream AS s 
				JOIN like AS l 
				ON s.post_id = l.post_id %s WHERE (l.user_id IN (SELECT id FROM user WHERE deleted = 0)
				AND s.source_id IN (SELECT id FROM user WHERE deleted = 0)) %s"""\
				%("AND s.source_id <> l.user_id" if filter_self_likes else "", time_period_clause)
		commentjoin_query = """INSERT INTO likejoin 
				SELECT s.source_id AS user1, c.user_id AS user2 FROM stream AS s 
				JOIN comment AS c 
				ON s.post_id = c.post_id %s WHERE (c.user_id IN (SELECT id FROM user WHERE deleted = 0)
				AND s.source_id IN (SELECT id FROM user WHERE deleted = 0)) %s"""\
				%("AND s.source_id <> c.user_id" if filter_self_comments else "", time_period_clause)

		# The FROM and join keywords were missing in the source. LEFT JOIN is
		# inferred from coalesce(t2.c, 0), which only makes sense when t2 may
		# have no matching row.
		result_query = """CREATE TEMP TABLE result AS 
				SELECT min(t1.user1, t1.user2) AS fuser1, max(t1.user1, t1.user2) AS fuser2,
				t1.c+coalesce(t2.c, 0) AS count 
				FROM 
				(SELECT *, count(user1) AS c FROM likejoin GROUP BY user1, user2) AS t1 
				LEFT JOIN 
				(SELECT *, count(user1) AS c FROM likejoin GROUP BY user1, user2) AS t2 
				ON t1.user2 = t2.user1 AND t1.user1 = t2.user2 AND t1.user1 <> t1.user2 
				GROUP BY fuser1, fuser2"""

		users_query= """SELECT id, first_name, last_name FROM user WHERE id IN 
				(SELECT fuser1 FROM result UNION SELECT fuser2 FROM result)"""

		# Drop leftovers of an earlier call first so the CREATE statements
		# cannot fail; one statement per call.
		self._db.query("DROP TABLE IF EXISTS likejoin")
		self._db.query("DROP TABLE IF EXISTS result")
		self._db.query(likejoin_query)
		self._db.query(commentjoin_query)
		self._db.query(result_query)

		min_max_count = self._db.query("SELECT min(count) as min_count, max(count) as max_count FROM result")[0]

		g = Graph(min_max_count['min_count'], min_max_count['max_count'])

		users = self._db.query(users_query)
		for user in users:
			# last_name may be NULL; fall back to an empty initial.
			initial = (user['last_name'] or '')[:1]
			g.add_node(str(user['id']), str(user['first_name'] + '.' + initial))

		likes = self._db.query("select * from result;")
		for like in likes:
			g.add_edge(str(like['fuser1']), str(like['fuser2']), like['count'])

		return g


	def render_plot(self, type, count=10, first_name=None, last_name=None):
		"""Query the data for one plot type and hand it to gnuplot.

		type -- 'top_posts', 'top_likes' or 'user_posts'; any other value
			raises KeyError.
		count -- number of users shown by the 'top_*' plots.
		first_name, last_name -- user selected by the 'user_posts' plot.

		NOTE(review): the end of the Popen call was missing from the source.
		Feeding the rows to gnuplot's stdin is an inference from `data`
		being built but never used -- confirm against plot.gp.
		"""
		# Local import: the stdin pipe constant is needed for the restored call.
		from subprocess import PIPE

		def sql_text(value):
			# Double single quotes so a name cannot end its SQL literal early.
			return ('%s'%(value,)).replace("'", "''")

		plots = {}
		plots['top_posts'] = PlotInfo(query = """SELECT s.count, u.first_name || '.' || substr(u.last_name, 1, 1) AS name FROM 
				(SELECT count(*) AS count, source_id FROM stream WHERE %s GROUP BY source_id) AS s JOIN user AS u 
				ON s.source_id = u.id WHERE u.deleted = 0 ORDER BY s.count DESC LIMIT %d"""%(self._types, count),\
				title = 'Top %d Posts'%(count), x = 'name')

		plots['top_likes'] = PlotInfo(query = """SELECT s.count, u.first_name || '.' || substr(u.last_name, 1, 1) AS name FROM 
				(SELECT sum(like_count) AS count, source_id FROM stream WHERE %s GROUP BY source_id) AS s JOIN user AS u 
				ON s.source_id = u.id WHERE u.deleted = 0 ORDER BY s.count DESC LIMIT %d"""%(self._types, count),\
				title = 'Top %d Likes'%(count), x = 'name')

		plots['user_posts'] = PlotInfo(query = """select count(post_id) as count , strftime("%%m/%%d", datetime(created_time, 'unixepoch')) as day 
					from stream where source_id in (select id from user
					where first_name = '%s' and last_name = '%s') and %s group by day"""%(sql_text(first_name), sql_text(last_name), self._types),\
					title = 'Timeline: %s %s'%(first_name, last_name), x = 'day')

		plot_info = plots[type]
		result = self._db.query(plot_info.query)
		# One "<count> <label>" line per row.
		data = ''.join('%s %s\n'%(row['count'], row[plot_info.x]) for row in result)
		# The canvas is sized by the number of rows actually returned.
		row_count = len(result)

		plot_filepath = joinpath(dirname(realpath(__file__)), 'plot.gp')
		plot = Popen(["gnuplot", "-e",  "title='%s'"%(plot_info.title), "-e",
					"size = '%d, %d'"%(row_count*60, int(row_count*60/1.6)), plot_filepath],
					stdin=PIPE)
		plot.communicate(data)

	def add_job_period(self, start=None, end=None):
		"""Schedule 12-hour fetch periods in the ``job_period`` table.

		start, end -- optional date strings ('%d%b%Y'). When both are given,
			periods covering [start, end) are added. Otherwise periods are
			appended after the latest stored end_time (or from now when the
			table is empty) for globals.job_period_days days.

		Each row is (start_time, end_time, 0); the last value is the
		get_count used by get_stream_job.
		"""
		step = timedelta(hours=12).total_seconds()

		if start and end:
			start_timestamp = self.get_timestamp(start)
			end_timestamp = self.get_timestamp(end)
		else:
			start_timestamp = self._db.query("SELECT MAX(end_time) FROM job_period")[0][0]
			if start_timestamp is not None and start_timestamp > time():
				# NOTE(review): the body of this branch was missing in the
				# source; returning (periods already reach into the future)
				# is assumed -- confirm.
				return

			if start_timestamp is None:
				start_timestamp = int(time())

			end_timestamp = start_timestamp + int(timedelta(days=globals.job_period_days).total_seconds())

		while start_timestamp < end_timestamp:
			self._db.insert('job_period', (start_timestamp, start_timestamp + step, 0))
			start_timestamp += step

	def get_stream_job(self):
		"""Fetch the stream for every elapsed job period fetched fewer than twice.

		A period counts as elapsed when its end_time is not later than the
		current time minus `timezone`. After each fetch the period's
		get_count is incremented.

		Raises FBError (from get_stream) when an FQL call fails.
		"""
		time_periods_query = "SELECT * FROM job_period WHERE end_time <= %d AND get_count < 2"%(int(time()) - timezone)
		time_periods = self._db.query(time_periods_query)
		for tp in time_periods:
			self.get_stream(tp['start_time'], tp['end_time'])
			self._db.query("UPDATE job_period SET get_count = %d WHERE end_time = %d"%(tp['get_count'] + 1, tp['end_time']))

	def get_timestamp(self, date_string):
		"""Convert a '%d%b%Y' date string (e.g. '02Jan1970') to seconds since 1970-01-01.

		The date is treated as naive; no timezone conversion is applied.
		Returns a float. Raises ValueError when the string does not match.
		"""
		epoch = datetime(1970, 1, 1)
		return (datetime.strptime(date_string, '%d%b%Y') - epoch).total_seconds()

	def get_photos(self, start_time=None, end_time=None, cont=False):
		"""Fetch photos owned by friends and by the user into the ``photo`` table.

		start_time, end_time, cont -- handled exactly as in get_stream,
			including the shared ./end_time marker file. The computed window
			is NOT applied to the FQL queries: the time filter is disabled
			in the source, so every call fetches without a date restriction.

		Raises FBError (from do_fql) when an FQL call fails.
		"""
		marker_path = './end_time'

		def db_insert(jdata):
			for record in jdata['data']:
				for k in record:
					if record[k] == '':
						record[k] = None

				self._db.insert('photo', (record['pid'], record['aid'], record['caption'],
					record['comment_info']['comment_count'], record['like_info']['like_count'], record['created'],
					record['link'], record['owner'], record['place_id'], record['src_big'], 0, 0))

				sys.stdout.write('. ')
				sys.stdout.flush()

		if not (start_time and end_time):
			if cont and os.path.exists(marker_path):
				with open(marker_path, 'r') as marker:
					end_time = int(marker.read().strip())
			else:
				end_time = int(time())
			start_time = end_time + int(timedelta(hours=-12).total_seconds())

		photo_query = """SELECT pid, aid, caption, comment_info, like_info, created, 
				link, owner, place_id, src_big FROM photo WHERE owner """

		# First the friends' photos, then the user's own photos.
		for source_clause in ("IN (SELECT uid2 FROM friend WHERE uid1=me())", "= me()"):
			db_insert(self.do_fql(photo_query + source_clause))

		if cont:
			with open(marker_path, 'w') as marker:
				marker.write(str(start_time))
