def run(self):
    while True:
        time.sleep(30)
        videos = set([])
        users = set([])
        post_counts = {}
        jdata = {}
        jdata["total_posts"] = len(stat_data)
        # Drop entries older than 24 hours and tally the rest.
        # Iterate over a copy so removing items doesn't skip or overrun indexes.
        for entry in stat_data[:]:
            user_id, video_id, status_time = entry
            if status_time < (time.time() - 86400):
                stat_data.remove(entry)
            else:
                videos.add(video_id)
                users.add(user_id)
                if video_id not in post_counts:
                    post_counts[video_id] = 0
                post_counts[video_id] += 1
        jdata["unique_users"] = len(users)
        jdata["unique_videos"] = len(videos)
        sorted_counts = sorted(post_counts.iteritems(), key=operator.itemgetter(1))
        sorted_counts.reverse()
        jdata["top_100"] = sorted_counts[0:100]
        lock.acquire()
        sql_data = {"type": "facebook_posts", "added": str(time.time()), "data": json.dumps(jdata)}
        sql.insertRow(cursor, "parsed_data" + tableSuffix(), sql_data, True)
        cursor.connection.commit()
        lock.release()
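# The workers here write to date-sharded tables via tableSuffix(), which is defined
# elsewhere in this codebase. A minimal sketch of what it likely looks like, assuming
# a suffix derived from the current UTC date (the exact format is an assumption, not
# confirmed by this file):
import time

def tableSuffix():
    # e.g. "_20120301"; the rollover check in the polldata reader below relies on
    # the return value changing once per period.
    return time.strftime("_%Y%m%d", time.gmtime())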
def run(self):
    while True:
        time.sleep(30)
        t = int(time.time())
        # compile data for intervals of a minute, hour and day
        compiled_data = {60: {}, 3600: {}, 86400: {}}
        for k in self.status_data.keys():
            for limit in compiled_data.keys():
                if int(k) >= (t - limit):
                    for event_name in self.status_data[k].keys():
                        if event_name not in compiled_data[limit]:
                            compiled_data[limit][event_name] = 0
                        compiled_data[limit][event_name] += self.status_data[k][event_name]
            # discard buckets that have aged out of the largest window
            if int(k) < (t - 86400):
                self.status_data.pop(k)
        compiled_data["start_time"] = self.start_time
        compiled_data["compiled"] = t
        sql_data = {"script": self.file, "added": time.time(), "data": json.dumps(compiled_data)}
        cursor = sql.slytics1().connection.cursor()
        sql.insertRow(cursor, "script_statuses" + tableSuffix(), sql_data, False, True)
        # close the cursor before its connection
        cursor.close()
        cursor.connection.close()
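# status.event() is called by every worker here but defined elsewhere. Judging by the
# compiler above, which reads self.status_data as {"<epoch second>": {event_name: count}}
# (it calls int(k) on the keys and sums per event name), the counter it consumes
# plausibly looks like the sketch below; the class name and constructor argument are
# assumptions:
import time

class StatusCounter(object):
    def __init__(self, file_name):
        self.file = file_name
        self.start_time = int(time.time())
        self.status_data = {}  # {"<epoch second>": {"event_name": count}}

    def event(self, name):
        # Bucket counts by the current second; the compiler thread rolls these
        # buckets up into minute/hour/day windows every 30 seconds.
        k = str(int(time.time()))
        bucket = self.status_data.setdefault(k, {})
        bucket[name] = bucket.get(name, 0) + 1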
def run(self):
    while True:
        lock.acquire()
        if not q.empty():
            jdata = q.get()
            lock.release()
            status_id = jdata["id"]
            text = jdata["text"]
            status.event("statuses_parsed")
            urls = re.findall(r"(?P<url>https?://[^\s]+)", text)
            videos = []
            for url in urls:
                u = url
                if needsExpansion(url):
                    u = expandURL(url)
                video_id = getVideoID(u)
                if video_id is not None and video_id not in videos:
                    videos.append(video_id)
                if video_id is not None:
                    lock.acquire()
                    sql_data = {"original_url": url[:200], "expanded_url": u[:1000], "video_id": video_id}
                    sql.insertRow(cursor, "youtube_urls", sql_data, True)
                    cursor.connection.commit()
                    lock.release()
                    status.event("urls_found")
            lock.acquire()
            for video in videos:
                sql_data = {"id": video}
                sql.insertRow(cursor, "youtube_ids", sql_data, True)
                status.event("videos_found")
            cursor.connection.commit()
            lock.release()
        else:
            lock.release()
            time.sleep(0.1)
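# The URL handling above relies on three helpers defined elsewhere: getVideoID(),
# needsExpansion() and expandURL(). A rough sketch under the assumption that they
# match YouTube watch links and expand common shorteners via a single redirect hop
# (the regex, shortener list and timeout are assumptions, not confirmed here):
import re
import socket
import httplib
import urlparse

def getVideoID(url):
    # Pull the 11-character video ID out of a youtube.com/watch or youtu.be link.
    m = re.search(r"(?:youtube\.com/watch\?.*?v=|youtu\.be/)([\w-]{11})", url)
    return m.group(1) if m else None

def needsExpansion(url):
    # Shortened links hide the real destination and must be expanded first.
    host = urlparse.urlparse(url).netloc.lower()
    return host in ("t.co", "bit.ly", "j.mp", "goo.gl", "tinyurl.com")

def expandURL(url):
    # Follow one redirect hop and return the Location header, falling back to the original URL.
    parts = urlparse.urlparse(url)
    path = parts.path or "/"
    if parts.query:
        path += "?" + parts.query
    cls = httplib.HTTPSConnection if parts.scheme == "https" else httplib.HTTPConnection
    try:
        conn = cls(parts.netloc, timeout=5)
        conn.request("HEAD", path)
        return conn.getresponse().getheader("location", url) or url
    except (socket.error, httplib.HTTPException):
        return url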
def run(self):
    while True:
        lock.acquire()
        if not q.empty():
            jdata = q.get()
            lock.release()
            status_id = jdata["id"]
            user_id = status_id.split("_")[0]
            text = ""
            for key in ["message", "link", "description", "source"]:
                if key in jdata:
                    text += " " + jdata[key]
            status.event("statuses_parsed")
            urls = re.findall(r"(?P<url>https?://[^\s]+)", text)
            videos = []
            for url in urls:
                video_id = getVideoID(url)
                if video_id is not None and video_id not in videos:
                    videos.append(video_id)
                if video_id is not None:
                    lock.acquire()
                    sql_data = {"original_url": url[:200], "expanded_url": url[:1000], "video_id": video_id}
                    sql.insertRow(cursor, "youtube_urls", sql_data, True)
                    cursor.connection.commit()
                    lock.release()
                    status.event("urls_found")
            lock.acquire()
            for video in videos:
                sql_data = {"id": video}
                sql.insertRow(cursor, "youtube_ids", sql_data, True)
                status.event("videos_found")
                stat_data.append([user_id, video, time.time()])
            cursor.connection.commit()
            lock.release()
        else:
            lock.release()
            time.sleep(0.1)
def run(self):
    base_url = "/method/links.getStats?format=json&urls="
    youtube_base = "youtube.com%2Fwatch%3Fv%3D"
    while True:
        lock.acquire()
        # Pull up to 850 video IDs per batched links.getStats request;
        # refill the queue from the master ID list once it runs dry.
        req_ids = []
        while len(req_ids) < 850 and not q.empty():
            req_ids.append(q.get())
        if q.empty():
            for ytid in get_ids.ids:
                q.put(ytid)
        lock.release()
        if len(req_ids) > 0:
            req_url = base_url
            for req_id in req_ids:
                req_url += youtube_base + req_id + ","
            error_thrown = False
            try:
                time.sleep(0.01)
                self.httpconn.request("GET", req_url)
                res = self.httpconn.getresponse().read()
            except socket.error as ex:
                error_thrown = True
                res = "{}"
                self.httpconn = httplib.HTTPSConnection("api.facebook.com")
            status.event("requests")
            retrieved = time.time()
            jdata = {}
            parsed = json.loads(res)
            for video in parsed:
                try:
                    video_id = getVideoID(video["normalized_url"])
                    total_count = video["total_count"]
                    lock.acquire()
                    if video_id in video_ids_seen and total_counts[video_id] == total_count:
                        # data has remained unchanged since last request for this video
                        status.event("data_unchanged")
                    else:
                        video_ids_seen.add(video_id)
                        total_counts.update({video_id: total_count})
                        jdata = video
                        status.event("data_changed")
                    lock.release()
                    status.event("urls_polled")
                    jdata.update({"retrieved": retrieved})
                    sql_data = {"data": json.dumps(jdata)}
                    sql.insertRow(self.cursor, "facebook_polldata" + tableSuffix(), sql_data)
                    self.conn.commit()
                except:
                    error_thrown = True
            if error_thrown:
                # Re-queue the whole batch so failed IDs get polled again.
                lock.acquire()
                for req_id in req_ids:
                    q.put(req_id)
                lock.release()
                status.event("request_errors")
cursor = conn.cursor()
c = sql.slytics1().connection
ccursor = c.cursor()
max_id = 0
table_suffix = tableSuffix()
while True:
    cursor.execute("select id, data from facebook_polldata" + table_suffix + " where id > " + str(max_id) + " limit 500")
    res = cursor.fetchone()
    # once the current table is exhausted and the suffix has rolled over, move to the new table
    if res is None and table_suffix != tableSuffix():
        table_suffix = tableSuffix()
        max_id = 0
    while res:
        max_id = res[0]
        data = json.loads(res[1])
        status.event("rows_processed")
        if "normalized_url" in data:
            vid = getVideoID(data["normalized_url"])
            extant_data = sql.scalar(ccursor, "facebook_pollcount", "data", "video", vid)
            str_data = str(data["retrieved"]) + " " + str(data["like_count"]) + " " + str(data["share_count"]) + " "
            if extant_data is None:
                sql_data = {"video": vid, "data": str_data}
                sql.insertRow(ccursor, "facebook_pollcount", sql_data)
            else:
                ccursor.execute("update facebook_pollcount set data = concat(data, '" + str_data + "') where video = '" + vid + "'")
        res = cursor.fetchone()
    cursor.connection.commit()
    ccursor.connection.commit()
    time.sleep(0.1)
except:
    conn = httplib.HTTPSConnection("graph.facebook.com")
if "data" in parsed:
    if "paging" in parsed:
        locales[l]["since"] = int(urlparse.parse_qs(urlparse.urlparse(parsed["paging"]["previous"])[4])["since"][0]) - 1
    # Scale the polling delay to the locale's recent post rate (targets roughly 100
    # posts per request), capped at 300 seconds.
    delay = (100 / ((len(parsed["data"]) + 5) / (time.time() - locale["last_retrieve"])))
    if delay > 300:
        delay = 300
    if l == "en_US":
        delay = 10
    locales[l]["next_retrieve"] = time.time() + delay
    locales[l]["last_retrieve"] = time.time()
    for post in parsed["data"]:
        status.event(l + "posts")
        if "from" in post:
            post["from"].update({"locale": l})
        sql_data = {"id": post["id"], "data": json.dumps(post)}
        if post["id"] not in locales[l]["skip"]:
            sql.insertRow(cursor, "facebook_statuses" + tableSuffix(), sql_data, True)
        if post["updated_time"] == parsed["data"][0]["updated_time"]:
            locales[l]["skip"][post["id"]] = locale["since"]
    for key in locales[l]["skip"].keys():
        if locales[l]["skip"][key] != locales[l]["since"]:
            locales[l]["skip"].pop(key)
    if len(parsed["data"]) > 480:
        status.event(l + "pegged_requests")
result_count = 0
if "data" in parsed:
    result_count = len(parsed["data"])
sql_data = {"time": str(time.time()), "locale": l, "since": locale["since"], "status_code": str(res.status), "results": str(result_count)}
sql.insertRow(cursor, "facebook_requests" + tableSuffix(), sql_data)
cursor.connection.commit()
resp = "{}" try: conn.request("GET", "/search.json?q=youtube.com&rpp=100&result_type=recent&filter=links&since_id="+str(since_id), None, {"User-Agent":"VideoMuffin"}) status.event("requests") res = conn.getresponse() if str(res.status) != "200": status.event("non_200_responses") resp = res.read() except (socket.error, httplib.error) as ex: status.event(str(ex)) try: parsed = json.loads(resp) except ValueError: parsed = {} status.event("json_value_errors") if parsed.has_key("results"): tweets_this_request = 0 for tweet in parsed["results"]: tweets_this_request +=1 status.event("tweets") if since_id < tweet["id"]: since_id = tweet["id"] sql_data = {"id":tweet["id"], "data":json.dumps(tweet)} sql.insertRow(cursor, "twitter_statuses"+tableSuffix(), sql_data, True) if tweets_this_request == 100: status.event("pegged_requests") sql_data = {"time":str(time.time()), "since_id":str(since_id), "status_code":str(res.status), "results":str(tweets_this_request)} sql.insertRow(cursor, "twitter_requests"+tableSuffix(), sql_data) cursor.connection.commit() conn.close() time.sleep(1)
["since"][0]) - 1 delay = (100 / ((len(parsed["data"]) + 5) / (time.time() - locale["last_retrieve"]))) if delay > 300: delay = 300 if l == "en_US": delay = 10 locales[l]["next_retrieve"] = time.time() + delay locales[l]["last_retrieve"] = time.time() for post in parsed["data"]: status.event(l + "posts") if "from" in post: post["from"].update({"locale": l}) sql_data = {"id": post["id"], "data": json.dumps(post)} if not locales[l]["skip"].has_key(post["id"]): sql.insertRow(cursor, "facebook_statuses" + tableSuffix(), sql_data, True) if post["updated_time"] == parsed["data"][0][ "updated_time"]: locales[l]["skip"][post["id"]] = locale["since"] for key in locales[l]["skip"].keys(): if locales[l]["skip"][key] != locales[l]["since"]: locales[l]["skip"].pop(key) if len(parsed["data"]) > 480: status.event(l + "pegged_requests") result_count = 0 if parsed.has_key("data"): result_count = len(parsed["data"]) sql_data = {