# -*- coding: utf-8 -*-
# Python 2 / Twisted code. Project-local helpers such as find_stats, sortasc,
# write_json, loggerr, SingleMongo, clean_date, clean_url, formatQuery,
# assembleResults, URL_REGEX, re_twitmedia, re_tweet, re_links and config
# come from the surrounding modules.
import os
import re
import time
import urllib
from datetime import datetime, timedelta
from twisted.internet import defer
from twisted.internet.defer import inlineCallbacks, returnValue


@inlineCallbacks  # yield + returnValue require the inlineCallbacks decorator
def dump_data(self):
    if not self.public_url:
        returnValue(False)
    stats = yield find_stats({'user': self.user}, filter=sortasc('timestamp'), timeout=120)
    dates = [s['timestamp'] for s in stats]
    tweets = [s['tweets'] for s in stats]
    tweets_diff = [a - b for a, b in zip(tweets[1:], tweets[:-1])]
    followers = [s['followers'] for s in stats]
    followers_diff = [a - b for a, b in zip(followers[1:], followers[:-1])]
    rts_diff = [s['rts_last_hour'] for s in stats]
    # Rebuild the cumulative RT count from the hourly deltas.
    rts = []
    n = 0
    for a in rts_diff:
        n += a
        rts.append(n)
    jsondata = {}
    imax = len(dates) - 1
    for i, date in enumerate(dates):
        ts = int(time.mktime(date.timetuple()))
        jsondata[ts] = {
            'tweets': tweets[i],
            'followers': followers[i],
            'rts': rts[i]
        }
        if i < imax:
            jsondata[ts].update({
                'tweets_diff': tweets_diff[i],
                'followers_diff': followers_diff[i],
                # rts_last_hour of the next snapshot covers this interval.
                'rts_diff': rts_diff[i + 1]
            })
    try:
        jsondir = os.path.join('web', 'data')
        if not os.path.exists(jsondir):
            os.makedirs(jsondir)
            os.chmod(jsondir, 0o755)
        with open(os.path.join(jsondir, 'stats_%s.json' % self.user), 'w') as outfile:
            write_json(jsondata, outfile)
    except IOError as e:
        loggerr("Could not write web/data/stats_%s.json : %s" % (self.user, e), action="stats")
    try:
        from plots import CumulativeCurve, DailyHistogram, WeekPunchCard
        imgdir = os.path.join('web', 'img')
        if not os.path.exists(imgdir):
            os.makedirs(imgdir)
            os.chmod(imgdir, 0o755)
        CumulativeCurve(dates, tweets, 'Total tweets', imgdir, 'tweets_%s' % self.user)
        CumulativeCurve(dates, followers, 'Total followers', imgdir, 'followers_%s' % self.user)
        CumulativeCurve(dates, rts, 'Total RTs since %s' % dates[0], imgdir, 'rts_%s' % self.user)
        DailyHistogram(dates[:-1], tweets_diff, 'New tweets', imgdir, 'new_tweets_%s' % self.user)
        DailyHistogram(dates[:-1], followers_diff, 'New followers', imgdir, 'new_followers_%s' % self.user)
        DailyHistogram(dates[:-1], rts_diff[1:], 'New RTs', imgdir, 'new_rts_%s' % self.user)
        WeekPunchCard(dates[:-1], tweets_diff, 'Tweets punchcard', imgdir, 'tweets_card_%s' % self.user)
        WeekPunchCard(dates[:-1], followers_diff, 'Followers punchcard', imgdir, 'followers_card_%s' % self.user)
        WeekPunchCard(dates[:-1], rts_diff[1:], 'RTs punchcard', imgdir, 'rts_card_%s' % self.user)
    except Exception as e:
        loggerr("Could not write images in web/img for %s : %s" % (self.user, e), action="stats")
    data = {'user': self.user, 'url': self.public_url}
    self.render_template("static_stats.html", self.user, data)
    returnValue(True)
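
# Minimal standalone sketch of the diff logic used above: each *_diff series
# pairs a snapshot with its predecessor, so it is one element shorter than its
# source list, which is why only entries before the last timestamp get the
# *_diff keys. The sample values below are invented for illustration.
import time
from datetime import datetime

def consecutive_diffs(series):
    # Delta between each sample and the one before it.
    return [a - b for a, b in zip(series[1:], series[:-1])]

dates = [datetime(2013, 7, 1, h) for h in (10, 11, 12)]
tweets = [120, 125, 131]
diffs = consecutive_diffs(tweets)  # [5, 6]

# Keyed by Unix timestamp, mirroring the payload dump_data() serializes.
ts = int(time.mktime(dates[0].timetuple()))
print({ts: {'tweets': tweets[0], 'tweets_diff': diffs[0]}})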
@inlineCallbacks
def getFeeds(db, channel, database, url_format=True, add_url=None, randorder=None):
    urls = []
    # Case-insensitive exact match on the channel name.
    queries = yield db['feeds'].find(
        {'database': database, 'channel': re.compile(r'^%s$' % channel, re.I)},
        fields=['name', 'query'],
        filter=sortasc('timestamp'))
    if database == "tweets":
        # Build combined queries for Icerocket/Topsy or the Twitter API
        # from the search words stored in the db.
        query = ""
        try:
            queries = [queries[i] for i in randorder]
        except (TypeError, IndexError):
            pass  # no randorder given, or indices out of sync with the feeds
        for feed in queries:
            # Queries starting with @ should return only tweets from the corresponding user.
            arg = str(feed['query'].encode('utf-8')).replace('@', 'from:')
            rawrg = arg
            space = " OR "
            if url_format:
                if not arg.startswith('from:') and not arg.startswith('#'):
                    arg = "(%s)" % urllib.quote(arg, '')
                if add_url:
                    space = "+OR+"
                arg = "%s%s" % (arg, space)
            else:
                arg = " «%s» | " % arg
            if " OR " in rawrg or " -" in rawrg:
                # Queries that already combine or exclude terms are sent on their own.
                urls.append(formatQuery(arg, add_url))
            elif query.count(space) < 3:
                # Pack up to three plain terms into one combined query.
                query += arg
            else:
                urls.append(formatQuery(query, add_url))
                query = arg
        if query != "":
            urls.append(formatQuery(query, add_url))
    else:
        if not url_format:
            urls = assembleResults([feed['name'] for feed in queries])
        elif database == "pages":
            urls = [(str(feed['query']), feed['name']) for feed in queries]
        else:
            urls = [str(feed['query']) for feed in queries]
    defer.returnValue(urls)
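
# Standalone sketch of the batching rule in the tweets branch above: a stored
# query that already contains an " OR " or an exclusion (" -") is emitted on
# its own, while plain terms are packed at most three per combined query
# (query.count(space) < 3 flushes a batch once it holds three terms).
# format_query() is a simplified stand-in for the project's formatQuery(),
# whose add_url handling is omitted here.

def format_query(query, space=" OR "):
    # Drop the trailing separator left by the accumulation loop.
    return query[:-len(space)] if query.endswith(space) else query

def batch_queries(words, space=" OR "):
    urls, query = [], ""
    for word in words:
        arg = word + space
        if " OR " in word or " -" in word:
            urls.append(format_query(arg, space))    # already complex: emit alone
        elif query.count(space) < 3:
            query += arg                             # batch up to three plain terms
        else:
            urls.append(format_query(query, space))  # batch full: flush and restart
            query = arg
    if query:
        urls.append(format_query(query, space))
    return urls

print(batch_queries(["a", "b", "c", "d", "e"]))
# -> ['a OR b OR c', 'd OR e']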
@inlineCallbacks
def digest(self, hours, channel):
    now = datetime.today()
    since = now - timedelta(hours=hours)
    re_chan = re.compile(r'^#*%s$' % channel.lower(), re.I)
    query = {'channel': re_chan, 'timestamp': {'$gte': since}}
    data = {
        "channel": channel,
        "t0": clean_date(since),
        "t1": clean_date(now),
        "news": [],
        "links": [],
        "imgs": [],
        "tweets": []
    }
    # Group news items by source, in timestamp order within each source.
    news = yield SingleMongo('news', 'find', query,
                             fields=['sourcename', 'source', 'link', 'message'],
                             filter=sortasc('sourcename') + sortasc('timestamp'))
    lastsource = ""
    for n in news:
        source = n["sourcename"]
        if source != lastsource:
            lastsource = source
            data["news"].append({
                "name": source,
                "link": n["link"],
                "elements": []
            })
        data["news"][-1]["elements"].append({
            "text": n["message"],
            "link": n["link"]
        })
    del news
    tweets = yield SingleMongo('tweets', 'find', query,
                               fields=['screenname', 'message', 'link'],
                               filter=sortasc('id'))
    links = {}
    imgs = {}
    filters = yield SingleMongo('filters', 'find', {'channel': re_chan}, fields=['keyword'])
    filters = [keyword['keyword'].lower() for keyword in filters]
    for t in tweets:
        skip = False
        tuser_low = t['screenname'].lower()
        # Drop tweets from filtered users...
        if "@%s" % tuser_low in filters:
            continue
        msg_low = t["message"].lower()
        # ...and tweets matching a filtered keyword, unless they involve the bot's own user.
        if not ((self.user and self.user in msg_low) or self.user == tuser_low):
            for k in filters:
                if k in msg_low:
                    skip = True
                    break
        if skip:
            continue
        for link in URL_REGEX.findall(t["message"]):
            link, _ = clean_url(link[2])
            if not link.startswith("http"):
                continue
            tid = re_twitmedia.search(link)
            if tid:
                # Collect each Twitter media id only once.
                tid = tid.group(1)
                if tid not in imgs:
                    imgs[tid] = 1
                    data["imgs"].append({"id": tid})
                continue
            if re_tweet.match(link):
                continue
            if link not in links:
                links[link] = {
                    "link": link,
                    "first": "%s: %s" % (t["screenname"], t["message"].replace(link, "")),
                    "firstlink": t["link"],
                    "count": 0
                }
            links[link]["count"] += 1
    del tweets
    # Most-shared links first, ties broken alphabetically (see the sort-key sketch below).
    data["tweets"] = sorted(links.values(),
                            key=lambda x: "%06d-%s" % (10**6 - x['count'], x['link']))
    del links
    # Links posted on IRC by anyone but the bot itself.
    query["user"] = {"$ne": config.BOTNAME.lower()}
    query["message"] = re.compile(r'https?://')
    links = yield SingleMongo('logs', 'find', query,
                              fields=['screenname', 'message'],
                              filter=sortasc('timestamp'))
    for entry in links:
        for link in re_links.findall(entry["message"]):
            data["links"].append({
                "user": entry["screenname"],
                "msg": entry["message"],
                "link": link
            })
    del links
    filename = "%s_%s_%s" % (channel.lstrip("#"),
                             data["t0"].replace(" ", "+"),
                             data["t1"].replace(" ", "+"))
    if not self.render_template("digest.html", filename, data):
        returnValue("Wooops could not generate html for %s..." % filename)
    returnValue("Digest for the last %s hours available at %sdigest_%s.html"
                % (hours, self.public_url, filename))
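
# Standalone sketch of the string sort key used for data["tweets"] above:
# zero-padding 10**6 - count makes higher counts sort lexicographically
# earlier, so a single string sort yields count-descending, link-ascending
# order. The link records below are invented for illustration; a tuple key
# such as (-x['count'], x['link']) would produce the same ordering.
links = [
    {'link': 'http://b.example', 'count': 2},
    {'link': 'http://a.example', 'count': 7},
    {'link': 'http://c.example', 'count': 7},
]
ranked = sorted(links, key=lambda x: "%06d-%s" % (10**6 - x['count'], x['link']))
print([l['link'] for l in ranked])
# -> ['http://a.example', 'http://c.example', 'http://b.example']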