# -*- coding: utf-8 -*-
from collections import Counter
from datetime import datetime, timedelta
import io

import falcon
import ujson as json

from streamer import get_db, get_keywords
from hortiradar import tokenizeRawTweetText, admins, users


tweets = get_db().tweets
KEYWORDS = get_keywords()

# strptime layout used to parse the 'start' and 'end' request parameters
time_format = "%Y-%m-%d-%H-%M-%S"

# stop words to filter out in word cloud
# io.open decodes the file as UTF-8 while reading, so the entries are text
# strings on both Python 2 and Python 3 (calling .decode() on each line only
# works where file lines are byte strings).
with io.open("data/stoplist-nl.txt", encoding="utf-8") as f:
    # Kept as a {word: 1} mapping, exactly as before, so existing lookups
    # against stop_words keep working.
    stop_words = {line.strip(): 1 for line in f}


def get_dates(req, resp, resource, params):
    """Parse the 'start' and 'end' datetime parameters.

    Reads both values with ``req.get_param`` and parses them with
    ``time_format``.  A missing or empty 'start' falls back to
    ``datetime(2001, 1, 1)``; a missing or empty 'end' falls back to
    ``datetime(3001, 1, 1)``.  The results are stored in ``params["start"]``
    and ``params["end"]``.

    ``resp`` and ``resource`` are not used by the visible code.
    NOTE(review): the signature looks like a Falcon "before" hook --
    confirm against where this function is registered.
    """
    try:
        start = req.get_param("start")
        start = datetime.strptime(start, time_format) if start else datetime(2001, 1, 1)
        end = req.get_param("end")
        end = datetime.strptime(end, time_format) if end else datetime(3001, 1, 1)
        params["start"] = start
        params["end"] = end
    except ValueError:
        # strptime raises ValueError when a value does not match time_format.
        msg = "Invalid datetime format string, use: %s" % time_format
        # NOTE(review): the visible source ends here; how msg is reported to
        # the client is not shown in this chunk -- TODO confirm.
from streamer import get_db, get_keywords, find_keywords_and_groups
from hortiradar import tokenizeRawTweetText


# Back-fill script: re-tokenize the text of every stored tweet and write the
# matched keywords, their groups and the keyword count back onto the document.
tweets = get_db().tweets
keywords = get_keywords()

for doc in tweets.find():
    words = tokenizeRawTweetText(doc["tweet"]["text"])
    matched, groups = find_keywords_and_groups(words, keywords)

    selector = {"_id": doc["_id"]}
    changes = {
        "$set": {
            "keywords": matched,
            "groups": groups,
            "num_keywords": len(matched),
        }
    }
    tweets.update_one(selector, changes)