def generate_totals(today):
    """Aggregate published wordcounts for a single day.

    Args:
        today: ISO-format date string (YYYY-MM-DD), matched against
            WordcountSummary.iso_published_date.

    Returns:
        dict with:
          "wordcount" -- total wordcount across all matching records.
          "sections"  -- per-section subtotals keyed by section_id.
    """
    query = WordcountSummary.query(WordcountSummary.iso_published_date == today)
    totals = {"wordcount": 0, "sections": {}}
    for summary in query:
        totals["wordcount"] += summary.wordcount
        # Fix: "sections" was initialised but never filled in the original.
        # Every record is created with a section_id (see read_todays_content),
        # so tally a per-section subtotal here.
        sections = totals["sections"]
        sections[summary.section_id] = (
            sections.get(summary.section_id, 0) + summary.wordcount)
    return totals
def read_todays_content(page = 1):
    """Pull today's articles from the Guardian Content API and upsert one
    WordcountSummary record per article, recursing through result pages.

    Args:
        page: 1-based Content API page to fetch; the function calls itself
            with page + 1 until the API's reported page count is reached.

    Returns:
        None. Side effects only: datastore puts and warning logs.
    """
    url = "http://content.guardianapis.com/search"
    today = datetime.date.today()
    payload = {
        "page": str(page),
        "page-size": "50",
        "format": "json",
        "show-fields": "wordcount",
        # NOTE(review): the Content API's filter parameter is "tag" and the
        # response-augmentation parameter is "show-tags"; "tags" is probably
        # ignored by the API -- confirm the intent (left unchanged here).
        "tags": "tone",
        "from-date": today.isoformat(),
        "api-key": configuration.lookup('API_KEY'),
    }
    final_url = url + "?" + urlencode(payload)
    # 9-second deadline keeps the fetch inside App Engine request limits.
    result = urlfetch.fetch(final_url, deadline=9)
    if result.status_code != 200:
        logging.warning("Failed to read from the Content Api")
        logging.warning('Status code: %d' % result.status_code)
        return
    data = json.loads(result.content)
    api_response = data.get("response", {})
    total_pages = api_response.get("pages", None)
    if not total_pages:
        return
    for item in api_response.get("results", []):
        fields = item.get("fields", {})
        if 'wordcount' not in fields:
            continue
        path = item["id"]
        # Upsert. Query.get() returns the first match or None in a single
        # datastore operation, replacing the original count() + iter().next()
        # pair (two operations for the same record).
        # NOTE(review): an unused local `live_flag = tags.is_live(item)` was
        # removed here; its value was never read. Restore if is_live has
        # side effects -- confirm against the tags module.
        record = WordcountSummary.query(WordcountSummary.path == path).get()
        if record is not None:
            current_wordcount = read_wordcount(fields)
            if record.wordcount != current_wordcount:
                record.wordcount = current_wordcount
                record.put()
            continue
        WordcountSummary(
            path=path,
            section_id=item["sectionId"],
            wordcount=read_wordcount(fields),
            iso_published_date=item["webPublicationDate"][:10],
        ).put()
    if page != int(total_pages):
        read_todays_content(page + 1)
def historic_data(date):
    """Return every WordcountSummary published on *date* as a list.

    Args:
        date: ISO-format date string (YYYY-MM-DD), matched against
            WordcountSummary.iso_published_date.

    Returns:
        list of WordcountSummary entities.
    """
    # list() materialises the query directly; the original identity
    # comprehension ([s for s in query]) was a redundant copy loop.
    query = WordcountSummary.query(WordcountSummary.iso_published_date == date)
    return list(query)