Пример #1
0
def generate_totals(today):
	"""Add up the word counts of every summary published on ``today``.

	``today`` is compared for equality against
	``WordcountSummary.iso_published_date``.

	Returns a dict with two keys: ``"wordcount"`` holds the combined word
	count of the matching summaries (0 when there are none) and
	``"sections"`` holds a dict that this function leaves empty.
	"""
	published_today = WordcountSummary.iso_published_date == today
	summaries = WordcountSummary.query(published_today)

	combined = sum(summary.wordcount for summary in summaries)

	return {"wordcount" : combined, "sections" : {}}
Пример #2
0
def read_todays_content(page = 1):
	"""Read today's items from the Content API and store their word counts.

	Starting at ``page``, requests the search endpoint page by page (50
	items per page, from today's date onwards). For every returned item
	that carries a ``wordcount`` field, the WordcountSummary stored under
	the item's ``id`` is updated when its word count has changed, or a new
	WordcountSummary is created when none exists yet.

	Reading stops quietly (a warning is logged) on the first response whose
	status is not 200, when the response reports no pages, or once the last
	reported page has been processed.

	Args:
		page: 1-based number of the first page to request.

	Returns:
		None.
	"""
	url = "http://content.guardianapis.com/search"

	# Resolve the date and the API key once, so every page of this run asks
	# for the same day even if the crawl straddles midnight.
	payload = {
		"page-size" : "50",
		"format" : "json",
		"show-fields" : "wordcount",
		"tags" : "tone",
		"from-date" : datetime.date.today().isoformat(),
		"api-key" : configuration.lookup('API_KEY'),
		}

	# Walk the pages iteratively rather than recursively: the stack depth
	# no longer grows with the number of pages.
	while True:
		payload["page"] = str(page)

		final_url = url + "?" + urlencode(payload)

		response = urlfetch.fetch(final_url, deadline = 9)

		if response.status_code != 200:
			logging.warning("Failed to read from the Content Api")
			logging.warning('Status code: %d', response.status_code)
			return

		data = json.loads(response.content)

		api_response = data.get("response", {})

		total_pages = api_response.get("pages", None)

		if not total_pages:
			return

		for item in api_response.get("results", []):
			fields = item.get("fields", {})

			if 'wordcount' not in fields:
				continue

			path = item["id"]
			current_wordcount = read_wordcount(fields)

			# A single get() returns the first match or None, replacing the
			# former count() + iter().next() pair of datastore calls.
			record = WordcountSummary.query(WordcountSummary.path == path).get()

			if record is None:
				WordcountSummary(path = path,
					section_id = item["sectionId"],
					wordcount = current_wordcount,
					# Keep only the YYYY-MM-DD prefix of the timestamp.
					iso_published_date = item["webPublicationDate"][:10],).put()
			elif record.wordcount != current_wordcount:
				# Only write when the count actually changed.
				record.wordcount = current_wordcount
				record.put()

		# ">=" rather than "!=": a start page beyond the last page must
		# terminate instead of paging forward indefinitely.
		if page >= int(total_pages):
			return

		page += 1
Пример #3
0
def historic_data(date):
	"""Return a list of every WordcountSummary whose ``iso_published_date`` equals ``date``."""
	published_on_date = WordcountSummary.iso_published_date == date
	summaries = WordcountSummary.query(published_on_date)
	return list(summaries)