def test_document_translate_fallback(self):
    """Document.from_url() on an untranslated locale URL resolves to the
    original (parent) document rather than failing."""
    parent = document(locale='en-US',
                      title=u'How to delete Google Chrome?', save=True)
    untranslated_url = reverse('wiki.document', locale='tr',
                               args=[parent.slug])
    self.assertEqual(parent, Document.from_url(untranslated_url))
def pageviews_by_document(start_date, end_date):
    """Return the number of pageviews by document in a given date range.

    * Only returns en-US documents for now since that's what we did
      with webtrends.

    :arg start_date: start of the range (stringified into the GA query).
    :arg end_date: end of the range (stringified into the GA query).

    Returns a dict with pageviews for each document:
        {<document_id>: <pageviews>, 1: 42, 7: 1337, ...}
    """
    counts = {}
    request = _build_request()
    start_index = 1
    max_results = 10000

    while True:  # To deal with pagination

        @retry_503
        def _make_request():
            return request.get(
                ids='ga:' + profile_id,
                start_date=str(start_date),
                end_date=str(end_date),
                metrics='ga:pageviews',
                dimensions='ga:pagePath',
                filters='ga:pagePathLevel2==/kb/;ga:pagePathLevel1==/en-US/',
                max_results=max_results,
                start_index=start_index).execute()

        results = _make_request()

        # The GA Core Reporting API omits the 'rows' key entirely when a
        # page of results has no data, so default to an empty list to
        # avoid a KeyError on empty result sets.
        for result in results.get('rows', []):
            path = result[0]
            pageviews = int(result[1])
            doc = Document.from_url(path, id_only=True, check_host=False)
            if not doc:
                continue

            # The same document can appear multiple times due to url
            # params, so accumulate rather than overwrite.
            counts[doc.pk] = counts.get(doc.pk, 0) + pageviews

        # Move to next page of results.
        start_index += max_results
        if start_index > results['totalResults']:
            break

    return counts
def _visit_counts(cls, json_data):
    """Given WebTrends JSON data, return a dict of doc IDs and visits:

        {document ID: number of visits, ...}

    If there is no interesting data in the given JSON, return {}.
    """
    # Be very defensive here: WebTrends has been known to return invalid
    # garbage of various sorts.
    try:
        data = json.loads(json_data)['data']
    except (ValueError, KeyError, TypeError):
        raise StatsException('Error extracting data from WebTrends JSON')

    try:
        if data.keys():
            sub_rows = data[data.keys()[0]]['SubRows']
        else:
            sub_rows = {}
        pages = sub_rows.iteritems()
    except (AttributeError, IndexError, KeyError, TypeError):
        raise StatsException('Error extracting pages from WebTrends data')

    counts = {}
    for url, page_info in pages:
        doc = Document.from_url(
            url, required_locale=settings.LANGUAGE_CODE, id_only=True,
            check_host=False)
        if not doc:
            continue

        # Pull the visit count; skip pages whose count is missing or
        # malformed.
        try:
            visits = int(page_info['measures']['Visits'])
        except (ValueError, KeyError, TypeError):
            continue

        # Sometimes WebTrends repeats a URL modulo a space, etc. These can
        # resolve to the same document. An arbitrary one wins.
        # TODO: Should we be summing these?
        if doc.pk in counts:
            log.info('WebTrends has the following duplicate URL for this '
                     'document: %s' % url)
        counts[doc.pk] = visits

    return counts
def test_document_translate_fallback(self):
    """A URL for a locale with no translation resolves via
    Document.from_url() to the original document."""
    original = document(locale="en-US",
                        title=u"How to delete Google Chrome?", save=True)
    url_without_translation = reverse("wiki.document", locale="tr",
                                      args=[original.slug])
    self.assertEqual(original,
                     Document.from_url(url_without_translation))