Example #1
def test_document_translate_fallback(self):
    d_en = document(locale='en-US',
                    title=u'How to delete Google Chrome?',
                    save=True)
    invalid_translate = reverse('wiki.document', locale='tr',
                                args=[d_en.slug])
    self.assertEqual(d_en, Document.from_url(invalid_translate))
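This test creates a document that exists only in en-US, builds the URL for a Turkish ('tr') translation that was never created, and asserts that Document.from_url resolves that URL back to the English original; in other words, the lookup falls back to the default-locale document instead of returning None.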
Example #2
def pageviews_by_document(start_date, end_date):
    """Return the number of pageviews by document in a given date range.

    * Only returns en-US documents for now since that's what we did with
    webtrends.

    Returns a dict mapping document IDs to pageview counts, e.g.:
        {1: 42,
         7: 1337,
         ...}
    """
    counts = {}
    request = _build_request()
    start_index = 1
    max_results = 10000

    while True:  # To deal with pagination

        @retry_503
        def _make_request():
            return request.get(
                ids='ga:' + profile_id,
                start_date=str(start_date),
                end_date=str(end_date),
                metrics='ga:pageviews',
                dimensions='ga:pagePath',
                filters='ga:pagePathLevel2==/kb/;ga:pagePathLevel1==/en-US/',
                max_results=max_results,
                start_index=start_index).execute()

        results = _make_request()

        for result in results['rows']:
            path = result[0]
            pageviews = int(result[1])
            doc = Document.from_url(path, id_only=True, check_host=False)
            if not doc:
                continue

            # The same document can appear multiple times due to url params.
            counts[doc.pk] = counts.get(doc.pk, 0) + pageviews

        # Move to next page of results.
        start_index += max_results
        if start_index > results['totalResults']:
            break

    return counts
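The retry_503 decorator used above is not included in the snippet. A minimal sketch of what such a decorator could look like, assuming it retries the Google Analytics call once when the API answers with HTTP 503; the error type and its attributes are assumptions here, not the source project's actual implementation:

import time
from functools import wraps


def retry_503(func):
    """Retry func once if it fails with an HTTP 503.

    Hypothetical sketch; the real decorator may differ in error type,
    back-off, and retry count.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as exc:
            # Assumption: the API client raises an error carrying the
            # HTTP response, e.g. googleapiclient's HttpError.
            status = getattr(getattr(exc, 'resp', None), 'status', None)
            if status == 503:
                time.sleep(1)  # brief pause, then a single retry
                return func(*args, **kwargs)
            raise
    return wrapper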
Example #3
    def _visit_counts(cls, json_data):
        """Given WebTrends JSON data, return a dict of doc IDs and visits:

            {document ID: number of visits, ...}

        If there is no interesting data in the given JSON, return {}.

        """
        # We're very defensive here, as WebTrends has been known to return
        # invalid garbage of various sorts.
        try:
            data = json.loads(json_data)['data']
        except (ValueError, KeyError, TypeError):
            raise StatsException('Error extracting data from WebTrends JSON')

        try:
            pages = (data[data.keys()[0]]['SubRows'] if data.keys()
                     else {}).iteritems()
        except (AttributeError, IndexError, KeyError, TypeError):
            raise StatsException('Error extracting pages from WebTrends data')

        counts = {}
        for url, page_info in pages:
            doc = Document.from_url(
                url,
                required_locale=settings.LANGUAGE_CODE,
                id_only=True,
                check_host=False)
            if not doc:
                continue

            # Get visit count:
            try:
                visits = int(page_info['measures']['Visits'])
            except (ValueError, KeyError, TypeError):
                continue

            # Sometimes WebTrends repeats a URL modulo a space, etc. These can
            # resolve to the same document. An arbitrary one wins.
            # TODO: Should we be summing these?
            if doc.pk in counts:
                log.info('WebTrends has the following duplicate URL for this '
                         'document: %s' % url)
            counts[doc.pk] = visits
        return counts
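Taken together, the examples exercise three keyword arguments of Document.from_url: required_locale restricts matches to a single locale, id_only asks for a lightweight object carrying little more than the primary key, and check_host=False skips hostname validation so bare paths (as returned by analytics tools) can be resolved. A minimal usage sketch with a hypothetical path; the argument meanings are inferred from the calls above, not from documentation:

path = '/en-US/kb/how-to-delete-google-chrome'  # hypothetical URL path
doc = Document.from_url(path,
                        required_locale='en-US',  # only match en-US docs
                        id_only=True,             # load just the id/pk
                        check_host=False)         # accept host-less paths
if doc is not None:
    print(doc.pk)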