Пример #1
0
    def test_fake_query(self):
        """Check profile lookup and table fetching through the query service."""
        google_account = model.Account.create(service='google')
        Session.commit()

        # The profile is requested through two separately built query
        # objects; both must report the same website.
        query = api.account.query_service(self.request, account=google_account)
        profile = query.get_profile()
        self.assertEqual(profile[u'websiteUrl'], u'example.com')

        query = api.account.query_service(self.request, account=google_account)
        profile = query.get_profile()
        self.assertEqual(profile[u'websiteUrl'], u'example.com')

        # Page table: one dimension, two metrics, capped at five results.
        page_dimensions = [Column('ga:pagePath')]
        page_metrics = [
            Column('ga:pageviews', type_cast=int, threshold=0),
            Column('ga:nthWeek'),
        ]
        pages = query.get_table(
            {'max-results': 5},
            dimensions=page_dimensions,
            metrics=page_metrics,
        )
        self.assertEqual(len(pages.rows), 5)
        second_row = pages.rows[1]
        self.assertEqual(second_row.get('ga:pagePath'), '/account/create')
        self.assertEqual(second_row.get('ga:pageviews'), 15001)
        self.assertEqual(pages.get('ga:pageviews').max_row[0], 16399)

        # Month table: six rows for month 01 followed by one row for month 02.
        months = query.get_table({'max-results': 7}, dimensions=[Column('ga:month')])
        expected_months = [[u'01'] for _ in range(6)] + [[u'02']]
        self.assertEqual([list(cells) for cells in months.iter_rows()], expected_months)
Пример #2
0
    def test_rows(self):
        """Check row bookkeeping, min/max tracking, visible order and tagging."""
        def positive_int(n):
            # Falsy or negative input maps to None; everything else is cast.
            if n and not n < 0:
                return int(n)
            return None

        columns = [
            Column('foo', visible=1),
            Column('bar', visible=0),
            Column('baz', type_cast=positive_int, average=100),
        ]
        table = Table(columns)

        data = [
            (9999, '1', 123),
            (123, '2', 1234),
            (0, '3', 23),
            (123, '4', 123),
            (123, '5', 0),
        ]
        for values in data:
            table.add(values)

        self.assertEqual(len(table.rows), len(data))

        # Columns without a type_cast do not track min/max rows.
        self.assertEqual(table.get('foo').min_row, (None, None))
        self.assertEqual(table.get('bar').max_row, (None, None))

        # The trailing 0 in 'baz' is cast to None, so 23 stays the minimum.
        baz = table.get('baz')
        self.assertEqual(baz.min_row[0], 23)
        self.assertEqual(baz.max_row[0], 1234)

        # iter_visible yields the visible columns first (ordered by their
        # `visible` rank), then the matching cells of each row.
        visible = table.iter_visible()
        header = list(next(visible))
        self.assertEqual(header, [table.columns[1], table.columns[0]])
        first_cells = list(next(visible))
        self.assertEqual(first_cells, ['1', 9999])

        table.tag_rows()
        self.assertFalse(table.rows[-1].tags)
Пример #3
0
 def _get_search_keywords(self, google_query, interval_field):
     """Return the organic search keyword table, showing sessions per keyword."""
     query_params = {
         'ids': 'ga:%s' % self.remote_id,
         # Starts at the previous period so the range carries an extra interval.
         'start-date': self.previous_date_start,
         'end-date': self.date_end,
         'sort': '-{},-ga:sessions'.format(interval_field),
         'filters': 'ga:keyword!=(not provided);ga:medium==organic',
         'max-results': '5',
     }
     keyword_dimensions = [
         Column(interval_field),
         Column('ga:keyword', label='Search Keywords', type_cast=_prune_abstract, visible=1),
     ]
     keyword_metrics = [
         Column('ga:sessions', label='Visits', type_cast=int, type_format=h.human_int, visible=0, threshold=0),
         Column('ga:avgSessionDuration', label='Time On Site', type_cast=_cast_time, type_format=h.human_time, threshold=0),
         Column('ga:bounceRate', label='Bounce Rate', type_cast=_cast_percent, type_format=_format_percent, reverse=True, threshold=0),
     ]

     keywords = self._get_interval_table(
         google_query,
         interval_field,
         params=query_params,
         dimensions=keyword_dimensions,
         metrics=keyword_metrics,
     )
     keywords.set_visible('ga:sessions', 'ga:keyword')
     # The former delta split / sort / limit post-processing is disabled;
     # the table is returned as fetched.
     return keywords
Пример #4
0
    def _get_ecommerce(self, google_query, interval_field, limit=10):
        """Return the per-product revenue table, capped at ``limit`` rows.

        When there are more than ``limit`` products, the rows from position
        ``limit - 1`` onwards are folded into a single "(N other products)"
        row whose revenue and quantity are the sums of the folded rows. Every
        row is then tagged with its formatted sales count.
        """
        query_params = {
            'ids': 'ga:%s' % self.remote_id,
            'start-date': self.date_start,
            'end-date': self.date_end,
            'sort': '-{}'.format(interval_field),
        }
        product_dimensions = [
            Column(interval_field),
            Column('ga:productName', label="Product", visible=1),
        ]
        product_metrics = [
            Column('ga:itemRevenue', label="Revenue", type_cast=float,
                   type_format=_format_dollars, visible=0, threshold=0),
            Column('ga:itemQuantity', label="Sales", type_cast=int,
                   type_format=h.human_int),
        ]
        t = google_query.get_table(
            params=query_params,
            dimensions=product_dimensions,
            metrics=product_metrics,
        )
        t.sort(reverse=True)

        idx_revenue = t.column_to_index['ga:itemRevenue']
        idx_sales = t.column_to_index['ga:itemQuantity']

        # Fold the overflow into one summary row so the table has `limit` rows.
        if len(t.rows) > limit:
            keep = limit - 1
            overflow = t.rows[keep:]
            t.rows = t.rows[:keep]

            # Start from a copy of the first overflow row, then accumulate
            # the summable columns of the remaining ones.
            merged = overflow[0].values[:]
            for idx in (idx_revenue, idx_sales):
                for other in overflow[1:]:
                    merged[idx] += other.values[idx]

            others_label = h.format_int(len(overflow), u"{:,} other product")
            merged[t.column_to_index['ga:productName']] = "(%s)" % others_label
            t.add(merged)

        for row in t.rows:
            sales = row.values[idx_sales]
            row.tag(h.format_int(sales, u"{:,} Sale"))

        # A total row and the extra-week delta split used to be applied here;
        # both are disabled.
        return t
Пример #5
0
    def _get_interval_table(self, google_query, interval_field, params, dimensions=None, metrics=None, _cache_keys=None):
        """Fetch a table, emulating the synthetic ``bm:quarter`` interval.

        For any other ``interval_field`` this is a plain pass-through to
        ``google_query.get_table``. For ``bm:quarter`` one query is issued per
        quarter between ``params['start-date']`` and ``params['end-date']``
        (most recent quarter first) and the rows are merged into one table,
        with a ``"<year>Q<quarter>"`` value inserted at the position of the
        ``bm:quarter`` column.

        The caller's ``dimensions`` list and ``params`` dict are not modified.
        """
        if interval_field != 'bm:quarter':
            return google_query.get_table(params=params, dimensions=dimensions, metrics=metrics, _cache_keys=_cache_keys)

        # Build a new list instead of `+=`: that mutated the caller's list in
        # place and raised TypeError when `dimensions` was left as None.
        dimensions = list(dimensions or []) + [Column('ga:year')]
        columns = google_query._columns_to_params(params.copy(), dimensions=dimensions, metrics=metrics)
        result = Table(columns).new()  # Decouple column refs
        quarter_idx = result.column_to_index['bm:quarter']

        # The per-quarter queries must not request the synthetic dimension.
        quarter_dimensions = [d for d in dimensions if not d.id.startswith('bm:quarter')]

        # We assume that there are no date dimensions... Maybe not a safe assumption?
        for (yr, q) in reversed(list(iter_quarters(params['start-date'], params['end-date']))):
            start_date, end_date = quarter_to_dates(q, yr)
            quarter_params = params.copy()
            quarter_params.update({
                'start-date': start_date,
                'end-date': end_date,
            })
            # Each query covers a single quarter, so the interval sort is dropped.
            quarter_params.pop('sort', None)
            t = google_query.get_table(params=quarter_params, dimensions=quarter_dimensions, metrics=metrics, renew=True, _cache_keys=_cache_keys)

            quarter_str = "{}Q{}".format(yr, q)
            for row in t.rows:
                # Copy so the fetched table's rows are left untouched.
                vals = list(row.values)
                vals.insert(quarter_idx, quarter_str)
                result.add(vals)

        return result
Пример #6
0
def inject_table_delta(a,
                       b,
                       join_column,
                       compare_column='ga:pageviews',
                       num_normal=10,
                       num_missing=5):
    """
    Annotate rows in table `a` with deltas from table `b`.

    Also add up to `num_missing` top rows from `b` that are not present in `a`.

    It's basically a left-join with extra sauce.

    Rows of `a` are matched to `b` on `join_column` and compared on
    `compare_column`:

    * no (or zero) value in `b`  -> tagged ``type='new'``
    * relative change above 20%  -> tagged ``type='delta'``

    `a` is then truncated to `num_normal` rows and the missing rows from `b`
    are appended with a negated value. `a` is modified in place; nothing is
    returned.
    """
    col_compare = a.get(compare_column)
    a_lookup = set(source for source, in a.iter_rows(join_column))
    b_lookup = dict(
        (source, views)
        for source, views in b.iter_rows(join_column, compare_column))

    # Annotate delta tags
    col_compare_delta = Column('%s:delta' % col_compare.id,
                               label=col_compare.label,
                               type_cast=float,
                               type_format=h.human_delta,
                               threshold=0)
    idx_join, idx_compare = a.column_to_index[join_column], a.column_to_index[
        compare_column]
    for row in a.rows:
        j, views = row.values[idx_join], row.values[idx_compare]
        last_views = b_lookup.get(j) or 0

        if not last_views:
            row.tag(type='new')
            continue

        if views:
            # The change is expressed relative to the current value.
            views_delta = (views - last_views) / float(views)
        else:
            # The current value dropped to zero: the relative form would
            # divide by zero, so report a complete (-100%) drop instead.
            views_delta = -1.0

        if abs(views_delta) > 0.20:
            row.tag(type='delta', value=views_delta, column=col_compare_delta)

    a.limit(num_normal)

    # Add missing entries
    for source, views in b.iter_rows(join_column, compare_column):
        if num_missing <= 0:
            break  # Quota used up (also covers num_missing=0 on entry)

        if source in a_lookup:
            continue

        if col_compare.is_boring(views):
            break  # Done early

        # NOTE(review): this literal assumes `a` has exactly six columns with
        # the join column first and the compare column last -- confirm
        # against the callers.
        row = a.add([source, 0, 0, None, None, -views], is_measured=False)
        row.tag(type='views',
                value=h.human_int(-views),
                is_positive=False,
                is_prefixed=True)

        num_missing -= 1
Пример #7
0
    def _get_social_search(self, google_query, date_start, date_end, summary_metrics, max_results=10):
        """Return one table merging social-network and non-referral sources.

        Two queries are issued for the same date range (sources first, social
        networks second). Their rows are combined under a single ``source``
        column, social rows first, and the merged table is sorted descending.
        """
        profile_ids = 'ga:%s' % self.remote_id
        row_cap = str(max_results)

        # Non-referral traffic that is not attributed to a social network.
        organic_params = {
            'ids': profile_ids,
            'start-date': date_start,
            'end-date': date_end,
            'filters': 'ga:medium!=referral;ga:medium!=(not set);ga:socialNetwork==(not set)',
            'sort': '-ga:pageviews',
            'max-results': row_cap,
        }
        organic_table = google_query.get_table(
            params=organic_params,
            dimensions=[Column('ga:source', type_cast=_prune_abstract)],
            metrics=[col.new() for col in summary_metrics],
        )

        social_params = {
            'ids': profile_ids,
            'start-date': date_start,
            'end-date': date_end,
            'sort': '-ga:pageviews',
            'max-results': row_cap,
        }
        social_table = google_query.get_table(
            params=social_params,
            dimensions=[Column('ga:socialNetwork', type_cast=_prune_abstract)],
            metrics=[col.new() for col in summary_metrics],
        )

        merged_columns = [
            Column('source', label='Social & Search & Campaigns', visible=1, type_cast=_cast_title),
        ]
        merged_columns.extend(col.new() for col in summary_metrics)
        t = Table(columns=merged_columns)

        for source_table in (social_table, organic_table):
            for cells in source_table.iter_rows():
                t.add(cells)

        t.sort(reverse=True)
        return t
Пример #8
0
def explore_api(request):
    """Run an ad-hoc Google Analytics query against one of the admin's reports.

    Request params:
        report_id: required; must belong to the admin's Google account.
        dimensions, metrics: optional comma-separated column ids.
        extra: optional ``key=value`` pairs joined with ``&``; merged into
            the query params.
        date_start, date_end: optional; default to the seven days ending
            today (or ending at ``date_end`` when only that is given).

    Returns:
        ``{'table': <fetched table>}``

    Raises:
        APIControllerError: unknown report id, malformed ``extra``,
            a ``date_end`` from which no start date can be derived, or an
            invalid metric/dimension.
    """
    u = api.account.get_admin(request)
    a = u.get_account(service='google')

    report_id, dimensions, metrics, extra, date_start, date_end = get_many(
        request.params, ['report_id'],
        optional=['dimensions', 'metrics', 'extra', 'date_start', 'date_end'])
    report = model.Report.get_by(account_id=a.id, id=report_id)

    if not report:
        raise APIControllerError("Invalid report id: %s" % report_id)

    cache_keys = ('admin/explore_api', )
    google_query = api.account.query_service(request,
                                             report.account,
                                             cache_keys=cache_keys)

    date_end = date_end or date.today()
    if not date_start:
        week_end = date_end
        if not isinstance(week_end, date):
            # A date_end taken from the request is a string; subtracting a
            # timedelta from it directly raises TypeError.
            from datetime import datetime as _datetime
            try:
                week_end = _datetime.strptime(week_end, '%Y-%m-%d').date()
            except (TypeError, ValueError):
                raise APIControllerError(
                    "date_start is required when date_end is not YYYY-MM-DD: %s"
                    % date_end)
        date_start = week_end - timedelta(days=7)

    params = {
        'ids': 'ga:%s' % report.remote_id,
        'start-date': date_start,
        'end-date': date_end,
    }

    if metrics:
        metrics = [Column(m) for m in metrics.split(',')]

    if dimensions:
        dimensions = [Column(m) for m in dimensions.split(',')]

    if extra:
        for part in extra.split('&'):
            if not part:
                continue  # Tolerate stray/trailing '&'
            key, sep, value = part.partition('=')
            if not sep:
                # dict.update() on a one-element pair raised a bare ValueError.
                raise APIControllerError("Invalid extra parameter: %s" % part)
            params[key] = value

    try:
        r = google_query.get_table(params,
                                   metrics=metrics,
                                   dimensions=dimensions)
    except KeyError as e:
        raise APIControllerError("Invalid metric or dimension: %s" % e.args[0])

    return {'table': r}
Пример #9
0
    def test_join(self):
        """split_table_delta keeps the 'a' split and tags deltas against 'b'."""
        t = Table([Column('value'), Column('joincol'), Column('splitcol')])
        expected = t.new()

        source_rows = [
            (1, 'foo', 'a'),
            (2, 'bar', 'a'),
            (3, 'baz', 'a'),
            (8, 'bar', 'b'),
            (7, 'baz', 'b'),
            (6, 'quux', 'b'),
        ]
        for values in source_rows:
            t.add(values)

        # 'foo' has no counterpart in split 'b', so it carries no tag.
        expected.add((1, 'foo', 'a'))
        tagged_rows = [
            ((2, 'bar', 'a'), -6),
            ((3, 'baz', 'a'), -4),
        ]
        for values, delta in tagged_rows:
            expected.add(values).tag('Value', delta)

        split_table_delta(t, 'splitcol', 'joincol', 'value')
        self.assertEqual(dump(t), dump(expected))
Пример #10
0
    def _get_geo(self, google_query, summary_metrics):
        """Return the top five countries with each one's share of users.

        Up to 50 countries are fetched (sorted by users, descending) so the
        percentage is computed against the total of all fetched rows; only
        the first five rows are copied to the output, with an extra
        ``users`` column holding the percentage.
        """
        t = google_query.get_table(
            params={
                'ids': 'ga:%s' % self.remote_id,
                'start-date': self.date_start,
                'end-date': self.date_end,
                'sort': '-ga:users',
                'max-results': '50',
            },
            dimensions=[
                Column('ga:country',
                       label='Country',
                       visible=1,
                       type_cast=_prune_abstract),
            ],
            metrics=[col.new() for col in summary_metrics] + [
                Column('ga:sessionsPerUser',
                       label='Sessions Per User',
                       threshold=0.0,
                       type_cast=float,
                       type_format=_format_float),
            ],
        )
        # `or 0` keeps float() from failing if the column has no sum.
        total = float(t.get('ga:users').sum or 0)
        out = Table(columns=[col.new() for col in t.columns] + [
            Column(
                'users', label='Users', visible=0,
                type_format=_format_percent),
        ])
        # The raw user count is no longer the headline column here.
        out.get('ga:users')._threshold = None
        out.set_visible('users', 'ga:country')
        idx_users = t.column_to_index['ga:users']
        for row in t.rows[:5]:
            users = row.values[idx_users]
            # Guard against a zero total instead of raising ZeroDivisionError.
            share = users * 100.0 / total if total else 0.0
            out.add(row.values + [share])

        out.tag_rows()

        return out
Пример #11
0
 def _get_search_keywords(self, google_query, interval_field):
     """Return the organic search keyword table, showing users per keyword."""
     # TODO: Not used anymore, should be safe to remove.
     query_params = {
         'ids': 'ga:%s' % self.remote_id,
         'start-date': self.date_start,
         'end-date': self.date_end,
         'sort': '-{},-ga:users'.format(interval_field),
         'filters': 'ga:keyword!=(not provided);ga:medium==organic',
         'max-results': '5',
     }
     keyword_dimensions = [
         Column(interval_field),
         Column('ga:keyword', label='Search Keywords',
                type_cast=_prune_abstract, visible=1),
     ]
     keyword_metrics = [
         Column('ga:users', label='Users', type_cast=int,
                type_format=h.human_int, visible=0, threshold=0),
         Column('ga:avgSessionDuration', label='Session',
                type_cast=_cast_time, type_format=h.human_time, threshold=0),
     ]

     keywords = google_query.get_table(
         params=query_params,
         dimensions=keyword_dimensions,
         metrics=keyword_metrics,
     )
     keywords.set_visible('ga:users', 'ga:keyword')
     # The former delta split / sort / limit post-processing is disabled;
     # the table is returned as fetched.
     return keywords
Пример #12
0
 def _get_summary(self, google_query, interval_field, metrics):
     """Return the summary table of ``metrics`` grouped by ``interval_field``.

     The range starts at ``previous_date_start`` so the previous interval is
     included next to the current one; rows are sorted by interval,
     descending.
     """
     return self._get_interval_table(
         google_query,
         interval_field,
         params={
             'ids': 'ga:%s' % self.remote_id,
             'start-date': self.previous_date_start,
             'end-date': self.date_end,
             'sort': '-{}'.format(interval_field),
         },
         dimensions=[Column(interval_field)],
         metrics=metrics,
     )
Пример #13
0
    def fetch(self, google_query):
        """Fetch every table and the historic data for the monthly report.

        Fills ``self.tables`` with the ``summary``, ``geo``, ``device`` and
        ``browser`` tables, and ``self.data`` with the cumulative per-day
        history of the previous and the current month.
        """
        # Step back past the beginning of the current month, then snap to
        # the first day of the month we landed in.
        step_back = datetime.timedelta(days=self.date_start.day + 1)
        last_month_date_start = (self.date_start - step_back).replace(day=1)
        profile_ids = 'ga:%s' % self.remote_id

        def current_period_params(**extra):
            # Params shared by the breakdown tables of the current period.
            params = {
                'ids': profile_ids,
                'start-date': self.date_start,
                'end-date': self.date_end,
                'sort': '-ga:users',
            }
            params.update(extra)
            return params

        # Summary
        summary_metrics = [
            Column('ga:pageviews', label='Views', type_cast=int, type_format=h.human_int, threshold=0, visible=0),
            Column('ga:users', label='Uniques', type_cast=int, type_format=h.human_int),
            Column('ga:avgSessionDuration', label='Time On Site', type_cast=_cast_time, type_format=h.human_time, threshold=0),
            Column('ga:bounceRate', label='Bounce Rate', type_cast=_cast_percent, type_format=_format_percent, reverse=True, threshold=0),
        ]
        summary_params = {
            'ids': profile_ids,
            'start-date': last_month_date_start,  # Extra month
            'end-date': self.date_end,
            'sort': '-ga:yearMonth',
        }
        self.tables['summary'] = google_query.get_table(
            params=summary_params,
            dimensions=[Column('ga:yearMonth')],
            metrics=summary_metrics + [Column('ga:sessions', type_cast=int)],
        )

        # Breakdowns of the current period; each gets its own column copies.
        self.tables['geo'] = google_query.get_table(
            params=current_period_params(**{'max-results': '10'}),
            dimensions=[
                Column('ga:country', label='Country', visible=1, type_cast=_prune_abstract),
            ],
            metrics=[col.new() for col in summary_metrics],
        )

        self.tables['device'] = google_query.get_table(
            params=current_period_params(),
            dimensions=[
                Column('ga:deviceCategory', label='Device', visible=1, type_cast=_cast_title),
            ],
            metrics=[col.new() for col in summary_metrics],
        )

        browser_metrics = [col.new() for col in summary_metrics]
        browser_metrics.append(Column('ga:avgPageLoadTime', label='Load Time', type_cast=float))
        self.tables['browser'] = google_query.get_table(
            params=current_period_params(),
            dimensions=[
                Column('ga:browser', label='Browser', visible=1),
            ],
            metrics=browser_metrics,
        )

        for table_name in ('geo', 'device', 'browser'):
            self.tables[table_name].tag_rows()

        # TODO: Add last year's month

        historic_params = {
            'ids': profile_ids,
            'start-date': last_month_date_start,
            'end-date': self.date_end,
        }
        historic_table = google_query.get_table(
            params=historic_params,
            dimensions=[
                Column('ga:date'),
                Column('ga:yearMonth', visible=0),
            ],
            metrics=[
                Column('ga:pageviews', label='Views', type_cast=int, visible=1),
                Column('ga:users', label='Uniques', type_cast=int),
            ],
        )

        intro_config = self.config.get('intro')
        if intro_config:
            # For John Sheehan
            historic_table.set_visible('ga:yearMonth', intro_config)

        # The first item yielded is the pair of visible columns; the rest
        # are the (yearMonth, value) rows.
        iter_historic = historic_table.iter_visible()
        _, views_column = next(iter_historic)
        monthly_data, max_value = cumulative_by_month(iter_historic)
        last_month, current_month = monthly_data

        unit_name = views_column.label.lower().rstrip('s')
        day_before_period = self.date_start - datetime.timedelta(days=1)

        self.data['historic_data'] = encode_rows(monthly_data, max_value)
        self.data['total_units'] = '{:,} %s' % unit_name
        self.data['total_current'] = current_month[-1]
        self.data['total_last'] = last_month[-1]
        self.data['current_month'] = self.date_start.strftime('%B')
        self.data['last_month'] = last_month_date_start.strftime('%B')
        self.data['current_month_days'] = self.date_end.day
        self.data['last_month_days'] = day_before_period.day
Пример #14
0
    def fetch(self, google_query):
        """Fetch every table and the historic data for the app report.

        Populates ``self.tables`` (``summary``, ``screens``, ``geo``,
        ``versions`` and, depending on the data, ``ads``, ``goals`` and
        ``ecommerce``) and ``self.data`` (historic pace figures).

        Raises:
            EmptyReportError: when the summary table has no ``ga:users`` value.
        """
        days_delta = (self.date_end - self.date_start).days
        is_year_delta = days_delta > 360

        # Interval granularity follows the length of the report period:
        # more than 360 days -> year, more than 6 days -> month, else week.
        interval_field = 'ga:nthWeek'
        if is_year_delta:
            interval_field = 'ga:year'
        elif days_delta > 6:
            interval_field = 'ga:nthMonth'

        # Summary
        summary_params = {
            'ids': 'ga:%s' % self.remote_id,
            'start-date': self.previous_date_start,  # Extra week
            'end-date': self.date_end,
            'sort': '-{}'.format(interval_field),
        }
        summary_dimensions = [
            Column(interval_field),
        ]
        # Metrics shared by the summary and the screens table.
        basic_metrics = [
            Column('ga:screenviews',
                   label='Views',
                   type_cast=int,
                   type_format=h.human_int),
            Column('ga:sessions',
                   label='Sessions',
                   type_cast=int,
                   type_format=h.human_int),
            Column('ga:users',
                   label='Users',
                   type_cast=int,
                   type_format=h.human_int,
                   threshold=0,
                   visible=0),
            Column('ga:avgSessionDuration',
                   label='Session',
                   type_cast=_cast_time,
                   type_format=h.human_time,
                   threshold=0),
        ]
        summary_metrics = basic_metrics + [
            Column('ga:goalConversionRateAll',
                   label='Conversion',
                   type_cast=float,
                   type_format=_format_percent,
                   threshold=0.1),
            Column('ga:itemRevenue',
                   label="Revenue",
                   type_cast=float,
                   type_format=_format_dollars),
            Column('ga:itemQuantity',
                   label="Sales",
                   type_cast=int,
                   type_format=h.human_int),
        ]
        self.tables['summary'] = summary_table = google_query.get_table(
            params=summary_params,
            dimensions=summary_dimensions,
            metrics=summary_metrics,
        )

        # Without any users there is nothing to report on.
        if not summary_table.has_value('ga:users'):
            raise EmptyReportError()

        # Ads are included when configured explicitly or when there is revenue.
        include_ads = self.config.get('ads') or summary_table.has_value(
            'ga:itemRevenue')

        # Ads
        # FIXME: Merge this with summary_metrics once https://code.google.com/p/analytics-issues/issues/detail?id=693 is fixed.
        if include_ads:
            self.tables['ads'] = google_query.get_table(
                params={
                    'ids': 'ga:%s' % self.remote_id,
                    'start-date': self.date_start,
                    'end-date': self.date_end,
                },
                dimensions=[Column('ga:adGroup')],
                metrics=[
                    Column('ga:adCost',
                           label="Ad Spend",
                           type_cast=float,
                           type_format=_format_dollars,
                           threshold=0),
                    Column('ga:impressions',
                           label="Ad Impressions",
                           type_cast=int,
                           type_format=h.human_int,
                           threshold=0),
                    Column('ga:adClicks',
                           label="Ad Clicks",
                           type_cast=int,
                           type_format=h.human_int,
                           threshold=0),
                    Column('ga:itemRevenue',
                           label="Revenue",
                           type_cast=float,
                           type_format=_format_dollars,
                           threshold=0),
                    Column('ga:itemQuantity',
                           label="Sales",
                           type_cast=int,
                           type_format=h.human_int,
                           threshold=0),
                ],
            )

        # Screens
        screens_metrics = [col.new() for col in basic_metrics] + [
            Column('ga:exitRate',
                   label='Exit',
                   type_cast=float,
                   type_format=_format_percent,
                   reverse=True,
                   threshold=0),
        ]
        # Page load time is included unless disabled in the config.
        if self.config.get('pageloadtime', True):
            screens_metrics += [
                Column('ga:avgPageLoadTime',
                       label='Load',
                       type_cast=float,
                       type_format=h.human_time,
                       reverse=True,
                       threshold=0),
            ]

        self.tables['screens'] = google_query.get_table(
            params={
                'ids': 'ga:%s' % self.remote_id,
                'start-date': self.date_start,
                'end-date': self.date_end,
                'sort': '-ga:screenviews',
                'max-results': '10',
            },
            dimensions=[
                Column('ga:screenName',
                       label='Screens',
                       visible=1,
                       type_cast=_prune_abstract),
            ],
            metrics=screens_metrics,
        )
        self.tables['screens'].set_visible('ga:screenviews', 'ga:screenName')

        if summary_table.has_value('ga:goalConversionRateAll'):
            # Goals
            self.tables['goals'] = self._get_goals(google_query,
                                                   interval_field)

        if summary_table.has_value('ga:itemRevenue'):
            # Ecommerce
            self.tables['ecommerce'] = self._get_ecommerce(
                google_query, interval_field)

        # Historic
        historic_start_date = self.previous_date_start
        if not is_year_delta:
            # Override to just previous month
            # First step lands on the last day of the month before date_end's
            # month; the second step moves to the first day of that month.
            historic_start_date = self.date_end - datetime.timedelta(
                days=self.date_end.day)
            historic_start_date -= datetime.timedelta(
                days=historic_start_date.day - 1)

        # Note: Pace is different from interval, as year pace is still month over month whereas year interval is year over year.
        compare_interval = self.config.get('pace', 'month')
        if compare_interval == 'year' and not is_year_delta:
            # Start at the first day of date_end's month, one year earlier.
            historic_start_date = self.date_end - datetime.timedelta(
                days=self.date_end.day - 1)
            historic_start_date = historic_start_date.replace(
                year=historic_start_date.year - 1)

        dimensions = [
            Column('ga:yearMonth', visible=0),
        ]
        if not is_year_delta:
            dimensions += [
                Column('ga:date'),
            ]

        historic_table = google_query.get_table(
            params={
                'ids': 'ga:%s' % self.remote_id,
                'start-date': historic_start_date,
                'end-date': self.date_end,
            },
            dimensions=dimensions,
            metrics=[
                # Note: This is unique users over the dimension which is daily,
                # so the combined counts we use later are actually more daily
                # sessions than users.
                Column('ga:users',
                       label='Daily Sessions',
                       type_cast=int,
                       visible=1),
                Column('ga:sessions', label='Sessions', type_cast=int),
            ],
        )

        intro_config = self.config.get('intro')
        if intro_config:
            # For John Sheehan
            historic_table.set_visible('ga:yearMonth', intro_config)

        # The first item from iter_visible() is consumed here as the pair of
        # visible columns; the second one supplies the unit label below.
        iter_historic = historic_table.iter_visible()
        _, views_column = next(iter_historic)

        if is_year_delta:
            # Group by the first four characters of yearMonth
            # (presumably the year, i.e. a YYYYMM value -- TODO confirm).
            iter_historic = ((mo[:4], v) for mo, v in iter_historic)
        elif compare_interval == 'year':
            # Keep only rows whose yearMonth ends with the two-digit month of
            # historic_start_date.
            mo_filter = u'{d.month:02d}'.format(d=historic_start_date)
            iter_historic = ((mo, v) for mo, v in iter_historic
                             if mo.endswith(mo_filter))

        monthly_data, max_value = cumulative_by_month(iter_historic)
        # Only the two most recent groups are compared.
        last_month, current_month = monthly_data[-2:]

        self.data['historic_data'] = encode_rows(monthly_data, max_value)
        self.data['total_units'] = '{:,} %s' % views_column.label.lower(
        ).rstrip('s')
        self.data['total_current'] = current_month[-1]
        self.data['total_last'] = last_month[-1]
        # Previous group's value at the position matching the current group's
        # latest point, clamped to the previous group's length.
        self.data['total_last_relative'] = last_month[
            min(len(current_month), len(last_month)) - 1]
        self.data['total_last_date_start'] = historic_start_date

        self.tables['geo'] = self._get_geo(google_query, summary_metrics)
        self.tables['versions'] = self._get_versions(google_query,
                                                     summary_metrics)

        self.tables['screens'].tag_rows()
Пример #15
0
    def _get_versions(self, google_query, summary_metrics):
        """Return a table of app version / OS pairs with their share of users.

        Each output row holds the percentage of users and a
        ``"<version> on <os>"`` label, and inherits the tags of the
        corresponding fetched row. Rows of the first version returned (the
        query sorts by version, descending) are tagged ``latest``; rows whose
        fatal exceptions exceed 10% of their sessions are tagged ``crashes``.

        Side effect: stores the first version seen on ``self.latest_version``
        (and on the legacy, misspelled ``self.latet_version``).
        """
        t = google_query.get_table(
            params={
                'ids': 'ga:%s' % self.remote_id,
                'start-date': self.date_start,
                'end-date': self.date_end,
                'sort': '-ga:appVersion',
                'max-results': '50',
            },
            dimensions=[
                Column('ga:appVersion', label='Version', visible=1),
                Column('ga:operatingSystem',
                       label='Operating System',
                       visible=2),
            ],
            metrics=[col.new() for col in summary_metrics],
        )
        t.set_visible('ga:users', 'ga:appVersion')
        t.tag_rows()

        # Crashes are fetched by a separate query and joined on (version, os).
        col_exception = Column('ga:fatalExceptions',
                               label='Crashes',
                               type_cast=int,
                               type_format=h.human_int)
        crashes_table = google_query.get_table(
            params={
                'ids': 'ga:%s' % self.remote_id,
                'start-date': self.date_start,
                'end-date': self.date_end,
                'sort': '-ga:appVersion',
                'max-results': '50',
            },
            dimensions=[
                Column('ga:appVersion', label='Version', visible=1),
                Column('ga:operatingSystem',
                       label='Operating System',
                       visible=2),
            ],
            metrics=[col_exception],
        )
        crashes_lookup = dict(
            ((version, os), crashes)
            for crashes, version, os in crashes_table.iter_rows(
                'ga:fatalExceptions', 'ga:appVersion', 'ga:operatingSystem'))

        # `or 0` keeps float() from failing if the column has no sum.
        total = float(t.get('ga:users').sum or 0)

        out = Table(columns=[
            Column(
                'users', label='Users', visible=0,
                type_format=_format_percent),
            Column('version', label='Version', visible=1),
        ])

        latest_version = None
        for i, (users, sessions, version, os) in enumerate(
                t.iter_rows('ga:users', 'ga:sessions', 'ga:appVersion',
                            'ga:operatingSystem')):
            # Guard against a zero total instead of raising ZeroDivisionError.
            share = users * 100.0 / total if total else 0.0
            row = out.add([share, u"%s on %s" % (version, os)])
            row.tags = t.rows[i].tags
            if not latest_version:
                # Rows arrive sorted by version, descending.
                latest_version = version
            crashes = crashes_lookup.get((version, os), 0)
            if latest_version == version:
                row.tag('latest')
            if crashes and crashes > sessions * 0.1:
                row.tag('crashes',
                        value=h.human_int(crashes),
                        is_positive=False,
                        is_prefixed=True)

        self.latest_version = latest_version
        # Misspelled name kept for any existing reader of the old attribute.
        self.latet_version = latest_version
        out.limit(5)
        out.sort(reverse=True)
        return out
Пример #16
0
    def _get_goals(self, google_query, interval_field):
        """Build a table of goal completions compared to the previous interval.

        Lists the profile's goals through the Analytics management API, then
        queries completions of the active goals (plus sessions) from
        ``self.previous_date_start`` through ``self.date_end``, grouped by
        ``interval_field``. Each goal becomes a row tagged with its conversion
        rate and, when it changed noticeably, its relative change.

        :param google_query: Query object providing ``get`` and ``get_table``.
        :param interval_field: Dimension id used to group the date range; the
            result is sorted by it in descending order.
        :returns: A ``Table`` with ``goal`` and ``completions`` columns sorted
            in reverse, or ``None`` when there are no active goals, when the
            query does not return exactly two rows, or when none of the kept
            goal rows has completions in either interval.
        """
        goals_api = 'https://www.googleapis.com/analytics/v3/management/accounts/{accountId}/webproperties/{webPropertyId}/profiles/{profileId}/goals'
        r = google_query.get(
            goals_api.format(profileId=self.remote_data['id'],
                             **self.remote_data))
        has_goals = r.get('items') or []
        goal_metrics = [
            Column('ga:goal{id}Completions'.format(id=g['id']),
                   label=g['name'],
                   type_cast=float) for g in has_goals if g.get('active')
        ]

        if not goal_metrics:
            return

        # Note: max 10 metrics allowed, so keep 9 goals plus sessions.
        metrics = goal_metrics[-9:] + [Column('ga:sessions', type_cast=int)]
        raw_table = google_query.get_table(
            params={
                'ids': 'ga:%s' % self.remote_id,
                'start-date': self.previous_date_start,  # Extra week
                'end-date': self.date_end,
                'sort': '-{}'.format(interval_field),
            },
            metrics=metrics,
            dimensions=[
                Column(interval_field),
            ],
        )

        if len(raw_table.rows) != 2:
            # Exactly one row per interval is required for the comparison
            # (e.g. less than 2 weeks of data available).
            return

        t = Table(columns=[
            Column('goal', label='Goals', visible=1, type_cast=_cast_title),
            Column('completions',
                   label='Events',
                   visible=0,
                   type_cast=int,
                   type_format=h.human_int,
                   threshold=0),
        ])

        # Only the current interval's session count is needed: it is the
        # denominator of the conversion rate. The query sorts descending, so
        # the first row is the most recent interval.
        num_sessions = next(next(raw_table.iter_rows('ga:sessions')))

        this_week, last_week = raw_table.rows
        col_compare = t.get('completions')
        col_compare_delta = Column('%s:delta' % col_compare.id,
                                   label='Events',
                                   type_cast=float,
                                   type_format=h.human_delta,
                                   threshold=0)
        has_completions = False
        for pos in raw_table.column_to_index.values():
            col = raw_table.columns[pos]
            if not col.id.startswith('ga:goal'):
                # Skip the interval dimension and the sessions metric.
                continue

            completions = this_week.values[pos]
            completions_last = last_week.values[pos]
            row = t.add([col.label, completions])
            if not row:
                # Boring
                continue

            if completions > 0:
                percent_completions = (
                    completions * 100.0 / num_sessions if num_sessions else 0.0)
                row.tag(type="Conversion",
                        value=_format_percent(percent_completions))

            if completions + completions_last > 0:
                has_completions = True
                # Relative change in completions (same as GA shows); a goal
                # with no completions in the previous interval counts as +100%.
                if completions_last > 0.0:
                    delta = completions / completions_last - 1
                else:
                    delta = 1.0
                if abs(delta) > 0.001:
                    row.tag(type='delta',
                            value=delta,
                            column=col_compare_delta,
                            is_positive=delta > 0)

        if not has_completions:
            return

        t.sort(reverse=True)

        return t
Пример #17
0
    def fetch(self, api_query):
        """Populate the report's tables and data from the Stripe API.

        Fills ``self.tables['customers']`` and ``self.tables['events']`` with
        timelines of the customers and events created within the report
        interval, and derives the cumulative totals stored in ``self.data``
        from paid, non-refunded charges created since the first day of the
        month before ``self.date_end``.

        :param api_query: Query object providing ``get_paged(url, params=...)``.
        """
        # First day of the month preceding the one that contains ``date_end``.
        last_month_date_start = self.date_end - datetime.timedelta(
            days=self.date_end.day)
        last_month_date_start -= datetime.timedelta(
            days=last_month_date_start.day - 1)

        # The upper bound is exclusive ('created[lt]'), so move it one day
        # past ``date_end`` to include that whole day.
        created_before = to_epoch(self.date_end + datetime.timedelta(days=1))

        # TODO: Check 'has_more'
        week_params = {
            'created[gte]': to_epoch(self.date_start),
            'created[lt]': created_before,
            'limit': 100,
        }

        self.tables['customers'] = customers_table = Timeline()

        items = api_query.get_paged('https://api.stripe.com/v1/customers',
                                    params=week_params)

        for item in items:
            plan = (item.get('subscription') or {}).get('plan')
            if plan:
                plan = describe_plan(plan)

            customers_table.add([
                to_datetime(item['created']), ' '.join([
                    item.get('email') or '(no email)',
                    plan or '(no plan yet)',
                ])
            ])

        ##

        self.tables['events'] = events_table = Timeline()

        items = api_query.get_paged('https://api.stripe.com/v1/events',
                                    params=week_params)
        for item in items:
            events_table.add([
                to_datetime(item['created']),
                describe_event(item),
            ])

        ##

        historic_table = Table([
            Column('created', visible=0),
            Column('amount', label='Amount', visible=1),
        ])

        items = api_query.get_paged('https://api.stripe.com/v1/charges',
                                    params={
                                        'created[gte]':
                                        to_epoch(last_month_date_start),
                                        'created[lt]': created_before,
                                        'limit': 100,
                                    })
        for item in items:
            # Only count money that was actually collected and kept.
            if not item['paid'] or item['refunded']:
                continue
            historic_table.add([
                to_datetime(item['created']),
                item['amount'],
            ])

        iter_historic = historic_table.iter_visible(reverse=True)
        next(iter_historic)  # Skip the header row of columns.
        monthly_data, max_value = sparse_cumulative(iter_historic,
                                                    final_date=self.date_end)

        # Months without data count as a single zero total.
        last_month, current_month = [0], [0]
        if len(monthly_data) >= 2:
            last_month, current_month = monthly_data[-2:]
        elif monthly_data:
            # A single month cannot be unpacked into two names (ValueError).
            # NOTE(review): assumes the lone entry is the current month since
            # the series is built up to ``final_date`` - confirm against
            # sparse_cumulative.
            current_month = monthly_data[-1]

        self.data['historic_data'] = encode_rows(monthly_data, max_value)
        self.data['total_current'] = current_month[-1]
        self.data['total_last'] = last_month[-1]
        # Last month's cumulative value at the same offset as the current
        # month's latest entry, capped at the length of last month's series.
        self.data['total_last_relative'] = last_month[
            min(len(current_month), len(last_month)) - 1]
        self.data['total_last_date_start'] = last_month_date_start

        self.data['canary'] = None  # XXX: Fix this before launching
Пример #18
0
    def fetch(self, google_query):
        """Fetch all Google Analytics tables and historic data for the report.

        Populates ``self.tables`` ('summary', 'pages', 'referrers',
        'social_search', and optionally 'ads', 'goals', 'ecommerce',
        'search_keywords', 'geo') as well as the ``historic_data``,
        ``total_units`` and ``total_*`` entries of ``self.data``.

        :param google_query: Query object used for ``get_table`` calls and
            passed on to the ``_get_*`` helpers.
        :raises EmptyReportError: If the summary table has no value for
            ``ga:pageviews``.
        :raises ValueError: If fewer than two periods of historic data are
            available.
        """
        interval_field = self._get_interval_field()
        is_year_delta = interval_field == 'ga:year'
        is_quarter_delta = interval_field == 'bm:quarter'

        # Summary
        basic_metrics = [
            Column('ga:pageviews', label='Views', type_cast=int, type_format=h.human_int, threshold=0, visible=0),
            Column('ga:users', label='Uniques', type_cast=int, type_format=h.human_int),
            Column('ga:avgSessionDuration', label='Time On Site', type_cast=_cast_time, type_format=h.human_time, threshold=0),
            Column('ga:bounceRate', label='Bounce Rate', type_cast=_cast_percent, type_format=_format_percent, reverse=True, threshold=0),
        ]
        summary_metrics = basic_metrics + [
            Column('ga:goalConversionRateAll', label='Conversion', type_cast=float, type_format=_format_percent, threshold=0.1),
            Column('ga:itemRevenue', label="Revenue", type_cast=float, type_format=_format_dollars),
            Column('ga:itemQuantity', label="Sales", type_cast=int, type_format=h.human_int),
        ]
        self.tables['summary'] = summary_table = self._get_summary(google_query, interval_field,
            metrics=summary_metrics + [
                Column('ga:sessions', type_cast=int),
                Column('ga:adClicks', type_cast=int),
            ],
        )

        # Without any pageviews there is nothing to report on.
        if not summary_table.has_value('ga:pageviews'):
            raise EmptyReportError()

        # Ads are included when configured, or when the summary shows ad
        # clicks or item revenue.
        include_ads = self.config.get('ads') or summary_table.has_value('ga:adClicks') or summary_table.has_value('ga:itemRevenue')

        # Ads
        # FIXME: Merge this with summary_metrics once https://code.google.com/p/analytics-issues/issues/detail?id=693 is fixed.
        if include_ads:
            self.tables['ads'] = google_query.get_table(
                params={
                    'ids': 'ga:%s' % self.remote_id,
                    'start-date': self.date_start,
                    'end-date': self.date_end,
                },
                dimensions=[Column('ga:adGroup')],
                metrics=[
                    Column('ga:adCost', label="Ad Spend", type_cast=float, type_format=_format_dollars, threshold=0),
                    Column('ga:impressions', label="Ad Impressions", type_cast=int, type_format=h.human_int, threshold=0),
                    Column('ga:adClicks', label="Ad Clicks", type_cast=int, type_format=h.human_int, threshold=0),
                    Column('ga:itemRevenue', label="Revenue", type_cast=float, type_format=_format_dollars, threshold=0),
                    Column('ga:itemQuantity', label="Sales", type_cast=int, type_format=h.human_int, threshold=0),
                ],
            )

        # Pages
        # NOTE(review): col.new() presumably returns a fresh copy of the
        # column so tables do not share column state - confirm.
        pages_metrics = [col.new() for col in basic_metrics]
        # Page load time is included unless explicitly disabled in the config.
        if self.config.get('pageloadtime', True):
            pages_metrics += [
                Column('ga:avgPageLoadTime', label='Page Load', type_cast=float, type_format=h.human_time, reverse=True, threshold=0),
            ]

        self.tables['pages'] = google_query.get_table(
            params={
                'ids': 'ga:%s' % self.remote_id,
                'start-date': self.date_start,
                'end-date': self.date_end,
                'sort': '-ga:pageviews',
                'max-results': '10',
            },
            dimensions=[
                Column('ga:pagePath', label='Pages', visible=1, type_cast=_prune_abstract),
            ],
            metrics=pages_metrics,
        )

        # Referrers

        current_referrers = google_query.get_table(
            params={
                'ids': 'ga:%s' % self.remote_id,
                'start-date': self.date_start,
                'end-date': self.date_end,
                'filters': 'ga:medium==referral;ga:socialNetwork==(not set)',
                'sort': '-ga:pageviews',
                'max-results': '25',
            },
            dimensions=[
                Column('ga:fullReferrer', label='Referrer', visible=1, type_cast=_prune_referrer)
            ],
            metrics=[col.new() for col in summary_metrics],
        )

        # Same query for the previous period, with a larger result window and
        # only the pageviews metric.
        last_referrers = google_query.get_table(
            params={
                'ids': 'ga:%s' % self.remote_id,
                'start-date': self.previous_date_start,
                'end-date': self.previous_date_end,
                'filters': 'ga:medium==referral;ga:socialNetwork==(not set)',
                'sort': '-ga:pageviews',
                'max-results': '250',
            },
            dimensions=[
                Column('ga:fullReferrer', label='Referrer', visible=1, type_cast=_prune_referrer)
            ],
            metrics=[
                summary_table.get('ga:pageviews').new(),
            ],
        )
        # Join current and previous referrers on the referrer column.
        # NOTE(review): inject_table_delta presumably annotates the current
        # table with the change versus the previous one - confirm.
        inject_table_delta(current_referrers, last_referrers, join_column='ga:fullReferrer')

        self.tables['referrers'] = current_referrers

        if summary_table.has_value('ga:goalConversionRateAll'):
            # Goals
            try:
                self.tables['goals'] = self._get_goals(google_query, interval_field)
            except APIError:
                # Missing permissions for goals
                pass

        if summary_table.has_value('ga:itemRevenue'):
            # Ecommerce
            self.tables['ecommerce'] = self._get_ecommerce(google_query, interval_field)

        # Historic
        historic_start_date = self.previous_date_start
        if not is_year_delta and not is_quarter_delta:
            # Override to just previous month
            # (first day of the month preceding the one containing date_end).
            historic_start_date = self.date_end - datetime.timedelta(days=self.date_end.day)
            historic_start_date -= datetime.timedelta(days=historic_start_date.day-1)

        # Note: Pace is different from interval, as year pace is still month over month whereas year interval is year over year.
        compare_interval = self.config.get('pace', 'month')
        if compare_interval == 'year' and not is_year_delta:
            # First day of date_end's month, one year earlier.
            historic_start_date = self.date_end - datetime.timedelta(days=self.date_end.day-1)
            historic_start_date = historic_start_date.replace(year=historic_start_date.year-1)

        dimensions = [
            Column('ga:yearMonth', visible=0),
        ]
        if not is_year_delta:
            # Daily granularity for everything except year-over-year reports.
            dimensions += [
                Column('ga:date'),
            ]

        historic_table = google_query.get_table(
            params={
                'ids': 'ga:%s' % self.remote_id,
                'start-date': historic_start_date,
                'end-date': self.date_end,
            },
            dimensions=dimensions,
            metrics=[
                Column('ga:pageviews', label='Views', type_cast=int, visible=1),
                Column('ga:users', label='Uniques', type_cast=int),
            ],
        )

        intro_config = self.config.get('intro')
        if intro_config:
            # For John Sheehan
            # The configured column id is made visible alongside ga:yearMonth.
            historic_table.set_visible('ga:yearMonth', intro_config)

        iter_historic = historic_table.iter_visible()
        # The first item yielded is the header row of columns; the remaining
        # items are (period, value) pairs.
        _, views_column = next(iter_historic)

        if is_year_delta:
            # Keep only the first four characters of the period (the year).
            iter_historic = ((mo[:4], v) for mo, v in iter_historic)
        elif compare_interval == 'year':
            # Keep only the periods ending in the start date's month number.
            mo_filter = u'{d.month:02d}'.format(d=historic_start_date)
            iter_historic = ((mo, v) for mo, v in iter_historic if mo.endswith(mo_filter))

        monthly_data, max_value = cumulative_by_month(iter_historic)
        if len(monthly_data) < 2:
            raise ValueError("invalid number of historic months", self.remote_id, historic_table.rows)
        if is_quarter_delta:
            # TODO: This needs to be generalized beyond months at this point, sigh
            monthly_data, max_value = cumulative_splitter(monthly_data, split_on=3)
        last_month, current_month = monthly_data[-2:]

        self.data['historic_data'] = encode_rows(monthly_data, max_value)
        # Only the unit name is substituted here; the '{:,}' placeholder stays
        # in the string, presumably to be filled in by the consumer.
        self.data['total_units'] = '{:,} %s' % views_column.label.lower().rstrip('s')
        self.data['total_current'] = current_month[-1]
        self.data['total_last'] = last_month[-1]
        # Last period's value at the same offset as the current period's
        # latest entry, capped at the length of the last period.
        self.data['total_last_relative'] = last_month[min(len(current_month), len(last_month))-1]
        self.data['total_last_date_start'] = historic_start_date

        # Social/search sources for the current and the previous period,
        # joined on the 'source' column.
        social_search_table = self._get_social_search(google_query, self.date_start, self.date_end, summary_metrics, max_results=25)
        last_social_search = self._get_social_search(google_query, self.previous_date_start, self.previous_date_end, summary_metrics, max_results=100)
        inject_table_delta(social_search_table, last_social_search, join_column='source')

        self.tables['social_search'] = social_search_table
        if self.config.get('search_keywords'):
            self.tables['search_keywords'] = self._get_search_keywords(google_query, interval_field=interval_field)
            self.tables['search_keywords'].tag_rows()

        if self.config.get('geo'):
            self.tables['geo'] = google_query.get_table(
                params={
                    'ids': 'ga:%s' % self.remote_id,
                    'start-date': self.date_start,
                    'end-date': self.date_end,
                    'sort': '-ga:pageviews',
                    'max-results': '5',
                },
                dimensions=[
                    Column('ga:country', label='Country', visible=1, type_cast=_prune_abstract),
                ],
                metrics=[col.new() for col in summary_metrics],
            )
            self.tables['geo'].tag_rows()


        # Final pass: sort and tag the tables built above.
        self.tables['social_search'].tag_rows()
        self.tables['referrers'].sort(reverse=True)
        self.tables['referrers'].tag_rows()
        self.tables['pages'].sort(reverse=True)
        self.tables['pages'].tag_rows()
Пример #19
0
    def test_column(self):
        """Check Column casting and min/max row tracking as values are measured."""
        # Without an average, casting works and both extremes start empty.
        s = Column('foo', type_cast=int)
        self.assertEqual(s.cast('123'), 123)
        self.assertEqual(s.min_row, (None, None))
        self.assertEqual(s.max_row, (None, None))

        # With an average, both extremes start at the average with no row.
        s = Column('foo', average=100, threshold=0.25)
        self.assertEqual(s.min_row, (100.0, None))
        self.assertEqual(s.max_row, (100.0, None))

        # 90 is not flagged as interesting (presumably because it is within
        # the threshold of the average - confirm), yet measuring it still
        # records it as the new minimum.
        self.assertFalse(s.is_interesting(90))
        s.measure(90, 'a')
        self.assertEqual(s.min_row, (90.0, 'a'))
        self.assertEqual(s.max_row, (100.0, None))

        # A lower value is interesting and replaces the minimum.
        self.assertTrue(s.is_interesting(50))
        s.measure(50, 'b')
        self.assertEqual(s.min_row, (50.0, 'b'))
        self.assertEqual(s.max_row, (100.0, None))

        # A higher value is interesting and replaces the maximum only.
        self.assertTrue(s.is_interesting(150))
        s.measure(150, 'c')
        self.assertEqual(s.min_row, (50.0, 'b'))
        self.assertEqual(s.max_row, (150.0, 'c'))

        # A still higher value replaces the maximum again.
        self.assertTrue(s.is_interesting(200))
        s.measure(200, 'd')
        self.assertEqual(s.min_row, (50.0, 'b'))
        self.assertEqual(s.max_row, (200.0, 'd'))