-
Notifications
You must be signed in to change notification settings - Fork 0
/
trending.py
64 lines (44 loc) · 1.86 KB
/
trending.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import googleanalytics as ga
import collections
import numpy
import datetime
# Additive smoothing constant: added to every page's daily pageview count, and
# (multiplied by the number of page rows returned) to the daily site total,
# before the page's share of traffic is computed.
SMOOTHER = 20
# Passed to the Google Analytics queries as daily(days=-WINDOW); presumably the
# number of most recent days to analyse -- confirm against the ga library.
WINDOW = 8
# A page is reported only when the fitted per-day slope of its log traffic
# share is strictly greater than this value.
GROWTH_THRESHOLD = 0.03
def trend(counts, today=None):
    """Fit a log-linear trend to dated observations.

    Solves ``log(value) = b0 + b1 * days`` by ordinary least squares, where
    ``days`` is each observation's date expressed as an offset in days from
    ``today`` (negative for dates in the past).

    Parameters
    ----------
    counts : iterable of (date, value) pairs
        Each date must provide ``toordinal()``. Values should be strictly
        positive; a zero or negative value has no finite logarithm.
    today : datetime.date, optional
        Reference date for the day offsets. Defaults to
        ``datetime.date.today()``; pass a fixed date for reproducible results.

    Returns
    -------
    numpy.ndarray
        ``[b0, b1]``: the intercept (fitted log value on ``today``) and the
        slope (change in log value per day).

    Raises
    ------
    ValueError
        If ``counts`` contains no observations.
    """
    # Materialise first so generators and dict views can be checked for
    # emptiness and then unzipped.
    observations = list(counts)
    if not observations:
        raise ValueError("trend() requires at least one (date, value) pair")
    if today is None:
        today = datetime.date.today()

    dates, values = zip(*observations)
    origin = today.toordinal()
    days = numpy.array([d.toordinal() - origin for d in dates], dtype=float)

    # Design matrix with one row per observation: [1, days].
    design = numpy.column_stack([numpy.ones(len(days)), days])
    log_values = numpy.log(numpy.asarray(values, dtype=float))

    # rcond=None selects NumPy's current default cutoff explicitly, which
    # avoids the FutureWarning older NumPy releases emit when it is omitted.
    coefficients = numpy.linalg.lstsq(design, log_values, rcond=None)[0]
    return coefficients
# Authenticate against the Illinois Sunshine Google Analytics profile.
# (To analyse Chicago Councilmatic instead, authenticate with
# account='Councilmatic', webproperty='Chicago Councilmatic' and
# profile='Chicago Councilmatic'.)
profile = ga.authenticate(
    identity='sunspot',
    account='Illinois Campaign for Political Reform',
    webproperty='Illinois Sunshine',
    profile='Illinois Sunshine',
)

# Site-wide pageviews per day, re-keyed as {date: pageviews}.
totals = profile.core.query.metrics('pageviews').daily(days=-WINDOW)
totals = {day: views for day, views in totals.rows}

# Daily pageviews broken down by page path.
pages = (
    profile.core.query
    .metrics('pageviews')
    .dimensions('pagepath')
    .daily(days=-WINDOW)
)

page_counts = collections.defaultdict(dict)             # page -> {date: raw views}
normalized_page_counts = collections.defaultdict(dict)  # page -> {date: smoothed share}

# NOTE(review): len(pages.rows) counts one row per (date, page) combination,
# not the number of distinct pages -- confirm this is the intended smoothing
# mass for the denominator.
smooth_denom = SMOOTHER * len(pages.rows)

# Rows are unpacked as (date, page path, pageviews).
for row in pages.rows:
    day, path, views = row
    page_counts[path][day] = views
    normalized_page_counts[path][day] = (
        (views + SMOOTHER) / (totals[day] + smooth_denom)
    )

for path, shares in normalized_page_counts.items():
    # Days on which this page has no row get the smoothed share of a zero
    # count, so every page is fitted over the same set of dates.
    for day, total in totals.items():
        if day not in shares:
            shares[day] = SMOOTHER / (total + smooth_denom)

    b0, b1 = trend(shares.items())

    # Report only pages whose slope is strictly above the threshold.
    # (An extra filter such as path.startswith('/committees/') can be added
    # here to restrict the report to one section of the site.)
    if not b1 > GROWTH_THRESHOLD:
        continue

    print(path, b0, b1)
    for entry in sorted(page_counts[path].items()):
        print(entry)