-
Notifications
You must be signed in to change notification settings - Fork 1
/
metrics.py
148 lines (123 loc) · 5.35 KB
/
metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#!/usr/bin/env python
from __future__ import division
from collections import Counter
import psycopg2
import matplotlib.pyplot as plt
import numpy as np
import graph2
import elo
import cau
# Postgres connection defaults for the StackExchange dump database.
DB_NAME = "stackexchangedb"
DB_USER = "postgres"
# Sampling fractions 0.00, 0.01, ..., 1.00 used to normalize each user's
# activity timeline by percentile of their posts (see percentile_normalization).
percentiles = [i*0.01 for i in range(0, 101)]
def connect(db=DB_NAME, user=DB_USER):
    """Connect to the specified Postgres database as the specified user.

    Returns a (connection, cursor) pair.  Uses psycopg2 keyword arguments
    instead of a hand-built DSN string so names containing spaces or quotes
    cannot corrupt the connection string.
    """
    conn = psycopg2.connect(dbname=db, user=user)
    cur = conn.cursor()
    return conn, cur
def results(cursor):
    """Yield rows from *cursor* one at a time until the result set is exhausted."""
    # iter(callable, sentinel) calls fetchone() repeatedly and stops at None.
    for row in iter(cursor.fetchone, None):
        yield row
def get_start_time(userID, cur):
    """Return the account creation timestamp (se_user.creation_date) for *userID*."""
    print(userID)  # progress trace; parenthesized so it is valid Python 2 and 3
    query = "select creation_date from se_user where id = %(id)s"
    cur.execute(query, {'id': userID})
    # id is the primary key, so the query yields one row; avoid materializing
    # the whole result set just to take the first element.
    return cur.fetchone()[0]
def percentile_normalization(userID, cur, sampling_percentiles):
    """Map each fraction in *sampling_percentiles* to an inclusive cutoff time.

    A fraction p is mapped to the creation time of the user's ceil(n*p)-th
    question/answer post (n = total posts); p small enough to select zero
    posts maps to the account creation time.  Cutoffs are inclusive.
    """
    query1 = "select creation_date from post where owner_user_id = %(id)s and (post_type_id = 1 or post_type_id = 2) order by creation_date"
    cur.execute(query1, {'id': userID})
    posts = [row[0] for row in results(cur)]
    start = get_start_time(userID, cur)
    cutoffs = []
    for fraction in sampling_percentiles:
        count = int(len(posts) * fraction)
        cutoffs.append(start if count == 0 else posts[count - 1])
    return cutoffs
def total_answers_helper(cur, start_time, end_time, userID):
    """Count answers (post_type_id = 2) by *userID* created in [start_time, end_time]."""
    query = "select count(*) from post where owner_user_id = %(id)s and post_type_id = 2 and creation_date >= %(start_time)s and creation_date <= %(end_time)s"
    cur.execute(query, {'start_time': start_time, 'end_time': end_time, 'id': userID})
    # COUNT(*) returns exactly one row; no need to materialize the result set.
    return int(cur.fetchone()[0])
def total_answers(userID, cur):
    """Return cumulative answer counts for *userID* at each percentile cutoff."""
    cutoffs = percentile_normalization(userID, cur, percentiles)
    start = get_start_time(userID, cur)
    return [total_answers_helper(cur, start, cutoff, userID) for cutoff in cutoffs]
def total_accepted_answers_helper(cur, start_time, end_time, userID):
    """Count accepted answers by *userID* created in [start_time, end_time].

    An answer x counts when some question y has y.accepted_answer_id = x.id.
    """
    query = "select count(x.id) from post x, post y where y.accepted_answer_id = x.id and x.owner_user_id = %(id)s and x.post_type_id = 2 and x.creation_date >= %(start_time)s and x.creation_date <= %(end_time)s"
    cur.execute(query, {'start_time': start_time, 'end_time': end_time, 'id': userID})
    # COUNT() returns exactly one row; no need to materialize the result set.
    return int(cur.fetchone()[0])
def total_accepted_answers(userID, cur):
    """Return cumulative accepted-answer counts for *userID* at each percentile cutoff."""
    cutoffs = percentile_normalization(userID, cur, percentiles)
    start = get_start_time(userID, cur)
    return [total_accepted_answers_helper(cur, start, cutoff, userID) for cutoff in cutoffs]
def get_indegree_at_time(cur, userID, time):
    """Return the in-degree of *userID* in the directed graph built from data before *time*."""
    return graph2.indegree(graph2.build_graph_before(cur, time))[userID]
def get_betweenness_at_time(cur, userID, time):
    """Return the betweenness centrality of *userID* in the undirected graph built from data before *time*."""
    scores = graph2.betweenness(graph2.build_graph_before_undirected(cur, time))
    return scores[userID]
def get_closeness_at_time(cur, userID, time):
    """Return the closeness centrality of *userID* in the undirected graph built from data before *time*."""
    undirected = graph2.build_graph_before_undirected(cur, time)
    closeness_value = graph2.closeness(undirected, userID)
    return closeness_value
def get_pagerank_at_time(cur, userID, time):
    """Return the PageRank score of *userID* in the directed graph built from data before *time*."""
    return graph2.pagerank(graph2.build_graph_before(cur, time))[userID]
def get_auth_at_time(cur, userID, time):
    """Return element [1] of the HITS score pair for *userID* in the graph before *time*.

    NOTE(review): presumably index 1 is the authority score — confirm against
    graph2.hits's return convention.
    """
    hits_scores = graph2.hits(graph2.build_graph_before(cur, time))
    return hits_scores[userID][1]
def get_cau_at_time(cur, conn, userID, time):
    """Return the CAU metric for *userID* at *time* (delegates to cau.cau)."""
    score = cau.cau(cur, conn, userID, time)
    return score
def get_elo_at_time(cur, conn, userID, time):
    """Return the Elo rating for *userID* at *time* (delegates to elo.elo)."""
    rating = elo.elo(cur, conn, userID, time)
    return rating
def cau_for_user(cur, conn, userID, samples=None):
    """Return the CAU metric for *userID* at each percentile cutoff.

    samples: fractions in [0, 1] to sample at; defaults to the module-level
    `percentiles`.  Tested with `is None` so an explicitly-passed empty list
    is honored instead of being silently replaced by the default.
    """
    if samples is None:
        samples = percentiles
    times = percentile_normalization(userID, cur, samples)
    return [get_cau_at_time(cur, conn, userID, t) for t in times]
def elo_for_user(cur, conn, userID, samples=None):
    """Return the Elo rating for *userID* at each percentile cutoff.

    samples: fractions in [0, 1] to sample at; defaults to the module-level
    `percentiles`.  Tested with `is None` so an explicitly-passed empty list
    is honored instead of being silently replaced by the default.
    """
    if samples is None:
        samples = percentiles
    times = percentile_normalization(userID, cur, samples)
    return [get_elo_at_time(cur, conn, userID, t) for t in times]
def pagerank_for_user(cur, userID, samples=None):
    """Return the PageRank score for *userID* at each percentile cutoff.

    samples: fractions in [0, 1] to sample at; defaults to the module-level
    `percentiles`.  Tested with `is None` so an explicitly-passed empty list
    is honored instead of being silently replaced by the default.
    """
    if samples is None:
        samples = percentiles
    times = percentile_normalization(userID, cur, samples)
    return [get_pagerank_at_time(cur, userID, t) for t in times]
def auth_for_user(cur, userID, samples=None):
    """Return the HITS score (element [1] of the pair) for *userID* at each percentile cutoff.

    samples: fractions in [0, 1] to sample at; defaults to the module-level
    `percentiles`.  Tested with `is None` so an explicitly-passed empty list
    is honored instead of being silently replaced by the default.
    """
    if samples is None:
        samples = percentiles
    times = percentile_normalization(userID, cur, samples)
    return [get_auth_at_time(cur, userID, t) for t in times]
def indegree_for_user(cur, userID, samples=None):
    """Return the in-degree for *userID* at each percentile cutoff.

    samples: fractions in [0, 1] to sample at; defaults to the module-level
    `percentiles`.  Tested with `is None` so an explicitly-passed empty list
    is honored instead of being silently replaced by the default.
    """
    if samples is None:
        samples = percentiles
    times = percentile_normalization(userID, cur, samples)
    return [get_indegree_at_time(cur, userID, t) for t in times]
def betweenness_for_user(cur, userID, samples=None):
    """Return the betweenness centrality for *userID* at each percentile cutoff.

    samples: fractions in [0, 1] to sample at; defaults to the module-level
    `percentiles`.  Tested with `is None` so an explicitly-passed empty list
    is honored instead of being silently replaced by the default.
    """
    if samples is None:
        samples = percentiles
    times = percentile_normalization(userID, cur, samples)
    return [get_betweenness_at_time(cur, userID, t) for t in times]
def closeness_for_user(cur, userID, samples=None):
    """Return the closeness centrality for *userID* at each percentile cutoff.

    samples: fractions in [0, 1] to sample at; defaults to the module-level
    `percentiles`.  Tested with `is None` so an explicitly-passed empty list
    is honored instead of being silently replaced by the default.
    """
    if samples is None:
        samples = percentiles
    times = percentile_normalization(userID, cur, samples)
    return [get_closeness_at_time(cur, userID, t) for t in times]