forked from PKU-Dragon-Team/back-mobile-data-visualization
-
Notifications
You must be signed in to change notification settings - Fork 0
/
batch_tag_matrix.py
54 lines (47 loc) · 1.61 KB
/
batch_tag_matrix.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#!/usr/bin/env python
# encoding: utf-8
from db import MySQL as DB
from batch_common import fetch_user_location_logs
from batch_common import dbconfig
from batch_common import fetch_users
from merge_locations import merge_locations
from periodic_probability_matrix import generate_matrix
from tag_config import clean_tags
from app import fetch_semantic_data
import sys
import json
from csv import DictWriter
import sys
# Python 2 compatibility shim: make UTF-8 the interpreter's default encoding.
# On Python 2, site.py removes sys.setdefaultencoding at startup, so `sys`
# must be reloaded to get the function back before calling it.
# Python 3 has no sys.setdefaultencoding (the original unconditional call
# raised AttributeError there) and `imp` is gone in Python 3.12+, so the
# whole hack is skipped on Python 3.
if sys.version_info[0] < 3:
    import imp
    imp.reload(sys)
    sys.setdefaultencoding('utf-8')
def _accumulate_tag_matrix(matrix, semantic_dict, slots=48):
    """Fold a per-location probability matrix into per-tag rows.

    Args:
        matrix: mapping of location -> indexable sequence holding at least
            ``slots`` numbers.
        semantic_dict: mapping of location -> {tag: count}. It must contain
            an entry for every location present in ``matrix``.
        slots: number of columns in every row. The original code hard-coded
            48 (presumably half-hour slots of a day -- TODO confirm).

    Returns:
        dict mapping tag -> list of ``slots`` accumulated values.

    Raises:
        KeyError: if a location in ``matrix`` has no entry in
            ``semantic_dict``.
    """
    tag_matrix = {}
    for location, proba in matrix.items():
        tag_dict = semantic_dict[location]
        tag_weight = sum(tag_dict.values())
        if tag_weight == 0:
            # A location whose tags carry no weight contributes nothing.
            continue
        for tag, cnt in tag_dict.items():
            row = tag_matrix.setdefault(tag, [0] * slots)
            for i in range(slots):
                # The 0.001 terms are kept exactly as in the original
                # formula (they look like additive smoothing).
                row[i] += (proba[i] * cnt + 0.001) / (tag_weight + 0.001)
    return tag_matrix


def run():
    """Write one CSV row per user containing that user's tag matrix as JSON.

    The output path is taken from ``sys.argv[1]``. The CSV has the columns
    ``uid`` and ``data``; ``data`` is the JSON dump of a dict mapping each
    tag to a list of 48 numbers.

    Raises:
        IndexError: if no output path was given on the command line.
        KeyError: if ``fetch_semantic_data`` returns no row for a location
            that appears in a user's matrix.
    """
    # `with` guarantees the file is flushed and closed even when one of the
    # database/helper calls below raises part-way through the batch.
    with open(sys.argv[1], 'w') as output:
        writer = DictWriter(output, fieldnames=['uid', 'data'])
        writer.writeheader()
        db = DB(dbconfig)
        for uid in fetch_users(db):
            data = fetch_user_location_logs(uid, db)
            locations = merge_locations(data)
            matrix = generate_matrix(locations)
            semantic_data = fetch_semantic_data(list(matrix.keys()))
            # location -> cleaned tag dict; the meaning of the second
            # argument (5) is defined by tag_config.clean_tags.
            semantic_dict = {}
            for row in semantic_data:
                semantic_dict[row['location']] = clean_tags(row['tags'], 5)
            tag_matrix = _accumulate_tag_matrix(matrix, semantic_dict)
            writer.writerow({
                'uid': uid,
                'data': json.dumps(tag_matrix)
            })
# Script entry point: start the batch job only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    run()