/
ListAllDiffs14.py
82 lines (72 loc) · 2.52 KB
/
ListAllDiffs14.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import liblytics
import csv
import time
from sets import Set
videoNames = {}
with open("Video Names F14 2.csv", "rU") as f:
reader = csv.reader(f)
for row in reader:
videoNames[row[0]] = row[1] #cannot remember how this line works exactly
usernames = Set()
with open("SPOC Grades Edited.csv", "rb") as f:
reader = csv.reader(f)
for row in reader:
usernames.add(row[1])
data={} #first dict with username (key) : second dict (value)
def parseEventText(eventLine):
line = eventLine.replace("{","")
line = line.replace("}","")
units = line.split(",")
result = {}
for unit in units:
pieces = unit.split(":")
result[pieces[0].replace('"',"")] = pieces[1].replace('"',"")
return result
for line in liblytics.read_log_file("umass_boston-edge-events-ALL.log.gz"): #Reads line in log file
if (line["event_type"] == "play_video"): # Grabs only play_videos
username=line["username"]
t = time.strptime(line['time'].split('+')[0], "%Y-%m-%dT%H:%M:%S.%f")
times = time.mktime(t)
#first loop creates first dict
if username != "":
videoName = parseEventText(line["event"])["id"]
if username not in data:
data[username] = {}
usersDict = data[username]
#creates third dict with playCount and Times (keys) and their values
if videoName not in usersDict: #second loop creates third dict
videoDict = {}
videoDict["Times"] = []
videoDict["Playcount"] = 1
usersDict[videoName] = videoDict
videoDict = usersDict[videoName]
videoDict["Times"].append(times)
videoDict["Playcount"] = len(videoDict["Times"])
usersDict[videoName] = videoDict
data[username] = usersDict
#calculate list of differences
monsterList = []
for username in sorted(usernames):
if username in data:
usersDict = data[username]
for cleanName in sorted(videoNames):
if videoNames[cleanName] in data[username]:
videoDict = usersDict[videoNames[cleanName]]
timeList = sorted(videoDict["Times"])
diffList = []
timecount = len(timeList)
while timecount > 1:
T1 = timecount - 1
T2 = timecount - 2
secdiff = timeList[T1] - timeList[T2]
#idea, if secdiff = 0 then print the username, the videoName, and the time list
monsterList.append(secdiff)
timecount=timecount-1
for username in data:
for videoName in data[username]:
print data[username][videoName]["Playcount"]
f = open("Monster List of Diff F14.csv", "w")
for x in monsterList:
f.write(str(x))
f.write("\n")
f.close()