/
cluster.py
71 lines (55 loc) · 1.69 KB
/
cluster.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/usr/bin/python
import sys
import pickle
import correlate
from collections import deque
def _average_pattern(patterns, members, plen):
    """Return the element-wise average of the first ``plen`` values.

    ``members`` is a list of indices into ``patterns``; position ``k`` of the
    result is the sum of ``patterns[x][k]`` over all members divided by the
    number of members.
    """
    count = len(members)
    # NOTE(review): under Python 2 this division truncates when the pattern
    # values are integers, exactly as the original ``average /= len(ps)``
    # did -- confirm whether a float average is wanted.
    return [sum(patterns[x][k] for x in members) / count
            for k in xrange(plen)]


def _write_lines(path, values):
    """Write ``str(value)`` for every item of ``values`` to ``path``, one per line."""
    # The context manager closes the file even if a write fails.
    with open(path, "w") as out:
        out.writelines(str(value) + "\n" for value in values)


def main():
    """Group correlated patterns from a pickled index and dump the results.

    Arguments are read from ``sys.argv``:

    * ``argv[1]`` -- path of a pickle file holding an indexable, sliceable
      sequence of patterns.
    * ``argv[2]`` -- float threshold; a pair of patterns matches when
      ``correlate.pearson`` returns a value strictly greater than it.
    * ``argv[3]`` -- integer step; only every step-th pattern is kept.
    * ``argv[4]`` -- number of leading values of each pattern to average;
      only read when at least one pattern has a match.

    Output:

    * stdout: one ``(i x k = value)`` line per matching pair, followed by
      the pickled list of averaged patterns.
    * ``<index>.first`` / ``<index>.second``: the values of the first and
      second pattern of every matching pair, one value per line.
    * ``<index>.average``: the values of every averaged pattern, one per
      line.

    Raises ``IndexError`` / ``ValueError`` for missing or malformed
    arguments, plus whatever opening or unpickling the index file raises.
    """
    threshold = float(sys.argv[2])
    indexname = sys.argv[1]

    # NOTE: unpickling can execute arbitrary code; only run this on index
    # files that come from a trusted source.
    with open(indexname, "r") as index_file:
        patterns = pickle.loads(index_file.read())
    patterns = patterns[::int(sys.argv[3])]

    # Compare every pattern with each later one. ``clusters`` collects, for
    # each pattern that has at least one match, its own index followed by the
    # indices of its matches. Only pairs above the threshold are remembered,
    # since nothing else is ever reported.
    count = len(patterns)
    hits = []
    clusters = []
    for i in xrange(count):
        members = [i]
        for k in xrange(i + 1, count):
            value = correlate.pearson(patterns[i], patterns[k])
            if value > threshold:
                hits.append((i, k, value))
                members.append(k)
        if len(members) > 1:
            clusters.append(members)

    rpatterns = []
    if clusters:
        # Parsed once, and only when there is something to average, so a run
        # without clusters still does not require the fourth argument.
        plen = int(sys.argv[4])
        rpatterns = [_average_pattern(patterns, members, plen)
                     for members in clusters]

    firstresult = []
    secondresult = []
    for i, k, value in hits:
        firstresult.extend(patterns[i])
        secondresult.extend(patterns[k])
        # Single parenthesised argument: the same print statement in Python 2.
        print("(" + str(i) + " x " + str(k) + " = " + str(value) + ")")
    print(pickle.dumps(rpatterns))

    _write_lines(indexname + ".average",
                 (rvalue for rpattern in rpatterns for rvalue in rpattern))
    _write_lines(indexname + ".first", firstresult)
    _write_lines(indexname + ".second", secondresult)
# Run the clustering only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()