-
Notifications
You must be signed in to change notification settings - Fork 0
/
wc_pathos.py
executable file
·93 lines (72 loc) · 2.08 KB
/
wc_pathos.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#!/usr/bin/python
import os
import urllib
import operator
import timing
from pathos.parallel import ParallelPool as Pool
# Shared pathos worker pool; ncpus and servers are assigned in the
# __main__ block at the bottom of the file.
pool = Pool()
# Disabled alternative that fetched the text over HTTP instead of from disk:
#url = 'http://www.gutenberg.org/cache/epub/25990/pg25990.txt'
#f = urllib.urlopen(url)
# NOTE(review): not referenced anywhere else in the visible code.
manager_address = 'localhost'
# Input text, opened at import time; the visible code never closes it.
f = open('/root/homework/pg25990.txt')
def info(title):
    """Print ``title`` followed by the module name and process ids.

    Args:
        title: label printed on the first line.

    The parent-process line is emitted only when ``os.getppid`` exists.
    Each line is built as a single string so the output is the same under
    the Python 2 print statement and the Python 3 print function.
    """
    print(title)
    print('module name: %s' % __name__)
    if hasattr(os, 'getppid'):
        print('parent process: %s' % os.getppid())
    print('process id: %s' % os.getpid())
@timing.timed
def m_word_list(file):
    """Read the module-level file ``f`` and split it on whitespace.

    Returns:
        A one-element list whose only item is the list of tokens.

    NOTE(review): ``file`` is accepted but never used; the data always
    comes from the global ``f``. Confirm whether that is intended.
    """
    info('server word_list function')
    # The token list is deliberately kept nested inside an outer list.
    return [f.read().split()]
def c_word_list(file):
    """Log process details under the 'client word_list function' label.

    ``file`` is not used and nothing is returned.
    NOTE(review): presumably a stub for a client-side step - TODO confirm.
    """
    info('client word_list function')
def m_cleanup(words):
    """Strip punctuation characters from each word and drop empty results.

    Args:
        words: iterable of strings.

    Returns:
        A list of the stripped words in input order; words that consist
        only of punctuation are omitted.
    """
    info('cleanup function')
    # Same character set as before. The backslash in front of the pipe is
    # now written as an explicit `\\` rather than the invalid escape `\|`,
    # which newer Python versions warn about.
    symbols = "~!@#$%^&*()_+=-{}\\|\\][:\"';<>?/.,"
    only_words = []
    for word in words:
        for symbol in symbols:
            word = word.replace(symbol, "")
        if word:
            only_words.append(word)
    return only_words
def c_cleanup(words):
    """Log process details under the 'cleanup function' label.

    ``words`` is not used and nothing is returned.
    NOTE(review): presumably a stub for a client-side step - TODO confirm.
    """
    info('cleanup function')
def m_word_dict(clean):
    """Log process details, then return a new ``manager.dict()``.

    ``clean`` is accepted but not used.

    NOTE(review): ``manager`` is not defined in the visible part of this
    file; confirm where it is expected to come from.
    """
    info('word_dict function')
    return manager.dict()
def c_word_dict(clean):
    """Tally every item of ``clean`` into the module-level ``word_count``.

    Args:
        clean: iterable of hashable items (words).

    Returns:
        The same ``word_count`` object after it has been updated.
    """
    info('word_dict function')
    for token in clean:
        seen_before = token in word_count
        # First occurrence starts at 1; later ones bump the stored count.
        word_count[token] = word_count[token] + 1 if seen_before else 1
    return word_count
def top_ten(wordcount):
    """Print the ten highest-count entries of ``wordcount``, one per line.

    Args:
        wordcount: mapping of word -> count (anything with ``.items()``).

    Each output line is "<word> <count>", highest count first. The
    mapping passed in is the one that is ranked; no module-level state
    is consulted.
    """
    ranked = sorted(wordcount.items(), key=operator.itemgetter(1), reverse=True)
    for key, value in ranked[:10]:
        # Single formatted string: same output on Python 2 and Python 3.
        print('%s %s' % (key, value))
def wordcount(f):
    """Chain ``word_list`` -> ``cleanup`` -> ``word_dict`` on ``f``.

    The final result is discarded and the function returns None.

    NOTE(review): none of ``word_list``, ``cleanup`` or ``word_dict`` is
    defined in the visible source (only ``m_``/``c_`` prefixed variants
    are); confirm which implementations are meant.
    """
    word_dict(cleanup(word_list(f)))
if __name__ == '__main__':
    # Script entry point: push the read, cleanup and count stages through
    # the pathos pool, then print the ten most frequent words.
    info('Mainline function')
    pool.ncpus = 2
    pool.servers = ('localhost:17320',)
    # NOTE(review): mapping over the file object `f` presumably hands one
    # line to each m_word_list call, which itself reads from `f` - confirm.
    words = pool.map(m_word_list, f)
    pool.join()
    pool.servers = ('localhost:17320',)
    clean_list = pool.map(m_cleanup, words)
    # NOTE(review): join() is called with an argument here and below;
    # verify against the pathos ParallelPool API.
    pool.join(clean_list, )
    pool.servers = ('localhost:17320',)
    # Feed the cleaned lists from the previous stage (`clean_list`) into
    # the counting stage.
    # NOTE(review): `word_dict` is not defined in the visible source (only
    # m_word_dict / c_word_dict are); confirm which one is meant.
    word_count = pool.map(word_dict, clean_list)
    pool.join(word_count, )
    top_ten(word_count)