Пример #1
0
import logging
import time
import urllib2

from processor import Processor, DbgProcessor
# URL template; the %s placeholder is filled with the last path segment.
base = 'http://en.wikipedia.org/wiki/%s'
# One URL per year from 1950 through 1999 (range's upper bound is exclusive).
urls = [
    base % year
    for year in range(1950, 2000)
]

class Checker(object):
    """Fetch pages and test whether they contain a given piece of text.

    One ``urllib2`` opener is created per instance and reused for every
    request made through :meth:`check`.
    """

    def __init__(self):
        """Build the opener and give it a browser-like ``User-agent`` header."""
        self.opener = urllib2.build_opener()
        # Assigning ``addheaders`` replaces the opener's header list, so every
        # request is sent with 'Mozilla/5.0' as its User-agent.
        self.opener.addheaders = [('User-agent', 'Mozilla/5.0')]

    def check(self, url, text):
        """Download *url* and report whether *text* occurs in its body.

        The comparison is case-insensitive: both the search text and the
        downloaded body are lower-cased before the containment test.

        Args:
            url: Address of the page to fetch with this instance's opener.
            text: String to look for in the downloaded body.

        Returns:
            A ``(name, found)`` tuple, where ``name`` is the part of *url*
            after its last ``/`` and ``found`` is True when *text* occurs
            in the body.

        Raises:
            Nothing is caught here: any exception raised while opening or
            reading *url* propagates to the caller.
        """
        infile = self.opener.open(url)
        # Python 2 urllib2 responses are not context managers, so close the
        # response explicitly; ``finally`` releases it even if read() fails.
        try:
            data = infile.read()
        finally:
            infile.close()
        return url.split("/")[-1], text.lower() in data.lower()

# Driver: run Checker.check over every URL in ``urls`` through a Processor and
# print the years whose page contains the search string.

# Third positional argument to Processor -- presumably the number of
# concurrent workers; TODO confirm against processor.Processor.
N = 5
p = Processor(Checker(), 'check', N,
              logfile='/tmp/wiki.log', loglevel=logging.DEBUG)

search_string = 'marilyn monroe'
# Queue one (url, search_string) argument pair per page.
for url in urls:
    p.add(url, search_string)
p.start()
# Poll once per second until the processor reports it is done.
while not p.done():
    time.sleep(1)
# NOTE(review): only element 0 of get_results() is used -- presumably the
# list of check() return values; verify against Processor.get_results.
results = p.get_results()[0]
results.sort()
# Each result is indexed as r[0] (last URL segment, i.e. the year) and
# r[1] (whether the text was found), matching what Checker.check returns.
# A single parenthesised argument prints identically under Python 2's
# print statement.
print("Years in which the string '%s' appears: %s" % (
    search_string, [int(r[0]) for r in results if r[1]]))