def scan(addr, min, max):
    """
    Probe every port in the range [min, max) on the host `addr`.

    Each connection attempt is registered on a Task together with its
    success/failure events; once every attempt has either connected or
    errored out, DONE fires and the reactor is shut down.
    """
    probe = Task()
    for port in xrange(min, max):
        probe.add(create_connection(addr, port), CONNECT, CONNECT_ERR)
    probe.start()
    xmap(probe, DONE, lambda probe: die())
def main():
    """
    Spawn MAX_CON client connections against the local test server and
    collect, per connection, the payloads delivered for events '0'..'5'.

    Every payload is appended under its event key in the Task's shared
    dict; when all connections have finished, COMPLETE triggers `done`.
    """
    collector = Task(dict())
    set_up_server()

    for number in xrange(MAX_CON):
        sock = socket(AF_INET, SOCK_STREAM)
        spin = Spin(sock)
        Client(spin)
        xmap(spin, CONNECT, on_connect)
        xmap(spin, CONNECT_ERR, on_connect_err)
        spin.connect_ex(('localhost', PORT))

        # Bind the current connection number as a default argument so the
        # closure does not late-bind to the loop variable.
        def job(data, event, args, index=number):
            slot = data.setdefault(event, list())
            slot.append(('Con %s' % index, args[1]))
            return True

        pairs = [('%s' % code, job) for code in xrange(6)]
        pairs.append((CONNECT_ERR, lambda *args: True))
        collector.gather(spin, *pairs)

    xmap(collector, COMPLETE, done)
def on_connect(con): Stdin(con) Stdout(con) Shrug(con) xmap(con, FOUND, lambda con, data: spawn(con, data, data)) #xmap(con, FOUND, lambda con, data: sys.stdout.write('%s\n' % data)) print 'connected' task = Task(dict()) def job(data, event, args): base = data.setdefault(event, list()) base.append(args[1]) return True for ind in xrange(MAX_REQUEST): task.gather(con, ('%s' % ind, job)) xmap(task, COMPLETE, done)
class Miner(list):
    """
    Base class for fetching and mining a document at a given url.

    The instance is a list; subclasses populate it by overriding
    build_dom/run. A single class-level Task tracks all outstanding
    requests and shuts the reactor down once every one is done.
    """

    # Shared across all Miner instances: when every registered request
    # has finished, DONE fires and the reactor dies.
    task = Task()
    task.add_map(DONE, lambda task: die())
    task.start()

    def __init__(self, url, pool=None, headers=HEADERS, method='get', payload=None, auth=()):
        """
        url     - the resource to fetch.
        pool    - optional object the caller associates with this miner.
        headers - HTTP headers to send.
        method  - 'get' issues a Fetcher, anything else a Poster.
        payload - data sent when method is not 'get'.
        auth    - (user, password) pair; empty tuple for none.
        """
        self.pool = pool
        self.url = url
        self.urlparser = urlparse(url)
        self.headers = headers
        self.method = method
        # None sentinel instead of a mutable {} default, which would be
        # shared by every instance constructed without a payload.
        self.payload = {} if payload is None else payload
        self.auth = auth
        self.encoding = 'utf-8'
        self.response = None
        # Was super(list, self).__init__(): super must be given the class
        # itself, not its base, or the MRO lookup starts past list.
        super(Miner, self).__init__()
        self.expand()

    def expand(self):
        """
        Issue the request for self.url; failures are printed rather
        than raised so one bad url does not abort the whole mining run.
        """
        try:
            self.create_connection()
        except Exception as excpt:
            print(excpt)

    def setup(self, response):
        """
        Record the response, detect its charset from the content-type
        header, decode the body and hand it to build_dom.
        """
        data = response.fd.read()
        # Reset the fd so it can be reread later.
        response.fd.seek(0)
        ctype = response.headers.get('content-type',
            'text/html; charset=%s' % self.encoding)
        params = cgi.parse_header(ctype)
        # Sets the encoding for later usage
        # in self.geturl for example.
        self.encoding = params[1]['charset']
        self.response = response
        data = data.decode(self.encoding, 'ignore')
        self.build_dom(data)

    def build_dom(self, data):
        """Hook: subclasses parse `data` (decoded text) here."""
        pass

    def create_connection(self):
        """Dispatch on the HTTP method: Fetcher for GET, Poster otherwise."""
        if self.method == 'get':
            return Fetcher(self)
        return Poster(self)

    def geturl(self, reference):
        """
        Resolve `reference` against the current document's scheme and
        host; an already-absolute reference is returned unchanged.
        """
        # It is necessary to encode back the url
        # because websnake get method inserts the host header
        # with the wrong encoding and some web servers wouldnt
        # accept it as valid header.
        urlparser = urlparse(reference)
        url = urljoin('%s://%s' % (self.urlparser.scheme,
            self.urlparser.hostname), reference) \
            if not urlparser.scheme else reference
        return url

    def next(self, reference):
        """Follow `reference`: rebase self.url on it and fetch again."""
        self.url = self.geturl(reference)
        self.urlparser = urlparse(self.url)
        self.expand()

    def run(self, dom):
        """
        Implement your rules here.
        """
        pass
# NOTE(review): this chunk arrived with all statements collapsed onto a single
# line, and the leading con.dump(...) calls appear to be the tail of an
# on_connect(con, url) handler whose `def` line is cut off before this chunk
# (the later xmap(con, CONNECT, on_connect, ind) call passes an extra arg,
# which is why that signature is presumed). The code is left byte-identical
# rather than risk a wrong reconstruction of the missing header.
#
# Flow, as far as this chunk shows: the dump calls write an HTTP/1.1 GET
# request (Connection: close); on_close saves the accumulated response data
# to a file named after the url; `done` raises Kill to stop the reactor once
# every connection in `urls` has either closed or failed to connect, which
# the Task tracks via the CLOSE/CONNECT_ERR pairs given to task.gather.
con.dump('GET / HTTP/1.1\r\n') con.dump('Host: %s\r\n' % url) con.dump('Connection: TE, close\r\n') con.dump('User-Agent: UntwistedDownload/1.0\r\n\r\n') def on_close(con, err, url): with open(url, 'w') as fd: fd.write(con.accumulator.data) def done(task, data): raise Kill task = Task(dict()) xmap(task, COMPLETE, done) job = lambda data, event, args: True for ind in urls: con = Spin() Client(con) con.connect_ex((ind, 80)) xmap(con, CONNECT, on_connect, ind) xmap(con, CLOSE, lambda con, err: lose(con)) xmap(con, CLOSE, on_close, ind) xmap(con, CONNECT_ERR, lambda con, err: lose(con)) task.gather(con, (CLOSE, job), (CONNECT_ERR, job)) core.gear.mainloop()
class Miner(list):
    """
    Base class for fetching and mining a document at a given url using
    the websnake Get/Post requests.

    The instance is a list; subclasses populate it by overriding
    build_dom/run. A single class-level Task tracks all outstanding
    requests and shuts the reactor down once every one is done.
    """

    # Shared across all Miner instances: when every registered request
    # has finished, DONE fires and the reactor dies.
    task = Task()
    task.add_map(DONE, lambda task: die())
    task.start()

    def __init__(self, url, headers=default_headers, args=None,
        method='get', payload=None, auth=None, attempts=5):
        """
        Resource
        Param: url

        Headers to be send.
        Param: headers

        Url query.
        Param: args

        The HTTP method.
        Param: method

        The payload data in case of method is 'post'.
        Param: payload

        Authentication user/pass.
        Param: auth

        The number of times a given url should be tried
        in case of corrupted response.
        Param: attempts.
        """
        self.url = url
        self.auth = auth
        # None sentinel instead of a mutable {} default, which would be
        # shared by every instance constructed without args.
        self.args = {} if args is None else args
        self.encoding = 'utf-8'
        self.response = None
        self.headers = headers
        self.payload = payload
        self.method = method
        self.attempts = attempts
        self.urlparser = urlparse(url)
        # Was super(list, self).__init__(): super must be given the class
        # itself, not its base, or the MRO lookup starts past list.
        super(Miner, self).__init__()
        self.next(self.url)

    def setup(self, response):
        """
        Record the response, detect its charset from the content-type
        header, decode the body and hand it to build_dom.
        """
        # Reset the fd so it can be reread later.
        data = response.fd.read()
        response.fd.seek(0)
        ctype = response.headers.get('content-type',
            'text/html; charset=%s' % self.encoding)
        # Sets the encoding for later usage
        # in self.geturl for example.
        params = cgi.parse_header(ctype)
        self.encoding = params[1]['charset']
        self.response = response
        data = data.decode(self.encoding, 'ignore')
        self.build_dom(data)

    def build_dom(self, data):
        """Hook: subclasses parse `data` (decoded text) here."""
        pass

    def handle_success(self, request, response):
        """Called on HTTP 200: process the response body."""
        self.setup(response)

    def fetcher(self):
        """Issue a GET for self.url and register it on the shared task."""
        request = Get(self.url, headers=self.headers,
            auth=self.auth, attempts=self.attempts)
        self.task.add(request, ResponseHandle.ERROR, ResponseHandle.DONE)
        request.add_map('200', self.handle_success)
        return request

    def poster(self):
        """Issue a POST for self.url and register it on the shared task."""
        request = Post(self.url, headers=self.headers, payload=self.payload,
            auth=self.auth, attempts=self.attempts)
        self.task.add(request, ResponseHandle.ERROR, ResponseHandle.DONE)
        request.add_map('200', self.handle_success)
        return request

    def geturl(self, reference):
        """
        Resolve `reference` against the current document's scheme and
        host; an already-absolute reference is returned unchanged.
        """
        urlparser = urlparse(reference)
        if not urlparser.scheme:
            return urljoin('%s://%s' % (self.urlparser.scheme,
                self.urlparser.hostname), reference)
        return reference

    def next(self, reference):
        """Follow `reference`: rebase self.url on it and fetch again."""
        self.url = self.geturl(reference)
        self.urlparser = urlparse(self.url)
        if self.method == 'get':
            return self.fetcher()
        return self.poster()

    def run(self, dom):
        """
        Implement your rules here.
        """
        pass
from untwisted.core import die
from untwisted import core
from untwisted.task import Task
from untwisted.job import Job, DONE
import time


def slow_add(x, y):
    """Return x + y after a 3s delay, simulating a blocking computation."""
    time.sleep(3)
    return x + y


def show(job, result):
    """Print the result that a finished Job delivers."""
    print(result)


# Renamed the worker from `sum` to slow_add: shadowing the builtin made
# any later call to sum() in this module silently sleep and add 1000.
task = Task()

# Tell the task it can start trigging events.
task.start()

for ind in range(100):
    job = Job(slow_add, ind, 1000)
    job.add_map(DONE, show)
    task.add(job, DONE)

# Once every Job has fired DONE, shut the reactor down.
task.add_map(DONE, lambda task: die())
core.gear.mainloop()
class Download(object):
    """
    Download http://addr/rsc, following one Location redirect per hop;
    when no redirect is present (or parsing fails), save whatever body
    was received to a file named after the host.
    """

    # Shared across all Download instances: tracks every connection so
    # COMPLETE only fires after the last one has closed or failed.
    task = Task(dict())

    # Trivial gather handler: accept every event, keep nothing.
    job = lambda self, data, event, args: True

    def __init__(self, addr, rsc):
        """
        addr - host to connect to (port 80).
        rsc  - resource path to request.
        """
        self.addr = addr
        self.rsc = rsc
        con = Spin()
        Client(con)
        con.connect_ex((addr, 80))
        xmap(con, CONNECT, self.on_connect)
        xmap(con, CLOSE, lambda con, err: lose(con))
        # The event CLOSE is binded to self.on_close before
        # we call self.task.gather on con, it needs to be so
        # otherwise it might happen of the last Download
        # instance to finish spawn CLOSE and the COMPLETE
        # event be spawned too early. It needs first
        # check whether there is a Location http key in the
        # headers.
        xmap(con, CLOSE, self.on_close)
        xmap(con, CONNECT_ERR, lambda con, err: lose(con))
        self.task.gather(con, (CLOSE, self.job), (CONNECT_ERR, self.job))

    def on_connect(self, con):
        """Install the protocols and send the HTTP request."""
        Stdin(con)
        Stdout(con)
        Accumulator(con)
        con.dump(HTTP_HEADER % (self.rsc, self.addr))

    def on_close(self, con, err):
        """
        On connection close: follow a Location redirect if the response
        has one, otherwise persist the received body.
        """
        data = con.accumulator.data
        try:
            # It might happen of the webserver
            # sending only the http header then splitting up
            # will raise an exception.
            header, data = data.split('\r\n\r\n', 1)
            # I lower all letters so i don't have to worry
            # when indexing location in the dict.
            header = header.lower()
            # It builds the http header.
            header = findall(REG_HEADER, header)
            header = dict(header)
            # If it occurs of the document having moved
            # to other place then we follow the link.
            addr = header['location']
            _, addr = addr.split('//')
            addr, rsc = addr.split('/', 1)
            # After properly extracting the new documment
            # address we download it.
            Download(addr, rsc)
        except (ValueError, KeyError):
            # Was a bare `except:`, which also swallowed KeyboardInterrupt
            # and SystemExit. Only the expected failures are caught now:
            # ValueError from the split calls, KeyError when there is no
            # Location header — in both cases save what we have in hands.
            with open(self.addr, 'w') as fd:
                fd.write(data)