# -*- coding: utf-8 -*-
"""Example: a threaded harness whose chain contains two deliberately slow transforms."""

import time

from rdc.etl.harness.threaded import ThreadedHarness
from rdc.etl.contrib.example import build_producer, run
from rdc.etl.status.http import HttpStatus
from rdc.etl.transform import Transform

h = ThreadedHarness()
# presumably a source emitting 500 items -- confirm against build_producer
p1 = build_producer('Producer 1', count=500)


@Transform
def delay(h, c):
    """Sleep for 0.2 seconds, then yield the first argument unchanged."""
    # The parameter ``h`` shadows the module-level harness ``h`` inside
    # this function; only the value passed in by the caller is yielded.
    time.sleep(0.2)
    yield h


@Transform
def delay2(h, c):
    """Sleep for 0.5 seconds, then yield the first argument unchanged."""
    time.sleep(0.5)
    yield h


# Register the stages in order: p1, then delay, then delay2.
h.add_chain(p1, delay, delay2)

# Attach an HttpStatus instance to the harness before handing it to run().
h.status.append(HttpStatus())

run(h)
@Filter
def filter_empty(hash, channel=STDIN):
    """Return the length of ``hash['tld']`` (zero, i.e. falsy, when it is empty)."""
    tld = hash['tld']
    return len(tld)


@Transform
def add_www(hash, channel=STDIN):
    """Prefix ``hash['tld']`` with ``'www'`` and return what ``hash.update`` returns."""
    # NOTE(review): the return value is whatever ``hash.update()`` gives
    # back. A plain dict's ``update()`` returns None, so this relies on the
    # hash type in use returning something meaningful -- confirm.
    prefixed = 'www' + hash['tld']
    return hash.update({'tld': prefixed})


@Join
def resolve_domain_name(hash, channel=STDIN):
    """Yield one dict per ``socket.getaddrinfo`` entry for ``hash['tld']`` on port 80."""
    for entry in socket.getaddrinfo(hash['tld'], 80):
        family, socktype, proto, canonname, sockaddr = entry
        yield {
            'family': family,
            'socktype': socktype,
            'proto': proto,
            'canonname': canonname,
            'sockaddr': sockaddr,
            # First element of the socket address tuple.
            'ip': sockaddr[0],
        }


harness = ThreadedHarness()

# Stages are registered in this order; ``extract_supported_domains`` is
# defined outside this snippet.
harness.add_chain(
    extract_supported_domains,
    filter_empty,
    add_www,
    resolve_domain_name,
    Log(),
)

harness()
@Filter
def filter_empty(hash, channel=STDIN):
    """Return the length of ``hash['tld']`` (zero, i.e. falsy, when it is empty)."""
    tld = hash['tld']
    return len(tld)


@Transform
def add_www(hash, channel=STDIN):
    """Prefix ``hash['tld']`` with ``'www'`` and return what ``hash.update`` returns."""
    # NOTE(review): the return value is whatever ``hash.update()`` gives
    # back. A plain dict's ``update()`` returns None, so this relies on the
    # hash type in use returning something meaningful -- confirm.
    prefixed = 'www' + hash['tld']
    return hash.update({'tld': prefixed})


@Join
def resolve_domain_name(hash, channel=STDIN):
    """Yield one dict per ``socket.getaddrinfo`` entry for ``hash['tld']`` on port 80."""
    for entry in socket.getaddrinfo(hash['tld'], 80):
        family, socktype, proto, canonname, sockaddr = entry
        yield {
            'family': family,
            'socktype': socktype,
            'proto': proto,
            'canonname': canonname,
            'sockaddr': sockaddr,
            # First element of the socket address tuple.
            'ip': sockaddr[0],
        }


harness = ThreadedHarness()

# Stages are registered in this order; ``extract_supported_domains`` is
# defined outside this snippet.
harness.add_chain(
    extract_supported_domains,
    filter_empty,
    add_www,
    resolve_domain_name,
    Log(),
)

harness()
# -*- coding: utf-8 -*-
"""Example: a threaded harness with two slow transforms and two status reporters."""

import time

from rdc.etl.harness.threaded import ThreadedHarness
from rdc.etl.extra.example import build_producer, run
from rdc.etl.status.console import ConsoleStatus
from rdc.etl.status.http import HttpStatus
from rdc.etl.transform import Transform

h = ThreadedHarness()
# presumably a source emitting 500 items -- confirm against build_producer
p1 = build_producer('Producer 1', count=500)


@Transform
def delay(h, c):
    """Sleep for 0.2 seconds, then yield the first argument unchanged."""
    # The parameter ``h`` shadows the module-level harness ``h`` inside
    # this function; only the value passed in by the caller is yielded.
    time.sleep(0.2)
    yield h


@Transform
def delay2(h, c):
    """Sleep for 0.5 seconds, then yield the first argument unchanged."""
    time.sleep(0.5)
    yield h


# Register the stages in order: p1, then delay, then delay2.
h.add_chain(p1, delay, delay2)

# Attach both status objects, console first and HTTP second, before running.
h.status.append(ConsoleStatus())
h.status.append(HttpStatus())

run(h)