示例#1
0
文件: http.py 项目: jmorel/rdc.etl
# -*- coding: utf-8 -*-
import time

from rdc.etl.harness.threaded import ThreadedHarness
from rdc.etl.contrib.example import build_producer, run
from rdc.etl.status.http import HttpStatus
from rdc.etl.transform import Transform

# Harness that will hold and execute the transformation chain built below.
h = ThreadedHarness()
# Producer built by the example helper; `count=500` is presumably the number
# of rows it emits -- confirm against build_producer.
p1 = build_producer('Producer 1', count=500)

@Transform
def delay(h, c):
    """Pause for 0.2 seconds, then pass the incoming row through unchanged.

    ``h`` is the incoming row (it shadows the module-level harness ``h``
    inside this function); ``c`` is accepted but never used.
    """
    pause_seconds = 0.2
    time.sleep(pause_seconds)
    yield h

@Transform
def delay2(h, c):
    """Pause for 0.5 seconds, then pass the incoming row through unchanged.

    ``h`` is the incoming row (it shadows the module-level harness ``h``
    inside this function); ``c`` is accepted but never used.
    """
    pause_seconds = 0.5
    time.sleep(pause_seconds)
    yield h

# Wire the chain: producer output goes through `delay`, then `delay2`.
h.add_chain(p1, delay, delay2)
# Register an HTTP status reporter on the harness
# (NOTE(review): what it serves and where is not visible here -- confirm).
h.status.append(HttpStatus())
# Hand the configured harness to the example helper's runner.
run(h)
@Filter
def filter_empty(hash, channel=STDIN):
    """Return the length of the row's ``'tld'`` value.

    The result is zero (falsy) exactly when ``hash['tld']`` is empty.
    How ``Filter`` interprets the return value is not visible here --
    presumably a falsy result drops the row; confirm against rdc.etl.
    ``channel`` is accepted but not used.
    """
    tld = hash['tld']
    return len(tld)

@Transform
def add_www(hash, channel=STDIN):
    """Prefix the row's ``'tld'`` value with ``'www'`` via ``hash.update``.

    Returns whatever ``hash.update`` returns. ``channel`` is not used.

    NOTE(review): a plain ``dict.update`` returns ``None``; this only
    forwards the row if the project's hash type returns itself from
    ``update`` -- confirm. Also note no ``'.'`` is inserted between
    ``'www'`` and the value; presumably the value already starts with one.
    """
    prefixed = 'www' + hash['tld']
    return hash.update({'tld': prefixed})

@Join
def resolve_domain_name(hash, channel=STDIN):
    """Resolve ``hash['tld']`` on port 80 and yield one dict per address.

    Each yielded dict carries the five fields returned by
    ``socket.getaddrinfo`` (``family``, ``socktype``, ``proto``,
    ``canonname``, ``sockaddr``) plus ``ip``, the first element of
    ``sockaddr``. ``channel`` is accepted but not used.
    """
    field_names = ('family', 'socktype', 'proto', 'canonname', 'sockaddr')
    for addrinfo in socket.getaddrinfo(hash['tld'], 80):
        # getaddrinfo entries are 5-tuples, in the same order as field_names.
        record = dict(zip(field_names, addrinfo))
        record['ip'] = record['sockaddr'][0]
        yield record

# Build a threaded harness and register a single chain: the domain source,
# the emptiness filter, the 'www' prefixer, the DNS resolver, and a Log()
# step at the end.
harness = ThreadedHarness()
harness.add_chain(
    extract_supported_domains,
    filter_empty,
    add_www,
    resolve_domain_name,
    Log()
)
# Calling the harness object starts it
# (NOTE(review): presumably blocks until the chain finishes -- confirm).
harness()
示例#3
0

@Filter
def filter_empty(hash, channel=STDIN):
    """Return the length of the ``'tld'`` entry of the incoming row.

    An empty ``'tld'`` gives ``0`` (falsy); any non-empty value gives a
    positive integer. NOTE(review): presumably ``Filter`` keeps rows for
    which this is truthy -- confirm. ``channel`` is not used.
    """
    value = hash['tld']
    return len(value)


@Transform
def add_www(hash, channel=STDIN):
    """Rewrite the row's ``'tld'`` entry as ``'www'`` + its current value.

    The return value is the result of ``hash.update``; ``channel`` is unused.

    NOTE(review): with a plain ``dict`` this returns ``None``. It relies on
    the row type's ``update`` returning the row itself -- confirm against
    the project's hash class.
    """
    changes = {'tld': 'www' + hash['tld']}
    return hash.update(changes)


@Join
def resolve_domain_name(hash, channel=STDIN):
    """Look up ``hash['tld']`` on port 80 and yield a dict for every result.

    Every yielded dict has the keys ``family``, ``socktype``, ``proto``,
    ``canonname`` and ``sockaddr`` (taken from ``socket.getaddrinfo``) and
    ``ip``, which is ``sockaddr[0]``. ``channel`` is accepted but not used.
    """
    for info in socket.getaddrinfo(hash['tld'], 80):
        family, socktype, proto, canonname, sockaddr = info
        yield dict(
            family=family,
            socktype=socktype,
            proto=proto,
            canonname=canonname,
            sockaddr=sockaddr,
            ip=sockaddr[0],
        )


# Build a threaded harness with one chain: domain source -> emptiness
# filter -> 'www' prefixer -> DNS resolver -> Log() step.
harness = ThreadedHarness()
harness.add_chain(extract_supported_domains, filter_empty, add_www,
                  resolve_domain_name, Log())
# Calling the harness object starts it
# (NOTE(review): presumably blocks until the chain finishes -- confirm).
harness()
示例#4
0
# -*- coding: utf-8 -*-
import time

from rdc.etl.harness.threaded import ThreadedHarness
from rdc.etl.extra.example import build_producer, run
from rdc.etl.status.console import ConsoleStatus
from rdc.etl.status.http import HttpStatus
from rdc.etl.transform import Transform

# Harness that will hold and execute the transformation chain built below.
h = ThreadedHarness()
# Producer built by the example helper; `count=500` is presumably the number
# of rows it emits -- confirm against build_producer.
p1 = build_producer('Producer 1', count=500)


@Transform
def delay(h, c):
    """Wait 0.2 seconds, then emit the incoming row as-is.

    Inside this function ``h`` is the row being processed, not the
    module-level harness of the same name. ``c`` is never read.
    """
    wait_seconds = 0.2
    time.sleep(wait_seconds)
    yield h


@Transform
def delay2(h, c):
    """Wait 0.5 seconds, then emit the incoming row as-is.

    Inside this function ``h`` is the row being processed, not the
    module-level harness of the same name. ``c`` is never read.
    """
    wait_seconds = 0.5
    time.sleep(wait_seconds)
    yield h


# Wire the chain: producer output goes through `delay`, then `delay2`.
h.add_chain(p1, delay, delay2)
# Register two status reporters on the harness: a console one and an HTTP one
# (NOTE(review): their output formats/endpoints are not visible here).
h.status.append(ConsoleStatus())
h.status.append(HttpStatus())
# Hand the configured harness to the example helper's runner.
run(h)