# Example #1
0
import logging
from datetime import timedelta

from dataflow import DataFlow
from workers import head_url, node_url, newfile_filter, ignore_filter, file_to_db

# Route DEBUG-and-above records to the scanner log file. filemode='w'
# truncates the file each time the script starts.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    filename='/var/log/homulili/scanner.log',
    filemode='w',
)
logger = logging.getLogger(__name__)

# Intervals handed to the rate-limited nodes, expressed as float seconds
# (1800.0 and 10.0 respectively).
master_update_interval = timedelta(minutes=30).total_seconds()
madokami_stage_interval = timedelta(seconds=10).total_seconds()

logger.info('Starting scanner')

# Build the pipeline as a linear chain: every node after the first takes the
# previous node's `out` as its `input`.
df = DataFlow()
x = df.rate_limited_node(target=head_url, interval=master_update_interval)
x = df.rate_limited_node(input=x.out,
                         target=node_url,
                         interval=madokami_stage_interval)
x = df.node(input=x.out, target=newfile_filter)
x = df.node(input=x.out, target=ignore_filter)
# Last stage is created with num_outputs=0.
# NOTE(review): presumably a terminal sink that persists results -- confirm
# against workers.file_to_db and DataFlow.node.
x = df.node(input=x.out, num_outputs=0, target=file_to_db)

logger.debug('Scanner graph initialized')

df.run()
# Example #2
0
import logging
from datetime import timedelta

from dataflow import DataFlow
from workers import get_manga_ids, urls_from_db, name_file, download_file, update_db

# Route DEBUG-and-above records to the scraper log file. filemode='w'
# truncates the file each time the script starts.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    filename='/var/log/homulili/scraper.log',
    filemode='w',
)
logger = logging.getLogger(__name__)

# Intervals handed to the rate-limited nodes, expressed as float seconds
# (300.0 and 30.0 respectively).
master_interval = timedelta(minutes=5).total_seconds()
madokami_file_interval = timedelta(seconds=30).total_seconds()

logger.info('Starting scraper')

# Build the pipeline as a linear chain: every node after the first takes the
# previous node's `out` as its `input`.
df = DataFlow()
x = df.rate_limited_node(interval=master_interval, target=get_manga_ids)
x = df.node(input=x.out, target=urls_from_db)
x = df.node(input=x.out, target=name_file)
x = df.rate_limited_node(interval=madokami_file_interval,
                         input=x.out,
                         target=download_file)
# Last stage is created with num_outputs=0.
# NOTE(review): presumably a terminal sink that records results -- confirm
# against workers.update_db and DataFlow.node.
x = df.node(input=x.out, target=update_db, num_outputs=0)

logger.debug('Scraper graph initialized')

df.run()