Пример #1
0
Created on 2014.1.1

@author: gaolichuang
'''
import random
from oslo.config import cfg
from eventlet import greenthread
from miracle.common.manager import periodic_task
from miracle.common.manager import manager
from miracle.common.utils.gettextutils import _  # noqa
from miracle.common.base import log as logging

# Shared oslo.config configuration object.
CONF = cfg.CONF
# Pull in the module that declares 'periodic_report_tasks_interval' so the
# option is registered on CONF.
CONF.import_opt('periodic_report_tasks_interval', 'miracle.common.service.service')

# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)

class ManagerContainer(periodic_task.PeriodicTasks):
    '''
    A ManagerContainer can hold many Manager instances.
    '''
    def __init__(self, manager = None, number = 0):
        self._input_queue = None
        self._output_queue = None
        self.managers = []
        self.number = number
        LOG.info(_("=====================Start %s number:%s===================="% (manager,self.number)))
        i = 0
        while i < self.number:
            _manager = manager()
            self.managers.append(_manager)
Пример #2
0
    cfg.IntOpt('max_fail_retry_time', default=2, help='crawl fail retry time'),
    cfg.IntOpt('max_timeout_retry_time',
               default=2,
               help='crawl timeout retry time'),
    cfg.IntOpt('read_batch_num',
               default=80,
               help='read batch number like db limit'),
    cfg.IntOpt('crawl_timeout', default=3600, help='time out interval'),
    cfg.StrOpt('init_url', default='', help='init url to crawl'),
    cfg.StrOpt('init_url_file', default='', help='init url file to crawl'),
]

# Shared configuration object exposed by cfg.
CONF = cfg.CONF
# Register the scheduler options collected in scheduler_opt.
CONF.register_opts(scheduler_opt)

# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)


class DummySchedulerManager(manager.CrawlManager):
    def __init__(self):
        '''Initialize the manager by delegating to the base-class initializer.'''
        super(DummySchedulerManager, self).__init__()

    def run_periodic_report_tasks(self, service):
        '''TODO: fill in the url host level and other fields,
            and make some necessary checks'''
        doc = CrawlDoc()
        doc.request_url = 'http://roll.sohu.com/'
        #        doc.request_url = 'http://www.163.com/'
        doc.url = doc.request_url
        doc.docid = mmh3.hash(doc.url)
        doc.level = 1