Example #1
0
 def run(self, **kwargs):
     """Create a schedule document and its initial seed, then dispatch it.

     Expects 'title', 'route' and 'kwargs' keys in **kwargs, plus an
     optional 'incremental' flag (defaults to False).  Inserts the
     schedule into self.collection, rejects duplicate titles, inserts a
     seed document into env.MONGO.seeds, and hands the seed to Route.

     Raises:
         ValueError: if a required key is missing or the title already
             exists.  (ValueError subclasses Exception, so callers that
             catch Exception are unaffected.)
     """
     if not all(key in kwargs for key in ('title', 'route', 'kwargs')):
         raise ValueError('invalid schedule %s' % kwargs)
     # Single timestamp so the schedule's 'created' and the seed's
     # 'created'/'updated' fields agree exactly.
     now = time.time()
     doc = {
         "title": kwargs.pop('title'),
         "route": kwargs.pop('route'),
         "created": now,
         "incremental": kwargs.pop('incremental', False),
         "kwargs": kwargs.pop('kwargs'),
     }
     # NOTE(review): check-then-insert is racy under concurrent writers;
     # a unique index on 'title' would enforce this atomically.
     if doc['title'] in self.collection.distinct('title'):
         raise ValueError('Existed schedule title %s' % doc['title'])
     schedule_id = self.collection.insert(doc)
     doc_seed = {
         "_scheduleId": schedule_id,
         "_status": None,      # not yet processed
         "_seedId": None,      # root seed has no parent seed
         "_ancestors": [],
         "route": doc['route'],
         "incremental": doc['incremental'],
         "created": now,
         "updated": now,
         "kwargs": doc['kwargs'],
     }
     seed_id = env.MONGO.seeds.insert(doc_seed)
     Route().run(doc['route'], seed_id, 'seeds')
Example #2
0
 def gen_next_job(self, next_route, _id):
     """Dispatch the webpage identified by _id to the given next route."""
     router = Route()
     router.run(next_route, _id, 'webpages')
Example #3
0
 def gen_next_job(self, route, _id):
     """Dispatch the webpage identified by _id to self.next_job.

     NOTE(review): the 'route' parameter is accepted but ignored; the
     instance attribute self.next_job is used instead — confirm this is
     intentional before relying on the parameter.
     """
     Route().run(self.next_job, _id, 'webpages')
Example #4
0
def start_ml():
    """Queue every stream document onto the weibo-statuses ML route."""
    target_route = 'ml.weibo.statuses'
    for stream_id in env.MONGO.streams.distinct('_id'):
        Route().run(target_route, stream_id, 'streams')
Example #5
0
def start_etl():
    """Queue every webpage document onto the weibo-statuses extractor route."""
    target_route = 'etl.extractors.weibo.statuses'
    for page_id in env.MONGO.webpages.distinct('_id'):
        Route().run(target_route, page_id, 'webpages')
Example #6
0
def start_crawler():
    """Kick off the crawler on the first seed document.

    Raises:
        Exception: if the seeds collection is empty.  (The original
        indexed [0] unconditionally, which raised an opaque IndexError
        in that case.)
    """
    seed_ids = env.MONGO.seeds.distinct('_id')
    if not seed_ids:
        raise Exception('no seeds available to start the crawler')
    Route().run('crawler.weibo.statuses', seed_ids[0], 'seeds')
Example #7
0
 def gen_next_job(self, _id):
     """Dispatch the stream identified by _id to self.next_job."""
     next_route = self.next_job
     Route().run(next_route, _id, 'streams')