示例#1
0
class OutputQueue(object):
    """Push every processed item onto a MongoDB-backed page queue.

    NOTE(review): the ``from_crawler`` / ``process_item`` pair has the shape
    of a Scrapy item pipeline; confirm against the project configuration.
    """

    def __init__(self, mongo_host, mongo_db, mongo_col, jobid):
        """Open the queue collection and wrap it in a ``MongoPageQueue``.

        Args:
            mongo_host: Host handed to the PyMongo client constructor.
            mongo_db: Name of the database that holds the queue collection.
            mongo_col: Name of the queue collection.
            jobid: Job identifier passed through to ``MongoPageQueue``.
        """
        # ``pymongo.Connection`` was removed in PyMongo 3.0. Prefer
        # ``MongoClient`` when the installed driver has it and only fall back
        # to ``Connection`` on releases that predate it. Note that
        # ``MongoClient`` acknowledges writes by default.
        client_cls = getattr(pymongo, 'MongoClient', None) or pymongo.Connection
        col = client_cls(mongo_host)[mongo_db][mongo_col]
        self.q = MongoPageQueue(col, jobid)

    @classmethod
    def from_crawler(cls, crawler):
        """Build an instance from the crawler's settings.

        Reads ``MONGO_HOST``, ``MONGO_DB``, ``MONGO_QUEUE_COL`` and ``JOBID``
        from ``crawler.settings`` and forwards them to the constructor.
        """
        settings = crawler.settings
        return cls(
            settings['MONGO_HOST'],
            settings['MONGO_DB'],
            settings['MONGO_QUEUE_COL'],
            settings['JOBID'],
        )

    def process_item(self, item, spider):
        """Queue a plain-dict copy of *item* and return *item* unchanged.

        ``spider`` is accepted for interface compatibility and is not used.
        """
        self.q.push(dict(item))
        return item
class OutputQueue(object):
    """Push every processed item onto a MongoDB-backed page queue.

    NOTE(review): the ``from_crawler`` / ``process_item`` pair has the shape
    of a Scrapy item pipeline; confirm against the project configuration.
    """

    def __init__(self, mongo_host, mongo_db, mongo_col, jobid):
        """Open the queue collection and wrap it in a ``MongoPageQueue``.

        Args:
            mongo_host: Host handed to the PyMongo client constructor.
            mongo_db: Name of the database that holds the queue collection.
            mongo_col: Name of the queue collection.
            jobid: Job identifier passed through to ``MongoPageQueue``.
        """
        # ``pymongo.Connection`` no longer exists as of PyMongo 3.0, so use
        # ``MongoClient`` whenever the driver offers it; ``Connection`` is
        # kept only as a fallback for drivers older than ``MongoClient``.
        # ``MongoClient`` acknowledges writes by default.
        client_cls = getattr(pymongo, 'MongoClient', None) or pymongo.Connection
        col = client_cls(mongo_host)[mongo_db][mongo_col]
        self.q = MongoPageQueue(col, jobid)

    @classmethod
    def from_crawler(cls, crawler):
        """Build an instance from the crawler's settings.

        Reads ``MONGO_HOST``, ``MONGO_DB``, ``MONGO_QUEUE_COL`` and ``JOBID``
        from ``crawler.settings`` and forwards them to the constructor.
        """
        settings = crawler.settings
        return cls(
            settings['MONGO_HOST'],
            settings['MONGO_DB'],
            settings['MONGO_QUEUE_COL'],
            settings['JOBID'],
        )

    def process_item(self, item, spider):
        """Queue a plain-dict copy of *item* and return *item* unchanged.

        ``spider`` is accepted for interface compatibility and is not used.
        """
        self.q.push(dict(item))
        return item
示例#3
0
 def setUp(self):
     """Start from an empty 'hci-test' database and build the queue under test."""
     client = MongoClient()
     # Drop the whole database first so documents left by earlier runs
     # cannot affect this test.
     client.drop_database('hci-test')
     pages = client['hci-test']['crawler.pages']
     self.pagequeue = MongoPageQueue(pages, 'JOBID')
示例#4
0
 def __init__(self, mongo_host, mongo_db, mongo_col, jobid):
     """Open the queue collection and wrap it in a ``MongoPageQueue``.

     Args:
         mongo_host: Host handed to the PyMongo client constructor.
         mongo_db: Name of the database that holds the queue collection.
         mongo_col: Name of the queue collection.
         jobid: Job identifier passed through to ``MongoPageQueue``.
     """
     # ``pymongo.Connection`` was removed in PyMongo 3.0. Prefer
     # ``MongoClient`` when the installed driver has it and only fall back
     # to ``Connection`` on releases that predate it. ``MongoClient``
     # acknowledges writes by default.
     client_cls = getattr(pymongo, 'MongoClient', None) or pymongo.Connection
     col = client_cls(mongo_host)[mongo_db][mongo_col]
     self.q = MongoPageQueue(col, jobid)
 def __init__(self, mongo_host, mongo_db, mongo_col, jobid):
     """Open the queue collection and wrap it in a ``MongoPageQueue``.

     Args:
         mongo_host: Host handed to the PyMongo client constructor.
         mongo_db: Name of the database that holds the queue collection.
         mongo_col: Name of the queue collection.
         jobid: Job identifier passed through to ``MongoPageQueue``.
     """
     # ``pymongo.Connection`` no longer exists as of PyMongo 3.0, so use
     # ``MongoClient`` whenever the driver offers it; ``Connection`` is kept
     # only as a fallback for drivers older than ``MongoClient``.
     # ``MongoClient`` acknowledges writes by default.
     client_cls = getattr(pymongo, 'MongoClient', None) or pymongo.Connection
     col = client_cls(mongo_host)[mongo_db][mongo_col]
     self.q = MongoPageQueue(col, jobid)
示例#6
0
 def setUp(self):
     """Start from an empty 'hci-test' database and build the queue under test."""
     # ``pymongo.Connection`` was removed in PyMongo 3.0. Prefer
     # ``MongoClient`` when the installed driver has it and only fall back
     # to ``Connection`` on releases that predate it.
     client_cls = getattr(pymongo, 'MongoClient', None) or pymongo.Connection
     c = client_cls()
     # Drop the whole database first so documents left by earlier runs
     # cannot affect this test.
     c.drop_database('hci-test')
     col = c['hci-test']['crawler.pages']
     self.pagequeue = MongoPageQueue(col, 'JOBID')