def check_should_fetch(self, jdid):
    """Decide whether the page identified by ``jdid`` should be fetched.

    The base-class check runs first and can veto the fetch. When
    ``CData.getholes`` is truthy, the fetch is also refused if
    ``self.find_any`` reports a hit for the index URL
    ``"<channel>://<jdid>"``.

    :param jdid: identifier appended to ``self.channel`` to form the index URL.
    :return: ``True`` if the page should be fetched, ``False`` otherwise.
    """
    if not PageStoreBase.check_should_fetch(self, jdid):
        return False

    # NOTE(review): ``getholes`` is read as an attribute, not called --
    # confirm it is a flag rather than a method.
    if not CData.getholes:
        return True

    index_url = "%s://%s" % (self.channel, jdid)
    already_stored = self.find_any(index_url)
    return False if already_stored else True
def __init__(self):
    """Initialise the base page store with the fixed channel ``qixinbao``."""
    channel_name = "qixinbao"
    PageStoreBase.__init__(self, channel_name)
def __init__(self, channel, dburl="mongodb://*****:*****@localhost/admin"):
    """Initialise the base page store for ``channel``.

    :param channel: channel value forwarded unchanged to ``PageStoreBase``.
    :param dburl: MongoDB URL forwarded unchanged to ``PageStoreBase``.
    """
    # NOTE(review): the default URL embeds a user:password pair (masked in
    # this view) -- consider supplying it through configuration instead.
    PageStoreBase.__init__(
        self,
        channel,
        dburl,
    )
def __init__(self):
    """Initialise the base page store with the fixed channel ``jd_51job``.

    Only the channel is passed to ``PageStoreBase.__init__``. ``hdoc``
    starts out as ``None``.
    """
    channel_name = 'jd_51job'
    # Disabled alternative: also pass dburl='mongodb://localhost/page'.
    PageStoreBase.__init__(self, channel_name)
    # Disabled: self.testmode = 1
    self.hdoc = None
def __init__(self):
    """Initialise the base page store for the ``co_zhuopin`` channel.

    The channel and the MongoDB URL ``mongodb://hadoop2/co_crawler`` are
    both passed positionally. ``hdoc`` starts out as ``None``.
    """
    channel_name = 'co_zhuopin'
    store_url = 'mongodb://hadoop2/co_crawler'
    PageStoreBase.__init__(self, channel_name, store_url)
    # Disabled: self.testmode = 1
    self.hdoc = None
def __init__(self, channel, dburl="mongodb://hadoop2/co_crawler"):
    """Initialise the base page store for ``channel``.

    :param channel: channel value forwarded unchanged to ``PageStoreBase``.
    :param dburl: MongoDB URL forwarded unchanged to ``PageStoreBase``;
        defaults to ``mongodb://hadoop2/co_crawler``.
    """
    PageStoreBase.__init__(
        self,
        channel,
        dburl,
    )
def __init__(self):
    """Initialise the base page store with the fixed channel ``jd_zhilian``.

    ``hdoc`` starts out as ``None``.
    """
    channel_name = 'jd_zhilian'
    PageStoreBase.__init__(self, channel_name)
    self.hdoc = None
def __init__(self):
    """Initialise the base page store with the fixed channel ``qichacha``."""
    channel_name = "qichacha"
    PageStoreBase.__init__(self, channel_name)
def __init__(self):
    """Initialise the base page store for the ``cv_liepin`` channel.

    The MongoDB URL is read from ``LPCVConfig.mongdb_url``. After the base
    initialiser runs, ``testmode`` is set to ``False`` and two ``FileSaver``
    instances are created from the paths configured in ``LPCVConfig``.
    """
    PageStoreBase.__init__(
        self,
        'cv_liepin',
        dburl=LPCVConfig.mongdb_url,
    )
    self.testmode = False

    # The savers are created in this order on purpose: constructing a
    # FileSaver may have side effects, so the original order is kept.
    not_needed_path = LPCVConfig.NOT_NEED_CV_FN
    self._not_need_cv_fs = FileSaver(not_needed_path)
    no_access_path = LPCVConfig.NOT_ACCESS_BY_QIYE
    self._not_access_by_qiye = FileSaver(no_access_path)
def __init__(self):
    """Initialise the base page store from ``mongo_channel`` and ``mongo_cvdb_url``.

    Both names are resolved from the enclosing scope and passed
    positionally to ``PageStoreBase.__init__``.
    """
    PageStoreBase.__init__(
        self,
        mongo_channel,
        mongo_cvdb_url,
    )
def __init__(self, channel, dburl):
    """Initialise the base page store and leave ``log`` unset.

    :param channel: channel value forwarded unchanged to ``PageStoreBase``.
    :param dburl: MongoDB URL forwarded unchanged to ``PageStoreBase``.
    """
    PageStoreBase.__init__(
        self,
        channel,
        dburl,
    )
    # Subclasses need to override this.
    self.log = None
def __init__(self):
    """Initialise the base page store for the ``cv_zhilian`` channel.

    The MongoDB URL comes from ``mongo_cvdb_url``, resolved from the
    enclosing scope and passed positionally.
    """
    channel_name = 'cv_zhilian'
    PageStoreBase.__init__(self, channel_name, mongo_cvdb_url)
def __init__(self):
    """Initialise the base page store for the ``cv_chinahr`` channel.

    The MongoDB URL ``mongodb://hadoop2/cv_crawler`` is passed through the
    ``dburl`` keyword argument.
    """
    store_url = 'mongodb://hadoop2/cv_crawler'
    PageStoreBase.__init__(self, 'cv_chinahr', dburl=store_url)