def __init__(self, threadcnt):
    """Build the 'HenanGaokaoSpider' spider.

    Runs the ``CourtSpider`` initialiser with ``threadcnt`` and then attaches
    the page store and job splitter used by this spider.

    :param threadcnt: forwarded unchanged to ``CourtSpider.__init__``
        (presumably the worker thread count -- confirm against the base class).
    """
    CourtSpider.__init__(self, threadcnt)
    self._name = 'HenanGaokaoSpider'

    # Spider-specific collaborators.
    self.pagestore = HenanGaokaoStore()
    self.job_spliter = HenanSpliter()

    # Start from page 0 with the test flag hard-wired on.
    self._cur_page, self._test_mode = 0, True
def __init__(self, threadcnt):
    """Build the 'WulumuqiCourt' spider.

    Runs the ``CourtSpider`` initialiser with ``threadcnt``, attaches the
    store and job splitter, and finally calls ``Doc2Txt.init()``.

    :param threadcnt: forwarded unchanged to ``CourtSpider.__init__``
        (presumably the worker thread count -- confirm against the base class).
    """
    CourtSpider.__init__(self, threadcnt)
    self._name = 'WulumuqiCourt'

    # Spider-specific collaborators.
    self.pagestore = WLMQCourtStore()
    self.job_spliter = WLMQSpliter()

    # Test flag is hard-wired off for this spider.
    self._test_mode = False

    # Doc2Txt is initialised last, once the spider itself is configured.
    Doc2Txt.init()
def __init__(self, threadcnt=10, seed_file=None, mode='links',
             list_file='links', recover=False, test=False):
    """Build the 'HangzhouCourt' spider.

    Runs the ``CourtSpider`` initialiser, attaches the store and job splitter,
    prepares the default list-query form data and opens a ``LinkSaver`` on
    ``list_file``.

    :param threadcnt: forwarded unchanged to ``CourtSpider.__init__``
        (presumably the worker thread count -- confirm against the base class).
    :param seed_file: stored as ``self.seed_file``; not used here.
    :param mode: stored as ``self.mode``; not interpreted here.
    :param list_file: stored as ``self.list_file`` and passed to ``LinkSaver``.
    :param recover: stored as ``self.recover``; not interpreted here.
    :param test: copied to both ``self._test_mode`` and
        ``self.pagestore.testmode``.
    """
    CourtSpider.__init__(self, threadcnt)
    self._name = 'HangzhouCourt'
    self.pagestore = HZCourtStore()
    self.job_spliter = HZSpliter()

    # Keep the spider and its store in agreement about the test flag.
    self._test_mode = test
    self.pagestore.testmode = test

    # Read the clock exactly once: the query's upper date bound ('jarq2') and
    # ``self.today`` must be the same day even if construction straddles
    # midnight.
    today = time.strftime('%Y%m%d', time.localtime())

    # Default form fields for the list query. 'jarq1'/'jarq2' delimit a date
    # range from 1970-01-01 up to today.
    # NOTE(review): 'pagesize' here is '20' while ``self.page_size`` below is
    # 50 -- confirm which one the request code actually relies on.
    self.list_data = {
        'pageno': '1',
        'pagesize': '20',
        'ajlb': '',
        'cbfy': '1300',
        'ah': '',
        'jarq1': '19700101',
        'jarq2': today,
        'key': '',
    }

    self.seed_file = seed_file
    self.page_size = 50
    self.mode = mode
    self.list_file = list_file
    self.recover = recover
    self.today = today
    self.link_saver = LinkSaver(self.list_file)
def __init__(self, endFbrq='', startFbrq='', thread_cnt=5):
    """Build the 'ChangchunCourt' spider.

    Note the positional order of the parameters: ``endFbrq`` comes *before*
    ``startFbrq``.

    :param endFbrq: stored as ``self.endFbrq``; not interpreted here.
    :param startFbrq: stored as ``self.startFbrq``; not interpreted here.
    :param thread_cnt: forwarded unchanged to ``CourtSpider.__init__``
        (presumably the worker thread count -- confirm against the base class).
    """
    CourtSpider.__init__(self, thread_cnt)
    self._name = 'ChangchunCourt'

    # Spider-specific collaborators.
    self.pagestore = CCCourtStore()
    self.job_spliter = CCSpliter()

    # Remember both bounds as given by the caller.
    self.startFbrq, self.endFbrq = startFbrq, endFbrq
def __init__(self, threadcnt, list_seeds=None):
    """Build the 'FoshanCourt' spider.

    :param threadcnt: forwarded unchanged to ``CourtSpider.__init__``
        (presumably the worker thread count -- confirm against the base class).
    :param list_seeds: stored as ``self.list_seeds``; defaults to ``None`` and
        is not interpreted here.
    """
    CourtSpider.__init__(self, threadcnt)
    self._name = 'FoshanCourt'

    # Spider-specific collaborators.
    self.pagestore = FSCourtStore()
    self.job_spliter = FSSpliter()

    # Fixed settings: test flag on, 20 entries per page.
    self._test_mode, self.page_size = True, 20

    self.list_seeds = list_seeds
def __init__(self, threadcnt):
    """Build the 'YantianCourt' spider.

    Runs the ``CourtSpider`` initialiser, attaches the store and job splitter,
    sets the fixed defaults and finally calls ``self.register_signal()``.

    :param threadcnt: forwarded unchanged to ``CourtSpider.__init__``
        (presumably the worker thread count -- confirm against the base class).
    """
    CourtSpider.__init__(self, threadcnt)
    self._name = 'YantianCourt'

    # Spider-specific collaborators.
    self.pagestore = YantianCourtStore()
    self.job_spliter = YantianSpliter()

    # Start from page 0 with the test flag hard-wired on.
    self._cur_page, self._test_mode = 0, True

    # File name kept for leftover jobs (presumably written on interruption --
    # confirm against ``register_signal`` and its handler).
    self._remain_job_file = 'jobs_remain'

    # Registered last so every attribute above exists before it runs.
    self.register_signal()
def __init__(self, thread_count=5, name='ShanghaiCourtListSpider',
             log='list.spider.log', out='links', recover=False):
    """Build a Shanghai court list spider.

    :param thread_count: first argument forwarded to ``CourtSpider.__init__``
        (presumably the worker thread count -- confirm against the base class).
    :param name: stored as ``self._name``.
    :param log: second argument forwarded to ``CourtSpider.__init__``
        (presumably a log file name -- confirm against the base class).
    :param out: passed to ``LinkSaver`` to create ``self.link_saver``.
    :param recover: stored as ``self.recover``; not interpreted here.
    """
    CourtSpider.__init__(self, thread_count, log)
    self._name = name

    # Persistence helpers: the seed store plus two link databases.
    self.pagestore = ShanghaiSeedStore()
    self.linkdb = ShanghaiLinkDb('sh_link')
    self.seedb = ShanghaiLinkDb('sh_seed')
    self.link_saver = LinkSaver(out)

    # A lock and a failure counter that starts at zero
    # (presumably the lock guards the counter -- confirm at the use sites).
    self.lock = threading.Lock()
    self.pager_failed_count = 0

    self.recover = recover
def __init__(self, threadcnt):
    """Build the 'FutianCourt' spider.

    :param threadcnt: forwarded unchanged to ``CourtSpider.__init__``
        (presumably the worker thread count -- confirm against the base class).
    """
    CourtSpider.__init__(self, threadcnt)
    self._name = 'FutianCourt'

    # Spider-specific collaborators.
    self.pagestore = FutianCourtStore()
    self.job_spliter = FutianSpliter()

    # Test flag is hard-wired on for this spider.
    self._test_mode = True
def __init__(self, threadcnt):
    """Build the 'dgCourt' spider.

    Runs the ``CourtSpider`` initialiser, attaches the store and job splitter,
    and finally calls ``Doc2Txt.init()``.

    :param threadcnt: forwarded unchanged to ``CourtSpider.__init__``
        (presumably the worker thread count -- confirm against the base class).
    """
    CourtSpider.__init__(self, threadcnt)
    self._name = 'dgCourt'

    # Spider-specific collaborators.
    self.pagestore = DGCourtStore()
    self.job_spliter = DGSpliter()

    # Doc2Txt is initialised last, once the spider itself is configured.
    Doc2Txt.init()
def __init__(self, threadcnt):
    """Build the 'cqnaCourt' spider.

    :param threadcnt: forwarded unchanged to ``CourtSpider.__init__``
        (presumably the worker thread count -- confirm against the base class).
    """
    CourtSpider.__init__(self, threadcnt)
    self._name = 'cqnaCourt'

    # Spider-specific collaborators.
    self.pagestore = CQNACourtStore()
    self.job_spliter = CQNASpliter()
def __init__(self, threadcnt):
    """Build the 'FoshanLinkSpider' spider.

    This constructor sets no page store or job splitter; it only opens a
    ``LinkSaver`` on "links".

    :param threadcnt: forwarded unchanged to ``CourtSpider.__init__``
        (presumably the worker thread count -- confirm against the base class).
    """
    CourtSpider.__init__(self, threadcnt)
    self._name = 'FoshanLinkSpider'

    # Fixed settings: test flag on, 20 entries per page.
    self._test_mode, self.page_size = True, 20

    self.link_saver = LinkSaver("links")