def __init__(self, channel, save='fulltext.seed.txt', db='zhuanli', dburl='mongodb://localhost/zhuanli'):
    """Initialise the parser base and attach a seed link saver.

    ``channel`` is forwarded to ``CWPParser.__init__`` as both of its first
    two positional arguments, followed by ``db`` and ``dburl``.

    :param channel: value passed twice to the base initialiser
    :param save: argument handed to ``LinkSaver`` for ``self.seed_saver``
    :param db: third positional argument for the base initialiser
    :param dburl: fourth positional argument for the base initialiser
    """
    CWPParser.__init__(self, channel, channel, db, dburl)
    seed_saver = LinkSaver(save)
    self.seed_saver = seed_saver
def __init__(self):
    """Set up the parser with its store, failure log, URL template and counter.

    The base initialiser receives ``'abs_list'``, ``'abs_list'`` and
    ``'zhuanli'`` positionally; their meaning is defined by ``CWPParser``.
    """
    CWPParser.__init__(self, 'abs_list', 'abs_list', 'zhuanli')

    store_name = 'abstract'
    failed_path = 'abstract.parser.failed.txt'
    self.store = PatentStore(store_name)
    self.failed_link = LinkSaver(failed_path)

    # Template with a single %s placeholder, filling the strWhere query parameter.
    self.url_format = 'http://epub.sipo.gov.cn/dxbdl.action?strSources=fmmost&strWhere=%s&recordCursor=0&strLicenseCode=&action=dxbdln'
    # Counter starts at zero; it is not updated within this method.
    self.save_count = 0
def __init__(self):
    """Initialise the base with ``'fs_court2'`` / ``'fs'`` and create the page store.

    ``self.store`` is an ``FSCourtStore`` built with ``'fs_court'``.
    """
    CWPParser.__init__(self, 'fs_court2', 'fs')
    court_store = FSCourtStore('fs_court')
    self.store = court_store
def init(self):
    """Print the job start time, then defer to ``CWPParser.init``.

    :return: whatever ``CWPParser.init(self)`` returns
    """
    started_at = datetime.datetime.now()
    # One pre-joined argument yields the same output line as the
    # comma-separated print statement ("job start at <timestamp>").
    print('job start at' + ' ' + str(started_at))
    return CWPParser.init(self)
def __init__(self):
    """Initialise the base with ``'shanghai_court'`` / ``'court'`` and open a saver.

    The saver's file name is derived from ``self.name``.
    """
    CWPParser.__init__(self, 'shanghai_court', 'court')
    # self.name is read only after the base initialiser has run
    # (presumably set there -- verify against CWPParser).
    saver_path = 'ah.%s.txt' % self.name
    self.an_saver = LinkSaver(saver_path)
def __init__(self):
    """Initialise the base and attach a page store plus a link saver.

    ``'shanghai_court'`` is passed for both positional arguments of the
    base initialiser.
    """
    CWPParser.__init__(self, 'shanghai_court', 'shanghai_court')
    page_store = ShanghaiCourtStore('sh_court_2')
    self.pagestore = page_store
    wrong_id_saver = LinkSaver('wrong.id.txt')
    self.link_saver = wrong_id_saver
def __init__(self, channel, dist_file, name, parser):
    """Initialise the base with ``channel`` / ``name`` and keep a writer and parser.

    :param channel: first positional argument for ``CWPParser.__init__``
    :param dist_file: argument handed to ``BinSaver`` for ``self.bin_writer``
    :param name: second positional argument for ``CWPParser.__init__``
    :param parser: object stored unchanged as ``self.parser``
    """
    CWPParser.__init__(self, channel, name)
    writer = BinSaver(dist_file)
    self.bin_writer = writer
    self.parser = parser
def __init__(self):
    """Initialise the base with ``'fs_court'`` / ``'fs'`` and create the seed saver.

    ``self.saver`` is a ``LinkSaver`` built with ``'seed.txt'``.
    """
    CWPParser.__init__(self, 'fs_court', 'fs')
    seed_saver = LinkSaver('seed.txt')
    self.saver = seed_saver