# Stdlib dependencies of this class. DataSaver, Job, GetListData, GetItemData,
# GetRealPlayUrl, log, BASE_URL, SUPPORT_URL and the *_URL_TYPE constants are
# defined elsewhere in this module. Note this is Python 2 code: urllib.quote
# and the json encoding= keyword below do not exist in Python 3.
import json
import random
import sys
import threading
import traceback
import urllib
from time import sleep


class Worker(threading.Thread):

    def __init__(self, job_queue):
        threading.Thread.__init__(self)
        self.job_queue = job_queue
        self.ods = DataSaver()
        self.sfs = DataSaver()

    def run(self):
        tname = threading.current_thread().getName()
        # File-naming / rolling policy handed to the DataSaver (rolls hourly).
        ods_policy = {
            'roll_policy': 'time:hour',
            'namefmt': 'data%/%t/%t/res_%t.dat',
            'timefmt': ['%Y', '%Y%m%d', '%Y%m%d_%H%M%S']
        }
        self.ods.set_filename_format(ods_policy)
        while True:
            try:
                job = self.job_queue.get()
                log.info('From: %s, fetch %s, %s, start.' % (tname, job.url_type, job.url))
                if job.url_type == LIST_URL_TYPE:
                    # A list page yields one item job per entry.
                    result = GetListData(job.url)
                    for (rating, title, url, source, area, actor, cid, duration,
                         intro, s_intro, date, ctype, imgh_url, imgv_url) in result:
                        purl = BASE_URL % int(cid)
                        supporturl = SUPPORT_URL % urllib.quote(title.encode('gb18030'))
                        newjob = Job(rating=rating, title=title, url=purl,
                                     supporturl=supporturl, mainurl=url, source=source,
                                     area=area, actor=actor, cid=cid, duration=duration,
                                     intro=intro, s_intro=s_intro, date=date, ctype=ctype,
                                     imgh_url=imgh_url, imgv_url=imgv_url,
                                     url_type=ITEM_URL_TYPE)
                        self.job_queue.put(newjob)
                elif job.url_type == ITEM_URL_TYPE:
                    # An item page yields one real-url job per play link; fall back
                    # to the support URL if the primary page returns nothing.
                    result = GetItemData(job.url, 1)
                    if not result:
                        result = GetItemData(job.supporturl, 2)
                    for playurl, anchor, playtimes in result:
                        newjob = Job(rating=job.rating, title=job.title, purl=job.purl,
                                     supporturl=job.supporturl, mainurl=job.mainurl,
                                     source=job.source, area=job.area, actor=job.actor,
                                     cid=job.cid, duration=job.duration, intro=job.intro,
                                     s_intro=job.s_intro, date=job.date, ctype=job.ctype,
                                     imgh_url=job.imgh_url, imgv_url=job.imgv_url,
                                     url_type=REAL_URL_TYPE, url=playurl,
                                     anchor=anchor, playtimes=playtimes)
                        self.job_queue.put(newjob)
                elif job.url_type == REAL_URL_TYPE:
                    # Resolve the real play links and persist the finished job.
                    result = GetRealPlayUrl(job.url)
                    job.reallinks = json.dumps(result, ensure_ascii=False, encoding='utf-8')
                    log.error(job.reallinks)
                    self.process_job(job)
                sleep(random.uniform(10, 11.8))
            except Exception:
                t, v, tb = sys.exc_info()
                log.error("url: %s %s,%s,%s" % (job.url, t, v, traceback.format_tb(tb)))

    def process_job(self, job):
        # Serialize the whole job record as one JSON line and append it to the
        # DataSaver's rolled output file.
        newtaskdata = json.dumps(job.__dict__, ensure_ascii=False, encoding='utf-8')
        self.ods.save_data("%s\n" % newtaskdata, no_head=True)
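
# A minimal sketch of how these Worker threads might be wired up; the real
# entry point is not part of this excerpt. It assumes Python 2's Queue module,
# that Job(url=..., url_type=...) is a valid way to seed a list-page job (the
# full Job constructor is not shown here), and a hypothetical seed_urls list
# standing in for the real list-page URLs.
import Queue


def start_workers(seed_urls, num_workers=4):
    job_queue = Queue.Queue()
    # Seed the queue with list-page jobs; Worker.run() expands each of them
    # into item jobs and finally real-play-url jobs on the same queue.
    for url in seed_urls:
        job_queue.put(Job(url=url, url_type=LIST_URL_TYPE))
    workers = []
    for _ in range(num_workers):
        w = Worker(job_queue)
        w.setDaemon(True)  # run() loops forever, so let the process exit without joining
        w.start()
        workers.append(w)
    return workers

# Example: start_workers(seed_urls, num_workers=8)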