示例#1
0
class Worker(threading.Thread):
    """Thread that pulls jobs from a shared queue and advances them by stage.

    A job is dispatched on ``job.url_type``:

    * ``LIST_URL_TYPE``: ``GetListData(job.url)`` is iterated and one
      ``ITEM_URL_TYPE`` job per row is put back on the queue.
    * ``ITEM_URL_TYPE``: ``GetItemData`` is called (with a fallback to
      ``job.supporturl``) and one ``REAL_URL_TYPE`` job per result is put
      back on the queue.
    * ``REAL_URL_TYPE``: ``GetRealPlayUrl(job.url)`` is serialised to JSON,
      stored on ``job.reallinks`` and the job is handed to ``process_job``.
    """

    def __init__(self, job_queue):
        """Store the queue and create the two DataSaver instances.

        job_queue -- object providing ``get()`` and ``put()``; it is both
                     the source of work and the sink for follow-up jobs.
        """
        threading.Thread.__init__(self)
        self.job_queue = job_queue
        self.ods = DataSaver()
        # Not used anywhere inside this class; kept because it is part of
        # the instance's visible attributes.
        self.sfs = DataSaver()

    def run(self):
        """Configure the output saver, then process queued jobs in a loop.

        The loop has no exit condition. Any ``Exception`` raised while
        handling a job is logged and the loop continues with the next job;
        ``SystemExit``/``KeyboardInterrupt`` are no longer swallowed.
        """
        tname = threading.current_thread().getName()
        ods_policy = {
            'roll_policy': 'time:hour',
            'namefmt': 'data%/%t/%t/res_%t.dat',
            'timefmt': ['%Y', '%Y%m%d', '%Y%m%d_%H%M%S']
        }
        self.ods.set_filename_format(ods_policy)
        while True:
            # Reset on every iteration: if get() itself fails, the handler
            # below must not hit an unbound name or report the previous
            # iteration's job.
            job = None
            try:
                job = self.job_queue.get()
                log.info('From: %s, fetch %s , %s, start.' %
                         (tname, job.url_type, job.url))
                if job.url_type == LIST_URL_TYPE:
                    self._expand_list_job(job)
                elif job.url_type == ITEM_URL_TYPE:
                    self._expand_item_job(job)
                elif job.url_type == REAL_URL_TYPE:
                    self._resolve_real_job(job)

                # Pause between jobs; skipped when the job raised.
                sleep(random.uniform(10, 11.8))
            except Exception:
                t, v, tb = sys.exc_info()
                log.error("url:  %s %s,%s,%s" %
                          (getattr(job, 'url', None), t, v,
                           traceback.format_tb(tb)))

    def _expand_list_job(self, job):
        """Queue one ITEM_URL_TYPE job per row from GetListData(job.url)."""
        for (rating, title, url, source, area, actor, cid, duration, intro,
             s_intro, date, ctype, imgh_url, imgv_url) in GetListData(job.url):
            purl = BASE_URL % int(cid)
            # The title is encoded as gb18030 before being URL-quoted.
            supporturl = SUPPORT_URL % (urllib.quote(
                title.encode('gb18030')))
            newjob = Job(rating=rating, title=title, url=purl,
                         supporturl=supporturl, mainurl=url, source=source,
                         area=area, actor=actor, cid=cid, duration=duration,
                         intro=intro, s_intro=s_intro, date=date,
                         ctype=ctype, imgh_url=imgh_url, imgv_url=imgv_url,
                         url_type=ITEM_URL_TYPE)
            self.job_queue.put(newjob)

    def _expand_item_job(self, job):
        """Queue one REAL_URL_TYPE job per result from GetItemData."""
        result = GetItemData(job.url, 1)
        if not result:
            # Primary lookup gave nothing; retry against the support URL.
            result = GetItemData(job.supporturl, 2)
        for playurl, anchor, playtimes in result:
            # NOTE(review): jobs queued by the LIST stage are created with
            # url=... and no purl=... keyword, so whether job.purl exists
            # depends on Job's constructor -- confirm against Job.
            newjob = Job(rating=job.rating, title=job.title, purl=job.purl,
                         supporturl=job.supporturl, mainurl=job.mainurl,
                         source=job.source, area=job.area, actor=job.actor,
                         cid=job.cid, duration=job.duration, intro=job.intro,
                         s_intro=job.s_intro, date=job.date, ctype=job.ctype,
                         imgh_url=job.imgh_url, imgv_url=job.imgv_url,
                         url_type=REAL_URL_TYPE, url=playurl, anchor=anchor,
                         playtimes=playtimes)
            self.job_queue.put(newjob)

    def _resolve_real_job(self, job):
        """Attach the JSON-encoded GetRealPlayUrl result and persist the job."""
        result = GetRealPlayUrl(job.url)
        # The encoding= keyword is only accepted by Python 2's json.dumps.
        job.reallinks = json.dumps(result,
                                   ensure_ascii=False,
                                   encoding='utf-8')
        log.error(job.reallinks)
        self.process_job(job)

    def process_job(self, job):
        """Serialise ``job.__dict__`` to JSON and pass it to ``self.ods``.

        The JSON text is followed by a newline and handed to
        ``self.ods.save_data`` with ``no_head=True``.
        """
        newtaskdata = json.dumps(job.__dict__,
                                 ensure_ascii=False,
                                 encoding='utf-8')
        self.ods.save_data("%s\n" % newtaskdata, no_head=True)
示例#2
0
 def __init__(self, job_queue):
     """Initialise the thread, keep the queue, and create two savers.

     job_queue is stored unchanged on the instance. Two separate
     DataSaver objects are created and kept as ``ods`` and ``sfs``.
     """
     threading.Thread.__init__(self)
     # Same creation order as before: the ``ods`` saver first, then ``sfs``.
     ods_saver = DataSaver()
     sfs_saver = DataSaver()
     self.job_queue = job_queue
     self.ods = ods_saver
     self.sfs = sfs_saver