def init_queue(self, queue):
    """Install the shared work queue on the DownloadThread class.

    Stores *queue* in the class-private attribute ``DownloadThread.__queue``
    shared by every worker thread, then reports whether the queue was
    supplied and already holds work items.

    :param queue: presumably a ``Queue.Queue`` of ``(target_dir, url)``
        work items consumed by ``run()`` — TODO confirm against caller.
    :returns: ``True`` when the queue is set and non-empty; ``None``
        otherwise (callers should treat a falsy result as "init failed /
        nothing to do").
    """
    DownloadThread.__queue = queue
    if DownloadThread.__queue and not DownloadThread.__queue.empty():
        logger_root.debug('Queue init succeed. Exiting ' + self.name)
        # NOTE(review): the original source was truncated here ("return Tr");
        # restored as "return True", the only plausible completion.
        return True
# NOTE(review): everything below has been flattened onto a few giant physical
# lines by whatever produced this file; as written it is NOT executable Python
# (statements are space-joined, and the first '#' on each line comments out the
# remainder).  The code is left byte-identical pending recovery of the original
# line structure; only this header and comment translations were added.
#
# What the flattened code shows:
#
# run(self) — worker-thread main loop (Python 2, urllib2):
#   * Pops (target_dir, url) pairs from the class-level DownloadThread.__queue;
#     every queue/set access is bracketed by queue_lock.acquire()/release().
#   * Skips a url already present in the global download_list (duplicate), and
#     skips a url whose target file already exists in self.dir.
#   * Otherwise opens the url with urllib2 (120 s timeout, browser-like
#     User-Agent), reads the 'etag' and 'Content-Length' response headers, and
#     streams the body in 4096-byte chunks into tmp_dir + fname, flushing after
#     each write and tracking d_length against s_length; an empty chunk before
#     s_length is reached raises a generic Exception ("Network failure ...").
#   * The global exitFlag aborts the transfer by raising KeyboardInterrupt; the
#     handler waits for the file handle to close, verifies the partial file via
#     check_file(), and either moves it to self.dir (move(); recorded in
#     succeed_list) or deletes it, then breaks out of the loop.
#   * URLError: logged, queued into error_list, url removed from download_list,
#     loop continues.  socket.timeout / IOError / Exception: temp file removed,
#     error logged (IOError/Exception also print a traceback; IOError breaks
#     the loop, Exception records into error_list).
#   * NOTE(review): the success-path clause that logs "Incomplete download ..."
#     (visible near the 'finally:') appears to have lost its leading lines in
#     the flattening — presumably an 'else:' clause of the try that etag-checks
#     the finished file and moves it into place; confirm against the original.
#   * finally: best-effort handle.close() and removal of the url from
#     download_list under queue_lock; any cleanup error is swallowed.
#   * When the queue is empty the lock is released and the loop exits; a final
#     debug message distinguishes exitFlag shutdown from normal completion.
#
# check_file(self, file, etag) — returns True iff get_qetag(file) (helper
# defined elsewhere in this file; presumably the qiniu qetag hash — TODO
# confirm) equals the server-supplied etag, else False.
#
# get_error_list() — definition continues past the end of this chunk;
# intentionally not documented further here.
def run(self): logger_root.debug('Starting ' + self.name) global tmp_dir global queue_lock global error_list global succeed_list global download_list global exitFlag global err_exit while True: queue_lock.acquire() if not DownloadThread.__queue.empty(): try: self.q_set = DownloadThread.__queue.get(block=False) except: queue_lock.release() break else: queue_lock.release() # print self.q_set # print the queue entry self.dir = self.q_set[0] self.url = self.q_set[1] fname = os.path.basename(self.url) if self.url in download_list: #if os.path.exists(tmp_dir + fname): use the more precise download_list as the duplicate check instead logger_root.warning('%s duplicate download items %s.' % (self.name, self.url)) elif not os.path.exists(self.dir + fname): queue_lock.acquire() download_list.add(self.url) queue_lock.release() logger_root.info('%s start download %s.' % (self.name, self.url)) try: host = urllib2.urlparse.urlparse(self.url).netloc headers = {'Host': host, 'User-Agent':'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.101 Safari/537.36', 'Accept':'*/*', 'Connection':'keep-alive' } req = urllib2.Request(self.url,headers=headers) handle = urllib2.urlopen(req, timeout=120) etag = handle.headers['etag'].strip('"') s_length = int(handle.headers["Content-Length"].strip('"')) d_length = 0 with open(tmp_dir + fname, 'wb') as f_handler: while True: if exitFlag: raise KeyboardInterrupt buf = 4096 if s_length - d_length > 4096 else s_length - d_length if buf == 0: f_handler.flush() break chunk = handle.read(buf) # if not chunk: # judge by the gap between Content-Length and bytes downloaded instead # break if not chunk and s_length != d_length: raise Exception, 'Network failure appeared in the process of download %s.' % self.url f_handler.write(chunk) f_handler.flush() d_length += len(chunk) except KeyboardInterrupt: while not f_handler.closed: time.sleep(1) if self.check_file(tmp_dir + fname, etag): move(tmp_dir + fname, self.dir + fname) succeed_list.add(self.url) logger_root.info('%s Successful download %s.' 
% (self.name, self.url)) else: os.remove(tmp_dir + fname) # error_list.add((self.dir, self.url)) logger_root.warning('%s stop download %s' % (self.name, self.url)) break except URLError, e: logger_root.error('%s %s %s' % (self.name, self.url, str(e))) error_list.add((self.dir, self.url)) queue_lock.acquire() download_list.discard(self.url) queue_lock.release() continue except socket.timeout, e: os.remove(tmp_dir + fname) logger_root.error('%s %s %s' % (self.name, self.url, str(e))) error_list.add((self.dir, self.url)) except IOError, e: os.remove(tmp_dir + fname) logger_root.error('%s %s %s' % (self.name, self.url, str(e))) print traceback.format_exc() break except Exception, e: os.remove(tmp_dir + fname) logger_root.error('%s %s %s' % (self.name, self.url, str(e))) error_list.add((self.dir, self.url)) print traceback.format_exc()
error_list.add((self.dir, self.url)) logger_root.error('%s Incomplete download %s, source file length is %s, downloaded file length is %s.' % (self.name, self.url, s_length, d_length)) finally: try: handle.close() queue_lock.acquire() download_list.discard(self.url) queue_lock.release() except Exception, e: # logger_root.error('try_finally %s %s %s.' % (self.name, self.url, str(e))) pass else: queue_lock.release() break if exitFlag: logger_root.debug('receive a signal to exit, [%s] stop.' % self.name) else: logger_root.debug('[%s] exit.' % self.name) def check_file(self, file, etag): stat = False qetag = get_qetag(file) if qetag == etag: stat = True # else: # logger_root.error('%s file %s checked failed. The source file\'s etag was %s, download file\'s etag is %s' % (self.name, self.url, etag, qetag)) return stat def get_error_list(): ret = []