def download(url, dest, startByte=0, endByte=None, headers=None, timeout=4,
             shared_var=None, thread_shared_cmds=None, logger=None, retries=3):
    """The basic download function that runs at each thread.

    Builds a ``urllib2.Request`` for ``url`` (adding a ``Range`` header when
    ``endByte`` is truthy) and opens it. An HTTP 416 answer is retried after
    a 5 second pause, up to ``retries`` times.

    :param url: address to open.
    :param dest: destination path; in this part of the function it is only
        used in the log message.
    :param startByte: first byte of the requested range.
    :param endByte: last byte of the requested range; no ``Range`` header is
        sent when this is ``None`` or ``0``.
    :param headers: optional dict of extra request headers. It is copied,
        never modified in place.
    :param timeout: socket timeout handed to ``urllib2.urlopen``.
    :param shared_var: forwarded unchanged to the retry call.
    :param thread_shared_cmds: forwarded unchanged to the retry call.
    :param logger: object with ``info``/``warning`` methods; defaults to
        ``utils.DummyLogger()``.
    :param retries: how many more times an HTTP 416 may be retried.
    :raises urllib2.HTTPError: for any status other than 416, or for 416 once
        ``retries`` is exhausted.
    """
    logger = logger or utils.DummyLogger()

    # Work on a private copy: callers may share one headers dict between
    # several threads, and every call writes its own 'Range' entry.
    headers = dict(headers) if headers else {}
    if endByte:
        headers['Range'] = 'bytes=%d-%d' % (startByte, endByte)

    logger.info("Downloading '%s' to '%s'..." % (url, dest))
    req = urllib2.Request(url, headers=headers)
    try:
        urlObj = urllib2.urlopen(req, timeout=timeout)
    except urllib2.HTTPError as e:
        # HTTP 416 Error: Requested Range Not Satisfiable. Happens when we
        # ask for a range that is not available on the server. It will
        # happen when the server will try to send us a .html page that means
        # something like "you opened too many connections to our server".
        # If this happens, we will wait for the other threads to finish
        # their connections and try again.
        if e.code != 416 or retries <= 0:
            raise
        logger.warning("Thread didn't got the file it was expecting. Retrying (%d times left)..." % (retries-1))
        time.sleep(5)
        return download(url, dest, startByte, endByte, headers, timeout,
                        shared_var, thread_shared_cmds, logger, retries-1)
def download(url, dest, startByte=0, endByte=None, headers=None, timeout=4,
             shared_var=None, thread_shared_cmds=None, logger=None, retries=3):
    """Open ``url`` for download, optionally restricted to a byte range.

    A ``Range`` header is added when ``endByte`` is truthy. If the server
    answers HTTP 416 (Requested Range Not Satisfiable) the call sleeps for
    5 seconds and tries again, at most ``retries`` times.

    :param url: address to open.
    :param dest: destination path; in this part of the function it is only
        used in the log message.
    :param startByte: first byte of the requested range.
    :param endByte: last byte of the requested range; no ``Range`` header is
        sent when this is ``None`` or ``0``.
    :param headers: optional dict of extra request headers. It is copied,
        never modified in place.
    :param timeout: socket timeout handed to ``urllib2.urlopen``.
    :param shared_var: forwarded unchanged to the retry call.
    :param thread_shared_cmds: forwarded unchanged to the retry call.
    :param logger: object with ``debug``/``warning`` methods; defaults to
        ``utils.DummyLogger()``.
    :param retries: how many more times an HTTP 416 may be retried.
    :raises urllib2.HTTPError: for any status other than 416, or for 416 once
        ``retries`` is exhausted.
    """
    logger = logger or utils.DummyLogger()

    # Copy before adding 'Range' so a headers dict shared by several callers
    # is never modified behind their back.
    headers = dict(headers) if headers else {}
    if endByte:
        headers['Range'] = 'bytes=%d-%d' % (startByte, endByte)

    logger.debug("Downloading '%s' to '%s'..." % (url, dest))
    req = urllib2.Request(url, headers=headers)
    try:
        urlObj = urllib2.urlopen(req, timeout=timeout)
    except urllib2.HTTPError as e:
        # Only a 416 with retries left is worth another attempt; everything
        # else is re-raised untouched.
        if e.code != 416 or retries <= 0:
            raise
        logger.warning(
            "Thread didn't got the file it was expecting. Retrying (%d times left)..."
            % (retries - 1))
        time.sleep(5)
        return download(url, dest, startByte, endByte, headers, timeout,
                        shared_var, thread_shared_cmds, logger, retries - 1)
def __init__(self, urls, dest=None, progress_bar=True, fix_urls=True,
             logger=None, connect_default_logger=False):
    """Prepare a download task; nothing is downloaded here.

    :param urls: a single URL string, or a sequence of mirror URLs. The
        first entry becomes ``self.url``; the rest stay in ``self.mirrors``.
    :param dest: destination file or directory. Defaults to a ``pySmartDL``
        folder under the system temp directory. A path that ends with the
        OS separator, or that is an existing directory, gets the file name
        taken from the URL appended.
    :param progress_bar: stored as ``self.progress_bar``.
    :param fix_urls: when true every URL is passed through
        ``utils.url_fix``.
    :param logger: logger object to use; takes precedence over
        ``connect_default_logger``.
    :param connect_default_logger: when true (and no ``logger`` is given)
        use ``utils.create_debugging_logger()``; otherwise a
        ``utils.DummyLogger()`` is used.
    """
    # Copy sequence input so that popping the primary URL below never
    # mutates the list object owned by the caller.
    self.mirrors = [urls] if isinstance(urls, basestring) else list(urls)
    if fix_urls:
        self.mirrors = [utils.url_fix(x) for x in self.mirrors]
    self.url = self.mirrors.pop(0)

    fn = os.path.basename(urlparse(self.url).path)
    self.dest = dest or os.path.join(tempfile.gettempdir(), 'pySmartDL', fn)
    if self.dest[-1] == os.sep:
        # "dir/" was requested: a plain file squatting on that name is
        # removed so the directory can be created further down.
        if os.path.exists(self.dest[:-1]) and os.path.isfile(self.dest[:-1]):
            os.unlink(self.dest[:-1])
        self.dest += fn
    if os.path.isdir(self.dest):
        self.dest = os.path.join(self.dest, fn)

    self.progress_bar = progress_bar

    if logger:
        self.logger = logger
    elif connect_default_logger:
        self.logger = utils.create_debugging_logger()
    else:
        self.logger = utils.DummyLogger()

    self.headers = {'User-Agent': utils.get_random_useragent()}
    self.threads_count = 3
    self.timeout = 4
    self.current_attemp = 1
    self.attemps_limit = 4
    self.minChunkFile = 1024**2*2  # 2MB
    self.filesize = 0
    self.shared_var = multiprocessing.Value(c_int, 0)  # a ctypes var that counts the bytes already downloaded
    self.thread_shared_cmds = {}
    self.status = "ready"
    self.verify_hash = False
    self._killed = False
    self._failed = False
    self._start_func_blocking = True
    self.errors = []

    self.post_threadpool_thread = None
    self.control_thread = None

    # A bare file name has an empty dirname; os.makedirs('') would raise,
    # so the directory is only created when there is one to create.
    dest_dir = os.path.dirname(self.dest)
    if dest_dir and not os.path.exists(dest_dir):
        self.logger.debug('Folder "%s" does not exist. Creating...' % dest_dir)
        os.makedirs(dest_dir)
    if not utils.is_HTTPRange_supported(self.url):
        self.logger.warning("Server does not support HTTPRange. threads_count is set to 1.")
        self.threads_count = 1
    if os.path.exists(self.dest):
        self.logger.warning('Destination "%s" already exists. Existing file will be removed.' % self.dest)

    self.pool = utils.ManagedThreadPoolExecutor(self.threads_count)
def __init__(self, urls, dest=None, progress_bar=True, fix_urls=True,
             threads=5, logger=None, connect_default_logger=False,
             proxy=None):
    """Prepare a download task; nothing is downloaded here.

    :param urls: a single URL string, or a sequence of mirror URLs. The
        first entry becomes ``self.url``; the rest stay in ``self.mirrors``.
    :param dest: destination file or directory. Defaults to a ``pySmartDL``
        folder under the system temp directory. A path that ends with the
        OS separator, or that is an existing directory, gets the file name
        taken from the URL appended.
    :param progress_bar: stored as ``self.progress_bar``.
    :param fix_urls: when true every URL is passed through
        ``utils.url_fix``.
    :param threads: requested number of worker threads; reduced to 1 when
        ``utils.is_HTTPRange_supported`` reports no range support.
    :param logger: logger object to use; takes precedence over
        ``connect_default_logger``.
    :param connect_default_logger: when true (and no ``logger`` is given)
        use the debugging logger, creating it only once per process.
    :param proxy: proxy address used for both http and https. Note that it
        is installed as the global urllib2 opener, so it affects every
        later ``urllib2.urlopen`` call in the process.
    """
    global DEFAULT_LOGGER_CREATED

    # Copy sequence input so that popping the primary URL below never
    # mutates the list object owned by the caller.
    self.mirrors = [urls] if isinstance(urls, basestring) else list(urls)
    if fix_urls:
        self.mirrors = [utils.url_fix(x) for x in self.mirrors]
    self.url = self.mirrors.pop(0)

    if proxy is not None:
        proxy = urllib2.ProxyHandler({'http': proxy, 'https': proxy})
        opener = urllib2.build_opener(proxy)
        urllib2.install_opener(opener)

    fn = urllib2.unquote(os.path.basename(urlparse(self.url).path))
    if sys.version_info < (3, 0):
        fn = fn.decode('utf-8')  # required only on python 2
    self.dest = dest or os.path.join(tempfile.gettempdir(), 'pySmartDL', fn)
    if self.dest[-1] == os.sep:
        # "dir/" was requested: a plain file squatting on that name is
        # removed so the directory can be created further down.
        if os.path.exists(self.dest[:-1]) and os.path.isfile(self.dest[:-1]):
            os.unlink(self.dest[:-1])
        self.dest += fn
    if os.path.isdir(self.dest):
        self.dest = os.path.join(self.dest, fn)

    self.progress_bar = progress_bar

    if logger:
        self.logger = logger
    elif connect_default_logger:
        if not DEFAULT_LOGGER_CREATED:
            self.logger = utils.create_debugging_logger()
            DEFAULT_LOGGER_CREATED = True
        else:
            # The debugging logger was already set up by an earlier
            # instance; fetch it by name instead of creating it again.
            self.logger = logging.getLogger('pySmartDL')
    else:
        self.logger = utils.DummyLogger()

    self.headers = {'User-Agent': utils.get_random_useragent()}
    self.threads_count = threads
    self.timeout = 4
    self.current_attemp = 1
    self.attemps_limit = 4
    self.minChunkFile = 1024**2 * 2  # 2MB
    self.filesize = 0
    self.shared_var = multiprocessing.Value(
        c_int, 0)  # a ctypes var that counts the bytes already downloaded
    self.thread_shared_cmds = {}
    self.status = "ready"
    self.verify_hash = False
    self._killed = False
    self._failed = False
    self._start_func_blocking = True
    self.errors = []

    self.post_threadpool_thread = None
    self.control_thread = None

    # A bare file name has an empty dirname; os.makedirs('') would raise,
    # so the directory is only created when there is one to create.
    dest_dir = os.path.dirname(self.dest)
    if dest_dir and not os.path.exists(dest_dir):
        self.logger.info('Folder "%s" does not exist. Creating...' % dest_dir)
        os.makedirs(dest_dir)
    if not utils.is_HTTPRange_supported(self.url):
        self.logger.warning(
            "Server does not support HTTPRange. threads_count is set to 1."
        )
        self.threads_count = 1
    if os.path.exists(self.dest):
        self.logger.warning(
            'Destination "%s" already exists. Existing file will be removed.'
            % self.dest)

    self.logger.info("Creating a ThreadPool of %d thread(s).",
                     self.threads_count)
    self.pool = utils.ManagedThreadPoolExecutor(self.threads_count)