예제 #1
0
    def run(self):
        '''
        Progress-monitoring loop.

        Polls the shared byte counter roughly 10 times a second until the
        thread pool reports completion, updating `self.dl_speed`, `self.eta`
        and (optionally) an in-place console progress bar.  Afterwards waits
        for the post-processing thread, shuts the pool down and records the
        total download time in `self.dl_time`.
        '''
        # Wall-clock start, used to compute the total download duration.
        t1 = time.time()

        while not self.obj.pool.done():
            self.dl_speed = self.calcDownloadSpeed(self.shared_var.value)
            if self.dl_speed > 0:
                # ETA = remaining bytes / current speed.
                self.eta = self.calcETA(
                    (self.obj.filesize - self.shared_var.value) /
                    self.dl_speed)

            if self.progress_bar:
                if self.obj.filesize:
                    status = r"[%s Threads] %s / %s @ %s/s %s [%3.1f%%, %s left]   " % (
                        self.obj.actual_threads_used,
                        utils.sizeof_human(self.shared_var.value),
                        utils.sizeof_human(self.obj.filesize),
                        utils.sizeof_human(self.dl_speed),
                        utils.progress_bar(
                            1.0 * self.shared_var.value / self.obj.filesize),
                        self.shared_var.value * 100.0 / self.obj.filesize,
                        utils.time_human(self.eta, fmt_short=True))
                else:
                    # Total size unknown: no percentage or ETA can be shown.
                    status = r"[%s Threads] %s / ??? MB @ %s/s   " % (
                        self.obj.actual_threads_used,
                        utils.sizeof_human(self.shared_var.value),
                        utils.sizeof_human(self.dl_speed))
                # Append backspaces so the next print overwrites this line
                # in place (Python 2 `print x,` suppresses the newline).
                status = status + chr(8) * (len(status) + 1)
                print status,

            time.sleep(0.1)

        if self.obj._killed:
            self.logger.debug("File download process has been stopped.")
            return

        if self.progress_bar:
            if self.obj.filesize:
                # Final 100% status line.
                print r"[%s Threads] %s / %s @ %s/s %s [100%%, 0s left]    " % (
                    self.obj.actual_threads_used,
                    utils.sizeof_human(self.obj.filesize),
                    utils.sizeof_human(self.obj.filesize),
                    utils.sizeof_human(self.dl_speed), utils.progress_bar(1.0))
            else:
                # NOTE(review): the second field prints a raw MB float while
                # the in-progress line printed "??? MB" — looks inconsistent;
                # confirm the intended output format.
                print r"[%s Threads] %s / %s @ %s/s    " % (
                    self.obj.actual_threads_used,
                    utils.sizeof_human(self.shared_var.value),
                    self.shared_var.value / 1024.0**2,
                    utils.sizeof_human(self.dl_speed))

        t2 = time.time()
        self.dl_time = float(t2 - t1)

        # Wait for the file-joining / post-download actions to complete.
        while self.obj.post_threadpool_thread.is_alive():
            time.sleep(0.1)

        self.obj.pool.shutdown()
        self.obj.status = "finished"
        if not self.obj.errors:
            self.logger.debug("File downloaded within %.2f seconds." %
                              self.dl_time)
예제 #2
0
 def get_speed(self, human=False):
     '''
     Get current transfer speed in bytes per second.
     
     :param human: If true, returns a human-readable formatted string. Else, returns an int type number
     :type human: bool
     :rtype: int/string
     '''
     speed = self.control_thread.get_speed()
     if not human:
         return speed
     return "%s/s" % utils.sizeof_human(speed)
예제 #3
0
 def get_speed(self, human=False):
     '''
     Get current transfer speed in bytes per second.
     
     :param human: If true, returns a human-readable formatted string. Else, returns an int type number
     :type human: bool
     :rtype: int/string
     '''
     current = self.control_thread.get_speed()
     return ("%s/s" % utils.sizeof_human(current)) if human else current
예제 #4
0
 def get_dl_size(self, human=False):
     '''
     Get downloaded bytes counter in bytes.
     
     :param human: If true, returns a human-readable formatted string. Else, returns an int type number
     :type human: bool
     :rtype: int/string
     '''
     # The control thread only exists after the download has started; report
     # zero bytes before then instead of raising AttributeError on None
     # (matches the guarded sibling implementation of this method).
     if not self.control_thread:
         return 0
     if human:
         return utils.sizeof_human(self.control_thread.get_dl_size())
     return self.control_thread.get_dl_size()
예제 #5
0
 def run(self):
     '''
     Progress-monitoring loop (threadpool-based variant).

     Polls the worker pool roughly 10 times a second while work requests are
     pending, updating speed/ETA and an optional in-place console progress
     bar, then waits for post-download processing and records the total
     download time in `self.dl_time`.
     '''
     # Wall-clock start, used to compute the total download duration.
     t1 = time.time()

     while self.obj.pool.workRequests:
         self.dl_speed = self.calcDownloadSpeed(self.shared_var.value)
         if self.dl_speed > 0:
             # ETA = remaining bytes / current speed.
             self.eta = self.calcETA((self.obj.filesize-self.shared_var.value)/self.dl_speed)

         if self.progress_bar:
             if self.obj.filesize:
                 status = r"[*] %s / %s @ %s/s %s [%3.2f%%, %s left]   " % (utils.sizeof_human(self.shared_var.value), utils.sizeof_human(self.obj.filesize), utils.sizeof_human(self.dl_speed), utils.progress_bar(1.0*self.shared_var.value/self.obj.filesize), self.shared_var.value * 100.0 / self.obj.filesize, utils.time_human(self.eta, fmt_short=True))
             else:
                 # Total size unknown: no percentage or ETA can be shown.
                 status = r"[*] %s / ??? MB @ %s/s   " % (utils.sizeof_human(self.shared_var.value), utils.sizeof_human(self.dl_speed))
             # Backspaces make the next print overwrite this line in place
             # (Python 2 `print x,` suppresses the newline).
             status = status + chr(8)*(len(status)+1)
             print status,
         try:
             self.obj.pool.poll()
         except threadpool.NoResultsPending:
             # All work requests have finished; leave the monitoring loop.
             break
         time.sleep(0.1)

     if self.obj._killed:
         self.logger.debug("File download process has been stopped.")
         return

     if self.progress_bar:
         if self.obj.filesize:
             # Final 100% status line.
             print r"[*] %s / %s @ %s/s %s [100%%, 0s left]    " % (utils.sizeof_human(self.obj.filesize), utils.sizeof_human(self.obj.filesize), utils.sizeof_human(self.dl_speed), utils.progress_bar(1.0))
         else:
             # NOTE(review): the second field prints a raw MB float while the
             # in-progress line printed "??? MB" — confirm intended format.
             print r"[*] %s / %s @ %s/s    " % (utils.sizeof_human(self.shared_var.value), self.shared_var.value / 1024.0**2, utils.sizeof_human(self.dl_speed))

     t2 = time.time()
     self.dl_time = float(t2-t1)

     # Wait for the file-joining / post-download actions to complete.
     while self.obj.post_threadpool_thread.is_alive():
         time.sleep(0.1)

     self.obj.status = "finished"
     if not self.obj.errors:
         self.logger.debug("File downloaded within %.2f seconds." % self.dl_time)
예제 #6
0
 def get_dl_size(self, human=False):
     '''
     Get downloaded bytes counter in bytes.
     
     :param human: If true, returns a human-readable formatted string. Else, returns an int type number
     :type human: bool
     :rtype: int/string
     '''
     # Before the download starts there is no control thread to ask.
     if not self.control_thread:
         return 0
     downloaded = self.control_thread.get_dl_size()
     return utils.sizeof_human(downloaded) if human else downloaded
예제 #7
0
class SmartDL:
    '''
    The main SmartDL class
    
    :param urls: Download url. It is possible to pass unsafe and unicode characters. You can also pass a list of urls, and those will be used as mirrors.
    :type urls: string or list of strings
    :param dest: Destination path. Default is `%TEMP%/pySmartDL/`.
    :type dest: string
    :param progress_bar: If True, prints a progress bar to the `stdout stream <http://docs.python.org/2/library/sys.html#sys.stdout>`_. Default is `True`.
    :type progress_bar: bool
	:param fix_urls: If true, attempts to fix urls with unsafe characters.
	:type fix_urls: bool
    :param logger: An optional logger.
    :type logger: `logging.Logger` instance
    :param connect_default_logger: If true, connects a default logger to the class.
    :type connect_default_logger: bool
    :rtype: `SmartDL` instance
    
    .. NOTE::
            The provided dest may be a folder or a full path name (including filename). The workflow is:
            
            * If the path exists, and it's an existing folder, the file will be downloaded to there with the original filename.
            * If the past does not exist, it will create the folders, if needed, and refer to the last section of the path as the filename.
            * If you want to download to folder that does not exist at the moment, and want the module to fill in the filename, make sure the path ends with `os.sep`.
            * If no path is provided, `%TEMP%/pySmartDL/` will be used.
    '''
    def __init__(self,
                 urls,
                 dest=None,
                 progress_bar=True,
                 fix_urls=True,
                 logger=None,
                 connect_default_logger=False):
        self.mirrors = [urls] if isinstance(urls, basestring) else urls
        if fix_urls:
            self.mirrors = [utils.url_fix(x) for x in self.mirrors]
        self.url = self.mirrors.pop(0)

        fn = os.path.basename(urlparse(self.url).path)
        self.dest = dest or os.path.join(tempfile.gettempdir(), 'pySmartDL',
                                         fn)
        if self.dest[-1] == os.sep:
            if os.path.exists(self.dest[:-1]) and os.path.isfile(
                    self.dest[:-1]):
                os.unlink(self.dest[:-1])
            self.dest += fn
        if os.path.isdir(self.dest):
            self.dest = os.path.join(self.dest, fn)

        self.progress_bar = progress_bar

        if logger:
            self.logger = logger
        elif connect_default_logger:
            self.logger = utils.create_debugging_logger()
        else:
            self.logger = utils.DummyLogger()

        self.headers = {'User-Agent': utils.get_random_useragent()}
        self.threads_count = 5
        self.timeout = 4
        self.current_attemp = 1
        self.attemps_limit = 4
        self.minChunkFile = 1024**2 * 2  # 2MB
        self.filesize = 0
        self.shared_var = multiprocessing.Value(
            c_int, 0)  # a ctypes var that counts the bytes already downloaded
        self.thread_shared_cmds = {}
        self.status = "ready"
        self.verify_hash = False
        self._killed = False
        self._failed = False
        self._start_func_blocking = True
        self.errors = []

        self.post_threadpool_thread = None
        self.control_thread = None

        if not os.path.exists(os.path.dirname(self.dest)):
            self.logger.debug('Folder "%s" does not exist. Creating...' %
                              os.path.dirname(self.dest))
            os.makedirs(os.path.dirname(self.dest))
        if not utils.is_HTTPRange_supported(self.url):
            self.logger.warning(
                "Server does not support HTTPRange. threads_count is set to 1."
            )
            self.threads_count = 1
        if os.path.exists(self.dest):
            self.logger.warning(
                'Destination "%s" already exists. Existing file will be removed.'
                % self.dest)
        if not os.path.exists(os.path.dirname(self.dest)):
            self.logger.warning(
                'Directory "%s" does not exist. Creating it...' %
                os.path.dirname(self.dest))
            os.makedirs(os.path.dirname(self.dest))

        self.pool = utils.ManagedThreadPoolExecutor(self.threads_count)

    def __str__(self):
        return 'SmartDL(r"%s", dest=r"%s")' % (self.url, self.dest)

    def __repr__(self):
        return "<SmartDL %s>" % (self.url)

    def add_basic_authentication(self, username, password):
        '''
        Uses HTTP Basic Access authentication for the connection.
        
        :param username: Username.
        :type username: string
        :param password: Password.
        :type password: string
        '''
        auth_string = '%s:%s' % (username, password)
        base64string = base64.standard_b64encode(auth_string.encode('utf-8'))
        self.headers['Authorization'] = b"Basic " + base64string

    def add_hash_verification(self, algorithm, hash):
        '''
        Adds hash verification to the download.
        
        If hash is not correct, will try different mirrors. If all mirrors aren't
        passing hash verification, `HashFailedException` Exception will be raised.
        
        .. NOTE::
            If downloaded file already exist on the destination, and hash matches, pySmartDL will not download it again.
            
        .. WARNING::
            The hashing algorithm must be supported on your system, as documented at `hashlib documentation page <http://docs.python.org/2/library/hashlib.html>`_.
        
        :param algorithm: Hashing algorithm.
        :type algorithm: string
        :param hash: Hash code.
        :type hash: string
        '''

        self.verify_hash = True
        self.hash_algorithm = algorithm
        self.hash_code = hash

    def fetch_hash_sums(self):
        '''
        Will attempt to fetch UNIX hash sums files (`SHA256SUMS`, `SHA1SUMS` or `MD5SUMS` files in
        the same url directory).
        
        Calls `self.add_hash_verification` if successful. Returns if a matching hash was found.
        
        :rtype: bool
        
        *New in 1.2.1*
        '''
        default_sums_filenames = ['SHA256SUMS', 'SHA1SUMS', 'MD5SUMS']
        folder = os.path.dirname(self.url)
        orig_basename = os.path.basename(self.url)

        self.logger.debug("Looking for SUMS files...")
        for filename in default_sums_filenames:
            try:
                sums_url = "%s/%s" % (folder, filename)
                obj = urllib2.urlopen(sums_url)
                data = obj.read().split('\n')
                obj.close()

                for line in data:
                    if orig_basename.lower() in line.lower():
                        self.logger.debug("Found a matching hash in %s" %
                                          sums_url)
                        algo = filename.rstrip('SUMS')
                        hash = line.split(' ')[0]
                        self.add_hash_verification(algo, hash)
                        return

            except urllib2.HTTPError:
                continue

    def start(self, blocking=None):
        '''
        Starts the download task. Will raise `RuntimeError` if it's the object's already downloading.
        
        .. warning::
            If you're using the non-blocking mode, Exceptions won't be raised. In that case, call
            `isSuccessful()` after the task is finished, to make sure the download succeeded. Call
            `get_errors()` to get the the exceptions.
        
        :param blocking: If true, calling this function will block the thread until the download finished. Default is *True*.
        :type blocking: bool
        '''
        if not self.status == "ready":
            raise RuntimeError("cannot start (current status is %s)" %
                               self.status)

        if blocking is None:
            blocking = self._start_func_blocking
        else:
            self._start_func_blocking = blocking

        if self.mirrors:
            self.logger.debug('One URL and %d mirrors are loaded.' %
                              len(self.mirrors))
        else:
            self.logger.debug('One URL is loaded.')

        if self.verify_hash and os.path.exists(self.dest):
            with open(self.dest, 'rb') as f:
                hash = hashlib.new(self.hash_algorithm, f.read()).hexdigest()
                if hash == self.hash_code:
                    self.logger.debug(
                        "Destination '%s' already exists, and the hash matches. No need to download."
                        % self.dest)
                    self.status = 'finished'
                    return

        self.logger.debug("Downloading '%s' to '%s'..." %
                          (self.url, self.dest))
        req = urllib2.Request(self.url, headers=self.headers)
        try:
            urlObj = urllib2.urlopen(req, timeout=self.timeout)
        except (urllib2.HTTPError, urllib2.URLError), e:
            self.errors.append(e)
            if self.mirrors:
                self.logger.debug("%s. Trying next mirror..." % unicode(e))
                self.url = self.mirrors.pop(0)
                self.start(blocking)
                return
            else:
                self.logger.warning(unicode(e))
                self.errors.append(e)
                self._failed = True
                self.status = "finished"
                raise

        try:
            self.filesize = int(urlObj.headers["Content-Length"])
            self.logger.debug(
                "Content-Length is %d (%s)." %
                (self.filesize, utils.sizeof_human(self.filesize)))
        except IndexError:
            self.logger.warning(
                "Server did not send Content-Length. Filesize is unknown.")
            self.filesize = 0

        args = _calc_chunk_size(self.filesize, self.threads_count,
                                self.minChunkFile)
        bytes_per_thread = args[0][1] - args[0][0]
        if len(args) > 1:
            self.logger.debug(
                "Launching %d threads (downloads %s/Thread)." %
                (len(args), utils.sizeof_human(bytes_per_thread)))
        else:
            self.logger.debug("Launching 1 thread.")

        self.status = "downloading"

        for i, arg in enumerate(args):
            req = self.pool.submit(download, self.url,
                                   self.dest + ".%.3d" % i, arg[0], arg[1],
                                   copy.deepcopy(self.headers), self.timeout,
                                   self.shared_var, self.thread_shared_cmds)

        self.post_threadpool_thread = threading.Thread(
            target=post_threadpool_actions,
            args=(self.pool, [[(self.dest + ".%.3d" % i)
                               for i in range(len(args))],
                              self.dest], self.filesize, self))
        self.post_threadpool_thread.daemon = True
        self.post_threadpool_thread.start()

        self.control_thread = ControlThread(self)

        if blocking:
            self.wait(raise_exceptions=True)
예제 #8
0
 def get_dl_size(self, human=False):
     '''
     Get downloaded bytes counter in bytes.

     :param human: If true, returns a human-readable formatted string. Else, returns an int type number
     :type human: bool
     :rtype: int/string
     '''
     # No control thread yet means the download has not started.
     if not self.control_thread:
         return 0
     if human:
         return utils.sizeof_human(self.control_thread.get_dl_size())
     return self.control_thread.get_dl_size()
예제 #9
0
 def get_speed(self, human=False):
     '''
     Get current transfer speed in bytes per second.

     :param human: If true, returns a human-readable formatted string. Else, returns an int type number
     :type human: bool
     :rtype: int/string
     '''
     # The control thread only exists after the download has started; report
     # zero before then instead of raising AttributeError on None (matches
     # the guard used by the sibling get_dl_size).
     if not self.control_thread:
         return 0
     if human:
         return "%s/s" % utils.sizeof_human(self.control_thread.get_speed())
     return self.control_thread.get_speed()
예제 #10
0
class MultiGet:
    def __init__(self,
                 urls,
                 dest=None,
                 progress_bar=True,
                 fix_urls=True,
                 logger=None,
                 connect_default_logger=False,
                 max_parallelism=16,
                 timeout=4,
                 max_retries=4,
                 minimum_chunk_size=1024**2 * 2):
        # Accept a single url or a list of mirror urls; the first entry
        # becomes the active url, the rest are kept as fallbacks.
        self.mirrors = [urls] if isinstance(urls, basestring) else urls
        if fix_urls:
            self.mirrors = [utils.url_fix(x) for x in self.mirrors]
        self.url = self.mirrors.pop(0)

        # Filename derived from the url path (percent-decoded; Python 2
        # bytes -> unicode via utf-8).
        fn = urllib2.unquote(os.path.basename(urlparse(
            self.url).path)).decode('utf-8')
        self.dest = dest or os.path.join(tempfile.gettempdir(), 'pyMultiGet',
                                         fn)
        # A trailing os.sep means "directory": clobber any same-named file
        # and append the derived filename.
        if self.dest[-1] == os.sep:
            if os.path.exists(self.dest[:-1]) and os.path.isfile(
                    self.dest[:-1]):
                os.unlink(self.dest[:-1])
            self.dest += fn
        if os.path.isdir(self.dest):
            self.dest = os.path.join(self.dest, fn)

        self.progress_bar = progress_bar

        # Logger precedence: explicit logger > default debugging logger >
        # no-op dummy logger.
        if logger:
            self.logger = logger
        elif connect_default_logger:
            self.logger = utils.create_debugging_logger()
        else:
            self.logger = utils.DummyLogger()

        self.headers = {'User-Agent': utils.get_random_useragent()}
        self.threads_count = max_parallelism
        self.actual_threads_used = max_parallelism
        self.timeout = timeout
        self.current_attemp = 1  # NOTE: misspelled names kept for compatibility
        self.attemps_limit = max_retries
        self.minChunkFile = minimum_chunk_size
        self.filesize = 0
        # Cross-process/thread counter of bytes downloaded so far.
        self.shared_var = multiprocessing.Value(c_int, 0)
        self.thread_shared_cmds = {}
        self.status = "ready"
        self.verify_hash = False
        self._killed = False
        self._failed = False
        self._start_func_blocking = True
        self.errors = []

        # Created lazily by start().
        self.post_threadpool_thread = None
        self.control_thread = None

        if not os.path.exists(os.path.dirname(self.dest)):
            self.logger.debug('Folder "%s" does not exist. Creating...' %
                              os.path.dirname(self.dest))
            os.makedirs(os.path.dirname(self.dest))
        if not utils.is_HTTPRange_supported(self.url):
            self.logger.warning(
                "Server does not support HTTPRange. threads_count is set to 1."
            )
            self.threads_count = 1
        if os.path.exists(self.dest):
            self.logger.warning(
                'Destination "%s" already exists. Existing file will be removed.'
                % self.dest)
        # NOTE(review): this duplicates the existence check above and can
        # never trigger after the first makedirs — confirm and remove.
        if not os.path.exists(os.path.dirname(self.dest)):
            self.logger.warning(
                'Directory "%s" does not exist. Creating it...' %
                os.path.dirname(self.dest))
            os.makedirs(os.path.dirname(self.dest))

        self.pool = utils.ManagedThreadPoolExecutor(self.threads_count)

    def __str__(self):
        # Constructor-like, copy-pasteable representation.
        template = 'MultiGet(r"%s", dest=r"%s")'
        return template % (self.url, self.dest)

    def __repr__(self):
        # Short debug representation: "<MultiGet URL>".
        return "<MultiGet %s>" % (self.url,)

    def add_basic_authentication(self, username, password):
        '''
        Use HTTP Basic Access authentication for the connection.

        :param username: Username.
        :type username: string
        :param password: Password.
        :type password: string
        '''
        # Basic auth is "user:pass" base64-encoded in the Authorization header.
        credentials = '%s:%s' % (username, password)
        encoded = base64.standard_b64encode(credentials.encode('utf-8'))
        self.headers['Authorization'] = b"Basic " + encoded

    def add_hash_verification(self, algorithm, hash):
        '''
        Add hash verification to the download.

        :param algorithm: Hashing algorithm name.
        :type algorithm: string
        :param hash: Expected hash digest.
        :type hash: string
        '''
        # Record the expected digest, then flip the verification flag on.
        self.hash_algorithm = algorithm
        self.hash_code = hash
        self.verify_hash = True

    def fetch_hash_sums(self):
        '''
        Attempt to fetch UNIX-style hash sum files (`SHA256SUMS`, `SHA1SUMS`
        or `MD5SUMS`) from the same url directory, and enable hash
        verification if a line matching the download's basename is found.

        Calls `self.add_hash_verification` on success.

        :returns: True if a matching hash was found and registered.
        :rtype: bool
        '''
        default_sums_filenames = ['SHA256SUMS', 'SHA1SUMS', 'MD5SUMS']
        folder = os.path.dirname(self.url)
        orig_basename = os.path.basename(self.url)

        self.logger.debug("Looking for SUMS files...")
        for filename in default_sums_filenames:
            try:
                sums_url = "%s/%s" % (folder, filename)
                obj = urllib2.urlopen(sums_url)
                data = obj.read().split('\n')
                obj.close()

                for line in data:
                    if orig_basename.lower() in line.lower():
                        self.logger.debug("Found a matching hash in %s" %
                                          sums_url)
                        # Slice off the literal "SUMS" suffix. str.rstrip
                        # strips a *character set*, which happens to work for
                        # these three names but would corrupt others.
                        algo = filename[:-len('SUMS')]
                        hash = line.split(' ')[0]
                        self.add_hash_verification(algo, hash)
                        # Signal success to the caller (previously returned
                        # None regardless of outcome).
                        return True

            except urllib2.HTTPError:
                # This SUMS file does not exist on the server; try the next.
                continue
        return False

    def start(self, blocking=None):
        if not self.status == "ready":
            raise RuntimeError("cannot start (current status is %s)" %
                               self.status)

        if blocking is None:
            blocking = self._start_func_blocking
        else:
            self._start_func_blocking = blocking

        if self.mirrors:
            self.logger.debug('One URL and %d mirrors are loaded.' %
                              len(self.mirrors))
        else:
            self.logger.debug('One URL is loaded.')

        if self.verify_hash and os.path.exists(self.dest):
            with open(self.dest, 'rb') as f:
                hash = hashlib.new(self.hash_algorithm, f.read()).hexdigest()
                if hash == self.hash_code:
                    self.logger.debug(
                        "Destination '%s' already exists, and the hash matches. No need to download."
                        % self.dest)
                    self.status = 'finished'
                    return

        self.logger.debug("Downloading '%s' to '%s'..." %
                          (self.url, self.dest))
        req = urllib2.Request(self.url, headers=self.headers)
        try:
            urlObj = urllib2.urlopen(req, timeout=self.timeout)
        except (urllib2.HTTPError, urllib2.URLError), e:
            self.errors.append(e)
            if self.mirrors:
                self.logger.debug("%s. Trying next mirror..." % unicode(e))
                self.url = self.mirrors.pop(0)
                self.start(blocking)
                return
            else:
                self.logger.warning(unicode(e))
                self.errors.append(e)
                self._failed = True
                self.status = "finished"
                raise

        try:
            self.filesize = int(urlObj.headers["Content-Length"])
            self.logger.debug(
                "Content-Length is %d (%s)." %
                (self.filesize, utils.sizeof_human(self.filesize)))
        except (IndexError, KeyError):
            self.logger.warning(
                "Server did not send Content-Length. Filesize is unknown.")
            self.filesize = 0

        args = _calc_chunk_size(self.filesize, self.threads_count,
                                self.minChunkFile)
        self.actual_threads_used = len(args)
        bytes_per_thread = args[0][1] - args[0][0]
        if len(args) > 1:
            self.logger.debug(
                "Launching %d threads (downloads %s/Thread)." %
                (len(args), utils.sizeof_human(bytes_per_thread)))
        else:
            self.logger.debug("Launching 1 thread.")

        self.status = "downloading"

        for i, arg in enumerate(args):
            req = self.pool.submit(download, self.url,
                                   self.dest + ".%.3d" % i, arg[0], arg[1],
                                   copy.deepcopy(self.headers), self.timeout,
                                   self.shared_var, self.thread_shared_cmds)

        self.post_threadpool_thread = threading.Thread(
            target=post_threadpool_actions,
            args=(self.pool, [[(self.dest + ".%.3d" % i)
                               for i in range(len(args))],
                              self.dest], self.filesize, self))
        self.post_threadpool_thread.daemon = True
        self.post_threadpool_thread.start()

        self.control_thread = ControlThread(self)

        if blocking:
            self.wait(raise_exceptions=True)