Exemplo n.º 1
0
    def get(self,
            target_file,
            start_info=None,
            use_cache=False,
            save_properties=False,
            auth=None):
        """Download the URL and save it to `target_file`

        Appropriate hook functions are automatically invoked

        - start_info: message to print (LOG.info) when transfer begins, cache is
          validated or or an error occurs; it can also be a callable.

        - use_cache: if True, download will not actually happen if `target_file`
          is already available *and* its ETag is not changed in the server. For
          this to work, the previous call to `get` must have passed
          `save_properties=True` (in order to save the ETag).

        - save_properties: if True, a file named .$target_file.urlprops
          containing request headers and other metadata will be saved along side
          the target_file. (This is required for `use_cache` in future
          downloads)
          
        - auth: (username, password) -- optional http basic auth data

        Return True only if the download actually happened.
        """
        def invoke_start_info(status):
            if not start_info:
                return
            if six.callable(start_info):
                i = start_info(status)
            else:
                i = start_info.format(status=status)
            if i:
                LOG.info(i)

        if not P.exists(target_file):
            use_cache = False

        urlprops = URLProperties(target_file)
        props = urlprops.load()
        if props:
            # write back the new value of `last_attempt_utc` *now* so we don't
            # have to deal with it when an exception arises later.
            # last_attempt_utc is simpy the time of the last download attempt
            props.custom['last_attempt_utc'] = BareDateTime.to_string(
                datetime.utcnow())
            urlprops.save(props.headers, props.custom)

        req = None
        if use_cache and props:
            # Enable the cache header `If-None-Match`
            etag = props.headers.get('ETag', props.headers.get('etag', None))
            if etag:
                req = six.moves.Request(self.url,
                                        headers={'If-None-Match': etag})
            else:
                LOG.warn('no ETag in last headers: %s', props.headers)
                req = six.moves.Request(self.url)
        else:
            req = six.moves.Request(self.url)

        if auth:
            username, password = auth
            req.add_header('Authorization',
                           _create_http_basicauth_header(username, password))

        # Set User-Agent
        # XXX: (in 2.6) urllib2.py does not expose its default user-agent string
        # so we copy-paste that code here (from urllib2.OpenerDirector)
        urllib2_user_agent = "Python-urllib/%s" % six.moves.urllib_version
        pypm_user_agent = get_user_agent(urllib2_user_agent)
        assert pypm_user_agent
        req.add_header('User-Agent', pypm_user_agent)

        try:
            u = six.moves.urlopen(req)
        except six.moves.HTTPError as e:
            if e.code == 304 and use_cache:
                invoke_start_info('Hit')
                return False  # file not changed in server
            else:
                invoke_start_info('Get')
                raise
        else:
            invoke_start_info('Get')

        bs = 1024 * 8  # block size; from urllib.py:urlretrieve
        headers = u.info()

        # detect total size of the file to be downloaded
        if 'Content-Length' in headers:
            total_size = int(headers['Content-Length'])
            assert total_size >= 0
        else:
            total_size = None

        total_bytes_transferred = 0
        chunk_size = 0

        # Hook 1: initialize
        self.hook_initialize(total_size)

        with open(target_file + '.part', 'wb') as f:
            while True:
                if total_size:
                    assert total_bytes_transferred <= total_size, str(
                        (total_bytes_transferred, total_size))

                # Hook 2: transferring
                self.hook_transferring(chunk_size, total_bytes_transferred)

                data = u.read(bs)
                chunk_size = len(data)

                if len(data) == 0:
                    break

                f.write(data)
                total_bytes_transferred += chunk_size

        sh.mv(target_file + '.part', target_file)

        # Hook 3: completed
        self.hook_completed()

        # save the new headers and other properties
        if save_properties:
            custom_dict = props and props.custom or {
            }  # reuse existing props.custom
            custom_dict['last_attempt_utc'] = BareDateTime.to_string(
                datetime.utcnow())
            urlprops.save(dict(headers), custom_dict)

        return True
Exemplo n.º 2
0
    def get(self,
            target_file,
            info=None,
            use_cache=False,
            save_properties=False,
            auth=None):
        """Download the URL and save it to `target_file`

        Appropriate hook functions are automatically invoked

        - info: message to print (LOG.info) if transfer actually happens

        - use_cache: if True, download will not actually happen if `target_file`
          is already available *and* its ETag is not changed in the server. For
          this to work effectively, the previous ETag must be made available in
          the properties file that is created when `save_properties` flag is set
          to True in the *past* download call.

        - save_properties: if True, a file named .$target_file.urlprops
          containing request headers and other metadata is saved along side the
          target_file. (This is required for `use_cache` in future downloads)
          
        - auth: (username, password) -- optional http basic auth data
        """
        assert target_file

        urlprops = self.URLProperties(target_file)
        props = urlprops.load()
        if props:
            # write back the new value for last_attempt_utc *now* so we don't
            # have to deal with when an exception arises later.
            props.custom['last_attempt_utc'] = BareDateTime.to_string(
                datetime.utcnow())
            urlprops.save(props.headers, props.custom)

        if use_cache and props:
            req = urllib2.Request(
                self.url, headers={'If-None-Match': props.headers['etag']})
        else:
            req = urllib2.Request(self.url)

        if auth:
            username, password = auth
            req.add_header('Authorization',
                           _create_http_basicauth_header(username, password))

        # Set User-Agent
        # XXX: urllib2.py does not expose its default user-agent string; so
        # so we copy-paste that code here (from urllib2.OpenerDirector)
        urllib2_user_agent = "Python-urllib/%s" % urllib2.__version__
        pypm_user_agent = get_user_agent(urllib2_user_agent)
        req.add_header('User-Agent', pypm_user_agent)

        u = urllib2.urlopen(req)
        bs = 1024 * 8  # from urllib.py:urlretrieve
        headers = u.info()

        # detect total size of the file to be downloaded
        if 'Content-Length' in headers:
            total_size = int(headers['Content-Length'])
            assert total_size >= 0
        else:
            total_size = None

        total_bytes_transferred = 0
        chunk_size = 0

        self.hook_initialize(total_size)

        if info:
            LOG.info(info)

        with open(target_file, 'wb') as f:
            while True:
                if total_size:
                    assert total_bytes_transferred <= total_size, str(
                        (total_bytes_transferred, total_size))

                self.hook_transferring(chunk_size, total_bytes_transferred)

                data = u.read(bs)
                chunk_size = len(data)

                if data == '':
                    break

                f.write(data)
                total_bytes_transferred += chunk_size

        self.hook_completed()

        # save the new headers and other properties
        if save_properties:
            custom_dict = props and props.custom or {
            }  # reuse existing props.custom
            custom_dict['last_attempt_utc'] = BareDateTime.to_string(
                datetime.utcnow())
            urlprops.save(dict(headers), custom_dict)
Exemplo n.º 3
0
    def get(self,
            target_file,
            start_info=None,
            use_cache=False,
            save_properties=False,
            auth=None):
        """Download the URL and save it to `target_file`

        Appropriate hook functions are automatically invoked

        - start_info: message to print (LOG.info) when transfer begins, cache is
          validated or or an error occurs; it can also be a callable.

        - use_cache: if True, download will not actually happen if `target_file`
          is already available *and* its ETag is not changed in the server. For
          this to work, the previous call to `get` must have passed
          `save_properties=True` (in order to save the ETag).

        - save_properties: if True, a file named .$target_file.urlprops
          containing request headers and other metadata will be saved along side
          the target_file. (This is required for `use_cache` in future
          downloads)
          
        - auth: (username, password) -- optional http basic auth data

        Return True only if the download actually happened.
        """
        def invoke_start_info(status):
            if not start_info:
                return
            if six.callable(start_info):
                i = start_info(status)
            else:
                i = start_info.format(status=status)
            if i:
                LOG.info(i)
            
        if not P.exists(target_file):
            use_cache = False

        urlprops = URLProperties(target_file)
        props = urlprops.load()
        if props:
            # write back the new value of `last_attempt_utc` *now* so we don't
            # have to deal with it when an exception arises later.
            # last_attempt_utc is simpy the time of the last download attempt
            props.custom['last_attempt_utc'] = BareDateTime.to_string(datetime.utcnow())
            urlprops.save(props.headers, props.custom)

        req = None
        if use_cache and props:
            # Enable the cache header `If-None-Match`
            etag = props.headers.get('ETag', props.headers.get('etag', None))
            if etag:
                req = six.moves.Request(self.url, headers={'If-None-Match': etag})
            else:
                LOG.warn('no ETag in last headers: %s', props.headers)
                req = six.moves.Request(self.url)
        else:
            req = six.moves.Request(self.url)
            
        if auth:
            username, password = auth
            req.add_header('Authorization', _create_http_basicauth_header(
                username, password
            ))

        # Set User-Agent
        # XXX: (in 2.6) urllib2.py does not expose its default user-agent string
        # so we copy-paste that code here (from urllib2.OpenerDirector)
        urllib2_user_agent = "Python-urllib/%s" % six.moves.urllib_version
        pypm_user_agent = get_user_agent(urllib2_user_agent)
        assert pypm_user_agent
        req.add_header('User-Agent', pypm_user_agent)

        try:
            u = six.moves.urlopen(req)
        except six.moves.HTTPError as e:
            if e.code == 304 and use_cache:
                invoke_start_info('Hit')
                return False # file not changed in server
            else:
                invoke_start_info('Get')
                raise
        else:
            invoke_start_info('Get')

        bs = 1024*8 # block size; from urllib.py:urlretrieve
        headers = u.info()

        # detect total size of the file to be downloaded
        if 'Content-Length' in headers:
            total_size = int(headers['Content-Length'])
            assert total_size >= 0
        else:
            total_size = None

        total_bytes_transferred = 0
        chunk_size = 0

        # Hook 1: initialize
        self.hook_initialize(total_size)

        with open(target_file + '.part', 'wb') as f:
            while True:
                if total_size:
                    assert total_bytes_transferred <= total_size, str((
                        total_bytes_transferred, total_size))

                # Hook 2: transferring
                self.hook_transferring(chunk_size, total_bytes_transferred)
                
                data = u.read(bs)
                chunk_size = len(data)
                
                if len(data) == 0:
                    break
                
                f.write(data)
                total_bytes_transferred += chunk_size

        sh.mv(target_file + '.part', target_file)

        # Hook 3: completed
        self.hook_completed()
        
        # save the new headers and other properties
        if save_properties:
            custom_dict = props and props.custom or {} # reuse existing props.custom
            custom_dict['last_attempt_utc'] = BareDateTime.to_string(datetime.utcnow())
            urlprops.save(dict(headers), custom_dict)

        return True
Exemplo n.º 4
0
    def get(self, target_file, info=None,
            use_cache=False, save_properties=False,
            auth=None):
        """Download the URL and save it to `target_file`

        Appropriate hook functions are automatically invoked

        - info: message to print (LOG.info) if transfer actually happens

        - use_cache: if True, download will not actually happen if `target_file`
          is already available *and* its ETag is not changed in the server. For
          this to work effectively, the previous ETag must be made available in
          the properties file that is created when `save_properties` flag is set
          to True in the *past* download call.

        - save_properties: if True, a file named .$target_file.urlprops
          containing request headers and other metadata is saved along side the
          target_file. (This is required for `use_cache` in future downloads)
          
        - auth: (username, password) -- optional http basic auth data
        """
        assert target_file

        urlprops = self.URLProperties(target_file)
        props = urlprops.load()
        if props:
            # write back the new value for last_attempt_utc *now* so we don't
            # have to deal with when an exception arises later.
            props.custom['last_attempt_utc'] = BareDateTime.to_string(datetime.utcnow())
            urlprops.save(props.headers, props.custom)

        req = None
        if use_cache and props:
            etag = props.headers.get('ETag', props.headers.get('etag', None))
            if etag:
                req = six.moves.Request(self.url, headers={'If-None-Match': etag})
            else:
                LOG.warn('no ETag in last headers: %s', props.headers)
                req = six.moves.Request(self.url)
        else:
            req = six.moves.Request(self.url)
            
        if auth:
            username, password = auth
            req.add_header('Authorization', _create_http_basicauth_header(
                username, password
            ))
            
        # Set User-Agent
        # XXX: (in 2.6) urllib2.py does not expose its default user-agent string
        # so we copy-paste that code here (from urllib2.OpenerDirector)
        urllib2_user_agent = "Python-urllib/%s" % six.moves.urllib_version
        pypm_user_agent = get_user_agent(urllib2_user_agent)
        assert pypm_user_agent
        req.add_header('User-Agent', pypm_user_agent)
        
        u = six.moves.urlopen(req)
        bs = 1024*8 # from urllib.py:urlretrieve
        headers = u.info()

        # detect total size of the file to be downloaded
        if 'Content-Length' in headers:
            total_size = int(headers['Content-Length'])
            assert total_size >= 0
        else:
            total_size = None
            
        total_bytes_transferred = 0
        chunk_size = 0

        self.hook_initialize(total_size)

        if info:
            LOG.info(info)

        with open(target_file, 'wb') as f:
            while True:
                if total_size:
                    assert total_bytes_transferred <= total_size, str((
                        total_bytes_transferred, total_size))
                    
                self.hook_transferring(chunk_size, total_bytes_transferred)
                
                data = u.read(bs)
                chunk_size = len(data)
                
                if len(data) == 0:
                    break
                
                f.write(data)
                total_bytes_transferred += chunk_size

        self.hook_completed()
        
        # save the new headers and other properties
        if save_properties:
            custom_dict = props and props.custom or {} # reuse existing props.custom
            custom_dict['last_attempt_utc'] = BareDateTime.to_string(datetime.utcnow())
            urlprops.save(dict(headers), custom_dict)