예제 #1
0
    def urlretrieve(self, url, filename, reporthook, ssl_ignore_cert=False):
        """
        Download a URL into ``filename``, reporting progress along the way.

        Arguments:
            url: address handed to ``urlopen``.
            filename: path of the file that receives the data. It is opened
                in binary write mode, so existing content is replaced.
            reporthook: callable invoked as ``reporthook(blocknum, bs, size)``
                once before the first read and once after every block
                written; ``size`` is -1 when the response carries no
                Content-Length header. Pass ``None`` to disable reporting.
            ssl_ignore_cert: when true, the connection is opened with an
                unverified SSL context (no certificate check).

        Returns a tuple ``(filename, headers)`` where ``headers`` is the
        object returned by the response's ``info()`` method.

        Raises ContentTooShortError when fewer bytes were read than the
        Content-Length header announced.
        """
        if ssl_ignore_cert:
            # Certificate verification is switched off on explicit request.
            ssl_ctx = ssl._create_unverified_context()
        else:
            # None leaves the choice of SSL context to urlopen.
            ssl_ctx = None

        msg = 'Opening %s ...' % (url, )
        print(msg, end='\r')
        with contextlib.closing(urlopen(url, None, context=ssl_ctx)) as fp:
            # Blank out the status message once the connection is open.
            print('%*s' % (
                len(msg),
                '',
            ), end='\r')
            headers = fp.info()

            with open(filename, 'wb') as tfp:
                result = filename, headers
                bs = 1024 * 8
                size = -1
                read = 0
                blocknum = 0
                if "content-length" in headers:
                    size = int(headers["Content-Length"])

                # The hook is optional: callers without a progress display
                # can pass None instead of a dummy callable.
                if reporthook:
                    reporthook(blocknum, bs, size)

                while True:
                    block = fp.read(bs)
                    if not block:
                        break
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)

        # A known size that was not reached means the transfer was cut short.
        if size >= 0 and read < size:
            raise ContentTooShortError(
                "retrieval incomplete: got only %i out of %i bytes" %
                (read, size), result)

        return result
예제 #2
0
def safe_retrieve(url, filename=None, reporthook=None, data=None, maxtries=5, r_range=None):
    """Retrieve a URL into a file, retrying when the transfer comes up short.

    Arguments:
        url: address to download, as a string.
        filename: destination path. When omitted, a temporary file is
            created and its name is appended to ``_url_tempfiles``. The
            exception is a ``file:`` URL, for which the local path is
            returned without copying anything.
        reporthook: optional callable invoked as
            ``reporthook(blocknum, bs, size)`` once before the first read of
            an attempt and once after every block written. ``size`` is -1
            when the expected length is unknown.
        data: request body passed along with the request (``None`` for none).
        maxtries: number of extra attempts allowed after a short transfer;
            0 means no retry and -1 means retry without limit.
        r_range: optional ``(start, end)`` tuple, sent to the server as the
            request header ``Range: bytes=start-end``.

    Every attempt downloads again from the beginning and overwrites the
    destination file, so a retry never leaves a partial file behind.

    Returns a tuple ``(filename, headers)`` describing the attempt that
    completed.

    Raises:
        ValueError: if ``maxtries`` is below -1 or ``r_range`` cannot be
            formatted as two integers.
        ContentTooShortError: if the last permitted attempt still delivered
            fewer bytes than expected.
    """
    # Imported locally: the Range header has to travel with the request, and
    # only a Request object can carry custom request headers.
    from urllib.request import Request

    if maxtries < -1:
        raise ValueError('maxtries must be at least equal with -1')

    # Validate the range before touching the network.
    request_headers = {}
    range_length = -1
    if r_range is not None:
        try:
            request_headers["Range"] = "bytes=%d-%d" % r_range
        except TypeError:
            raise ValueError('r_range argument must be a tuple of two int : (start, end)')
        # Both ends of an HTTP byte range are inclusive.
        range_length = r_range[1] - r_range[0] + 1

    url_type, path = splittype(url)
    bs = 1024*8

    while True:
        request = Request(url, data, request_headers)
        with contextlib.closing(urlopen(request)) as fp:
            headers = fp.info()

            # Just return the local path and the "headers" for file://
            # URLs. No sense in performing a copy unless requested.
            if url_type == "file" and not filename:
                return os.path.normpath(path), headers

            # Handle temporary file setup. After the first attempt
            # ``filename`` is always set, so retries reuse the same file.
            if filename:
                tfp = open(filename, 'wb')
            else:
                tfp = tempfile.NamedTemporaryFile(delete=False)
                filename = tfp.name
                _url_tempfiles.append(filename)

            with tfp:
                result = filename, headers
                # Prefer the length announced by the server; fall back to
                # the length of the requested range, else unknown (-1).
                size = range_length
                if "content-length" in headers:
                    size = int(headers["Content-Length"])
                read = 0
                blocknum = 0

                if reporthook:
                    reporthook(blocknum, bs, size)

                while True:
                    block = fp.read(bs)
                    if not block:
                        break
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)

        # Unknown size or everything received: this attempt is the result.
        if size < 0 or read >= size:
            return result

        if maxtries == 0:
            raise ContentTooShortError("retrieval incomplete: got only %i out of %i bytes"% (read, size), result)
        if maxtries > 0:
            # -1 means unlimited retries, so only positive budgets count down.
            maxtries -= 1
예제 #3
0
파일: builder.py 프로젝트: nvrsw/gtk-win32
    def urlretrieve(self, url, filename, reporthook, ssl_ignore_cert=False):
        """
        Fetch ``url`` and store the response body in ``filename``.

        Arguments:
            url: address passed to ``urlopen``.
            filename: destination path, opened in binary write mode (any
                existing content is overwritten).
            reporthook: progress callback called as
                ``reporthook(blocknum, bs, size)`` before the first read and
                after each block written. ``size`` is the Content-Length
                value, or -1 when the header is absent. ``None`` turns
                progress reporting off.
            ssl_ignore_cert: if true, use an unverified SSL context so the
                server certificate is not checked.

        Returns a tuple ``(filename, headers)``; ``headers`` is what the
        response's ``info()`` method returned.

        Raises ContentTooShortError if the number of bytes read is smaller
        than the announced Content-Length.
        """
        # None leaves the choice of SSL context to urlopen; the unverified
        # context is used only when the caller asks to skip the check.
        ssl_ctx = ssl._create_unverified_context() if ssl_ignore_cert else None

        msg = 'Opening %s ...' % (url, )
        print(msg, end='\r')
        with contextlib.closing(urlopen(url, None, context=ssl_ctx)) as fp:
            # Overwrite the status message with blanks of the same width.
            print('%*s' % (len(msg), '', ), end='\r')
            headers = fp.info()

            with open(filename, 'wb') as tfp:
                result = filename, headers
                bs = 1024*8
                size = -1
                read = 0
                blocknum = 0
                if "content-length" in headers:
                    size = int(headers["Content-Length"])

                # Progress reporting is optional; skip it for a None hook.
                if reporthook:
                    reporthook(blocknum, bs, size)

                while True:
                    block = fp.read(bs)
                    if not block:
                        break
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)

        # Only a known size can reveal a truncated transfer.
        if size >= 0 and read < size:
            raise ContentTooShortError(
                "retrieval incomplete: got only %i out of %i bytes"
                % (read, size), result)

        return result
def extractReqTarget(full_link):
    """Work out the request target of a link that mentions "qunar".

    Returns:
        - ``None`` when the link text does not contain "qunar";
        - everything after the first "qrt=" when that marker is present;
        - the string 'qde' when the link contains "html.ng";
        - otherwise the host part of the link, or ``None`` if it has none.
    """
    link_text = str(full_link)

    if "qunar" in link_text:
        if "qrt=" in link_text:
            _before, _marker, target = full_link.partition('qrt=')
            return target
        if "html.ng" in link_text:
            return 'qde'

        # Strip the scheme first, then split the host off the remainder.
        _scheme, remainder = ur.splittype(full_link)
        host, _path = ur.splithost(remainder)
        if host:
            return host

    return None
예제 #5
0
def load_timestream(file_path):
    '''Load a time stream from either a text file, HDF5 file, or URL

    The argument "file_path" can be one of the following:

    1. A path to a text file;

    2. A path to an HDF5 file;

    3. An URL pointing to the JSON record of a test;

    4. An URL pointing to an HDF5 file.

    Return a pair consisting of a dictionary containing the metadata and a
    Timestream object. The metadata is None for a local text file and for an
    URL that serves the HDF5 file directly; for a local HDF5 file the pair is
    whatever load_hdf5_file returns.

    Raise ValueError if the URL serves a content type other than
    "application/json" or "application/hdf5".'''

    if not urlreq.splittype(file_path)[0]:
        # Local path
        ext = os.path.splitext(file_path)[1]
        if ext.lower() == '.txt':
            return None, load_text_file(file_path)
        else:
            return load_hdf5_file(file_path)

    # URL
    # Only the JSON branch provides metadata; default to None so the HDF5
    # branch does not hit an unbound name at the return statement.
    metadata = None
    h5_file_name = None
    try:
        # The response is closed on every path, including errors.
        with urlreq.urlopen(file_path) as req:
            content_type = req.info().get_content_type()

            # We are *forced* to create a named temporary file and close it
            # before reading, because h5py does not support reading from
            # file-like objects like BytesIO or an already opened
            # TemporaryFile
            with NamedTemporaryFile(suffix='h5', delete=False) as h5_file:
                h5_file_name = h5_file.name
                if content_type == 'application/json':
                    metadata = json.loads(req.read().decode('utf-8'))
                    download_test(file_path, metadata, h5_file)
                elif content_type == 'application/hdf5':
                    copyfileobj(req, h5_file)
                else:
                    raise ValueError(
                        'unknown content type: "{0}"'.format(content_type))

        result = load_hdf5_file(h5_file_name)[1]
    finally:
        # delete=False leaves removal to us; do it even when the download
        # or the HDF5 load fails, so no temporary file is left behind.
        if h5_file_name is not None:
            os.remove(h5_file_name)

    return metadata, result
예제 #6
0
print(url)

# Run each urllib2 split* helper on the URL in turn, printing the helper's
# name first and the value it returned second.
for helper_name in (
    'splitattr',
    'splithost',
    'splitpasswd',
    'splitport',
    'splittype',
    'splituser',
    'splitvalue',
):
    print(helper_name)
    x = getattr(urllib2, helper_name)(url)
    print(x)

from urllib.parse import urlparse

# The label still says urllib2.urlparse, but the attributes collected below
# are those of urllib.parse.urlparse.
print('dir(urllib2.urlparse)')
x = dir(urllib.parse.urlparse)