Exemplo n.º 1
0
def test_http_download_to_file_throws_exception_on_error():
    """Run ``http_download_to_file`` against a session whose GET yields a 404."""
    # NOTE(review): nothing in this body checks that an exception is raised;
    # presumably a decorator outside this view carries that expectation --
    # confirm.
    failing_response = requests.Response()
    failing_response.status_code = 404
    failing_response.iter_content = lambda _: ['content']

    # Stub the session so no real HTTP request is made.
    fake_session = requests.Session()
    fake_session.get = lambda _: failing_response
    sink = StringIO()

    dumper.http_download_to_file('http://example.com', sink, fake_session)
Exemplo n.º 2
0
def test_http_download_to_file_without_session_uses_requests_get(mock_get):
    """Without a session argument, the canned response body ends up in the file.

    ``mock_get`` is presumably the patched ``requests.get`` injected by a
    decorator outside this view -- confirm.
    """
    ok_response = requests.Response()
    ok_response.status_code = 200
    ok_response.iter_content = lambda _: ['content']
    mock_get.return_value = ok_response

    sink = StringIO()
    dumper.http_download_to_file('http://example.com', sink)

    # Rewind before reading back what the download wrote.
    sink.seek(0)
    written = sink.read()
    assert written == 'content'
Exemplo n.º 3
0
def test_http_download_to_file_with_session():
    """With an explicit session, the response body is written to the file."""
    ok_response = requests.Response()
    ok_response.status_code = 200
    ok_response.iter_content = lambda _: ['content']

    # Stub the session's GET so the canned response is returned.
    fake_session = requests.Session()
    fake_session.get = lambda _: ok_response
    sink = StringIO()

    dumper.http_download_to_file('http://example.com', sink, fake_session)

    # Rewind before reading back what the download wrote.
    sink.seek(0)
    written = sink.read()
    assert written == 'content'
Exemplo n.º 4
0
def test_http_download_to_file_without_session_uses_requests_get():
    """Without a session argument, the stubbed ``requests.get`` supplies the body."""
    ok_response = requests.Response()
    ok_response.status_code = 200
    ok_response.iter_content = lambda _: ['content']

    sink = StringIO()

    # The stub accepts the URL plus an optional ``stream`` keyword.
    fake_get = lambda _, stream=False: ok_response  # noqa: E731
    with stubbed(requests.get, fake_get):
        dumper.http_download_to_file('http://example.com', sink)

    # Rewind before reading back what the download wrote.
    sink.seek(0)
    written = sink.read()
    eq_(written, 'content')
Exemplo n.º 5
0
    def download(cls, rse, date='latest', cache_dir=DUMPS_CACHE_DIR):
        """
        Download the requested dump into the cache and return its path.

        The dump is fetched only when no file for the same class, RSE, date
        and URL is already present in ``cache_dir``.

        :param rse: RSE name, sent as the ``rse`` query parameter.
        :param date: ``'latest'`` or a ``datetime.datetime``. For
                     ``'latest'`` the date is read from the ``filename``
                     field of the ``content-disposition`` header of a HEAD
                     request; otherwise it is sent as ``%d-%m-%Y``.
        :param cache_dir: directory holding the downloaded dumps; created
                          if it does not exist.
        :returns: path of the cached dump file inside ``cache_dir``.
        :raises HTTPDownloadFailed: if the HEAD request issued before the
                                    download does not return status 200.
        """
        logger = logging.getLogger('auditor.data_models')
        requests_session = get_requests_session()
        if date == 'latest':
            url = ''.join((cls.BASE_URL, cls.URI, '?rse={0}'.format(rse)))
            request_headers = requests_session.head(url)
            # NOTE(review): a response without this header fails here with a
            # lookup error before the status code is ever checked -- confirm
            # this is acceptable.
            disposition = request_headers.headers['content-disposition']
            for field in disposition.split(';'):
                # Header parameters are commonly written "; name=value", so
                # drop surrounding whitespace before matching the name;
                # otherwise the field is never recognised and ``date``
                # silently stays 'latest'.
                field = field.strip()
                if field.startswith('filename='):
                    # The date is the last "_"-separated token of the file
                    # name, up to its first ".".
                    date = field.split('=')[1].split('_')[-1].split('.')[0]

        else:
            assert isinstance(date, datetime.datetime)
            date = date.strftime('%d-%m-%Y')  # pylint: disable=no-member
            url = ''.join((
                cls.BASE_URL,
                cls.URI,
                '?rse={0}&date={1}'.format(rse, date),
            ))

        if not os.path.isdir(cache_dir):
            os.mkdir(cache_dir)

        # The cache key combines class, RSE, date and a hash of the URL; any
        # non-word character is replaced so the result is a plain file name.
        filename = '{0}_{1}_{2}_{3}'.format(
            cls.__name__.lower(), rse, date,
            hashlib.sha1(url.encode()).hexdigest())
        filename = re.sub(r'\W', '-', filename)
        path = os.path.join(cache_dir, filename)

        if not os.path.exists(path):
            logger.debug('Trying to download: "%s"', url)
            # Check availability with a HEAD request before downloading.
            response = requests_session.head(url)
            if response.status_code != 200:
                logger.error(
                    'Retrieving %s returned %d status code',
                    url,
                    response.status_code,
                )
                raise HTTPDownloadFailed('Downloading {0} dump'.format(
                    cls.__name__),
                                         code=response.status_code)

            # Download through a temporary file that is given ``filename``
            # as its final name.
            with temp_file(cache_dir, final_name=filename) as (tfile, _):
                http_download_to_file(url, tfile, session=requests_session)

        return path