def test_http_download_to_file_throws_exception_on_error():
    """Call ``http_download_to_file`` with a session whose GET yields a 404.

    NOTE(review): nothing in this body asserts the failure; presumably a
    decorator or the runner outside this view expects the exception -- confirm.
    """
    # Canned error response whose body would stream a single chunk.
    failing_response = requests.Response()
    failing_response.status_code = 404
    failing_response.iter_content = lambda _: ['content']

    # Session stub: any GET hands back the canned response.
    fake_session = requests.Session()
    fake_session.get = lambda _: failing_response

    sink = StringIO()
    dumper.http_download_to_file('http://example.com', sink, fake_session)
def test_http_download_to_file_without_session_uses_requests_get(mock_get):
    """Without a session argument the download content must reach the file.

    ``mock_get`` is presumably the patched ``requests.get`` injected from
    outside this function -- TODO confirm against the decorator/fixture.
    """
    # Successful response streaming exactly one chunk.
    ok_response = requests.Response()
    ok_response.status_code = 200
    ok_response.iter_content = lambda _: ['content']
    mock_get.return_value = ok_response

    sink = StringIO()
    dumper.http_download_to_file('http://example.com', sink)

    # Rewind before reading back what was written.
    sink.seek(0)
    downloaded = sink.read()
    assert downloaded == 'content'
def test_http_download_to_file_with_session():
    """With an explicit session, the session's GET supplies the content."""
    # Successful response streaming exactly one chunk.
    ok_response = requests.Response()
    ok_response.status_code = 200
    ok_response.iter_content = lambda _: ['content']

    # Session stub: any GET hands back the canned response.
    fake_session = requests.Session()
    fake_session.get = lambda _: ok_response

    sink = StringIO()
    dumper.http_download_to_file('http://example.com', sink, fake_session)

    # Rewind before reading back what was written.
    sink.seek(0)
    downloaded = sink.read()
    assert downloaded == 'content'
def test_http_download_to_file_without_session_uses_requests_get():
    """Without a session argument the stubbed ``requests.get`` is used."""
    # Successful response streaming exactly one chunk.
    canned_response = requests.Response()
    canned_response.status_code = 200
    canned_response.iter_content = lambda _: ['content']

    sink = StringIO()
    # Replace requests.get for the duration of the download only.
    with stubbed(requests.get, lambda _, stream=False: canned_response):
        dumper.http_download_to_file('http://example.com', sink)

    # Rewind before reading back what was written.
    sink.seek(0)
    downloaded = sink.read()
    eq_(downloaded, 'content')
def download(cls, rse, date='latest', cache_dir=DUMPS_CACHE_DIR):
    """
    Download the requested dump into the cache and return its path.

    The dump is stored in ``cache_dir`` under a name built from the
    lowercased class name, the RSE, the dump date and the SHA-1 of the
    request URL. If a file with that name already exists nothing is
    downloaded and the existing path is returned.

    :param rse: RSE name, inserted as-is into the ``rse`` query parameter.
    :param date: ``'latest'`` or a ``datetime.datetime``. For ``'latest'``
                 the date used in the cache file name is taken from the
                 ``filename=`` field of the ``content-disposition`` header
                 of a HEAD response; otherwise the date is formatted as
                 ``%d-%m-%Y`` and sent as the ``date`` query parameter.
    :param cache_dir: Directory holding the cached dumps. It is created if
                      missing (only the last path component).
    :returns: Path of the cached dump file.
    :raises HTTPDownloadFailed: If the HEAD request for a dump that is not
                                cached yet does not return status 200.
    :raises AssertionError: If ``date`` is neither ``'latest'`` nor a
                            ``datetime.datetime``.
    """
    logger = logging.getLogger('auditor.data_models')
    requests_session = get_requests_session()

    if date == 'latest':
        url = ''.join((cls.BASE_URL, cls.URI, '?rse={0}'.format(rse)))
        request_headers = requests_session.head(url)
        content_disposition = request_headers.headers['content-disposition']
        for field in content_disposition.split(';'):
            # Header parameters are usually separated by '; ', so the
            # field has to be stripped or 'filename=' is never matched.
            field = field.strip()
            if field.startswith('filename='):
                # The date is the last '_'-separated token of the file
                # name, up to the first '.'.
                date = field.split('=')[1].split('_')[-1].split('.')[0]
    else:
        assert isinstance(date, datetime.datetime)
        date = date.strftime('%d-%m-%Y')  # pylint: disable=no-member
        url = ''.join((
            cls.BASE_URL,
            cls.URI,
            '?rse={0}&date={1}'.format(rse, date),
        ))

    if not os.path.isdir(cache_dir):
        try:
            os.mkdir(cache_dir)
        except OSError:
            # Another process may have created the directory between the
            # check and the mkdir; only fail if it really is not there.
            if not os.path.isdir(cache_dir):
                raise

    filename = '{0}_{1}_{2}_{3}'.format(
        cls.__name__.lower(),
        rse,
        date,
        hashlib.sha1(url.encode()).hexdigest())
    # Keep only word characters so the name is safe as a file name.
    filename = re.sub(r'\W', '-', filename)
    path = os.path.join(cache_dir, filename)

    if not os.path.exists(path):
        logger.debug('Trying to download: "%s"', url)
        # Check availability with a HEAD request before downloading.
        response = requests_session.head(url)
        if response.status_code != 200:
            logger.error(
                'Retrieving %s returned %d status code',
                url,
                response.status_code,
            )
            raise HTTPDownloadFailed('Downloading {0} dump'.format(
                cls.__name__), code=response.status_code)

        with temp_file(cache_dir, final_name=filename) as (tfile, _):
            http_download_to_file(url, tfile, session=requests_session)

    return path