def test_bad_line_count(self):
    """send a known bad line_count parameter"""
    # Each case pairs a bad value with the substring expected in the
    # error 'detail': a parseable-but-out-of-range int vs. a non-int.
    cases = [
        ('-1', 'out of range'),
        ('123notint', 'invalid literal'),
    ]
    for garbage, expected_detail in cases:
        event = self._make_event(
            {
                'url': self.FILE_URL,
                'input': 'txt',
                'line_count': garbage
            },
            {'origin': MOCK_ORIGIN})
        resp = index.lambda_handler(event, None)
        # the lambda must reject bad line_count values with a 400
        assert resp['statusCode'] == 400, \
            f'Expected 400 on event with line_count of {garbage}'
        body = json.loads(read_body(resp))
        assert 'Unexpected line_count=' in body['title'], \
            'Expected 400 explanation'
        assert expected_detail in body['detail'], 'Expected 400 explanation'
def test_ipynb(self):
    """test sending ipynb bytes"""
    notebook = BASE_DIR / 'nb_1200727.ipynb'
    responses.add(
        responses.GET,
        self.FILE_URL,
        body=notebook.read_bytes(),
        status=200)
    event = self._make_event({'url': self.FILE_URL, 'input': 'ipynb'})
    resp = index.lambda_handler(event, None)
    body = json.loads(read_body(resp))
    assert resp['statusCode'] == 200, 'preview failed on nb_1200727.ipynb'
    body_html = body['html']
    # neither lxml, nor py_w3c.validators.html.validator works to validate
    # these fragments; reasons include base64 encoded images, html entities, etc.
    # so we are going to trust nbconvert and just do some basic sanity checks
    # it is also the case that we (often) need to update nbconvert, and
    # HTML output changes version over version, so checking for exact HTML
    # is fragile
    assert body_html.count('<div') > 0, 'expected divs in ipynb HTML'
    assert body_html.count('<div') == body_html.count('</div>')
    assert body_html.count('<span') > 0, 'expected spans in ipynb HTML'
    assert body_html.count('<span') == body_html.count('</span>')
    # check for some strings we know should be in there
    assert 'SVD of Minute-Market-Data' in body_html, 'missing expected contents'
    assert 'Preprocessing' in body_html, 'missing expected contents'
    # BUGFIX: this literal contains single quotes, so it must be
    # double-quoted; the original single-quoted form was a syntax error.
    assert "<pre>['SEE', 'SE', 'SHW', 'SIG'," in body_html, \
        'Cell 3 output seems off'
    assert (
        '<span class="n">batch_size</span><span class="o">=</span><span class="mi">100</span>'
        '<span class="p">') in body_html, 'Last cell output missing'
def test_generate_thumbnail(data_dir, input_file, thumb_size, expected_thumb,
                            expected_original_size, expected_thumb_size):
    """End-to-end thumbnail generation against a mocked HTTP fixture."""
    # Build the full fixture path and register a mocked GET for it
    source = data_dir / input_file
    url = f"https://example.com/{source}"
    responses.add(responses.GET, url=url, body=source.read_bytes(), status=200)
    # Invoke the lambda with a thumbnail-size request
    response = lambda_handler(_make_event({"url": url, "size": thumb_size}), None)
    assert response["statusCode"] == 200
    body = json.loads(read_body(response))
    # Basic metadata checks
    assert body["info"]["original_size"] == expected_original_size
    assert body["info"]["thumbnail_size"] == expected_thumb_size
    # Decode the returned thumbnail and compare pixel data to the fixture
    produced = AICSImage(base64.b64decode(body['thumbnail'])).reader.data
    reference = AICSImage(data_dir / expected_thumb).reader.data
    assert np.array_equal(produced, reference)
def test_tsv_quote(self):
    """test TSV from the glue NLP dataset"""
    csv = BASE_DIR / 'dev.tsv'
    responses.add(responses.GET, self.FILE_URL, body=csv.read_bytes(), status=200)
    event = self._make_event({
        'url': self.FILE_URL,
        'input': 'csv',
        'sep': '\t'
    })
    resp = index.lambda_handler(event, None)
    body = json.loads(read_body(resp))
    assert resp['statusCode'] == 200, f'preview failed on {csv}'
    html = body['html']
    # spot-check two known cells, including one with embedded quotes
    expected_cells = (
        "<td>While dioxin levels in the environment were up",
        "<td>In Soviet times the Beatles ' music \" was cons...</td>",
    )
    for cell in expected_cells:
        assert cell in html, "missing expected cell"
    # this fixture is known to trigger parser warnings
    warnings = body['info']['warnings']
    assert warnings, f"expected warnings when parsing {csv}"
    assert warnings.count("Skipping line") == 43, "expected to skip 43 lines"
def test_vcf_gz_partial(self):
    """test previewing part of a gzipped file

    we _should_ read 4 whole chunks and one partial one; and the preview
    endpoint should truncate to the last whole line
    """
    archive = BASE_DIR / 'example.vcf.gz'
    # the fixture must be big enough to require a partial chunk read
    assert os.path.getsize(archive) > 128 * 5, 'not testing partial file decode'
    responses.add(responses.GET, self.FILE_URL, body=archive.read_bytes(), status=200)
    event = self._make_event({
        'url': self.FILE_URL,
        'input': 'vcf',
        'compression': 'gz'
    })
    # test partial decode
    resp = index.lambda_handler(event, None)
    body = json.loads(read_body(resp))
    assert resp['statusCode'] == 200, \
        'preview failed on example.vcf.gz, partial decode'
    data = body['info']['data']
    # a partial decode yields only meta lines: no data rows, no header
    assert not data['data'], 'partial decode; did not expect any data'
    assert not data['header'], 'partial decode; did not expect a header'
    first_meta, last_meta = data['meta'][0], data['meta'][-1]
    assert first_meta == '##fileformat=VCFv4.0', 'bad first meta line'
    assert last_meta.startswith('##FILTER=<'), 'bad last meta line'
    assert last_meta.endswith('samples have data">'), 'bad last meta line'
    meta = body['info']['metadata']
    assert meta['variant_count'] == 0, 'expected no variants'
    assert not meta['variants'], 'expected no variants'
def test_tsv_as_csv(self):
    """test returning HTML previews of mislabeled or problematic CSVs (via pandas)"""
    csv = BASE_DIR / 'tsv_mixed_types.csv'
    responses.add(responses.GET, self.FILE_URL, body=csv.read_bytes(), status=200)
    event = self._make_event({'url': self.FILE_URL, 'input': 'csv'})
    resp = index.lambda_handler(event, None)
    body = json.loads(read_body(resp))
    assert resp['statusCode'] == 200, f'preview failed on {csv}'
    body_html = body['html']
    assert body_html.count('<table') == 1, 'expected one HTML table'
    assert body_html.count('</table>') == 1, 'expected one HTML table'
    assert body_html.count('<thead>') == 1, 'expected one HTML table head'
    assert body_html.count('</thead>') == 1, 'expected one HTML table head'
    assert body_html.count('<p>') == body_html.count(
        '</p>'), 'malformed HTML'
    assert '<td>Taiwan Strait, Taiwan (general), Taiwan</td>' in body_html, \
        'Missing a cell on the Taiwan Strait'
    # BUGFIX: re.match() only anchors at the start of the string, so the
    # old check could never find the dimensions text inside the HTML and
    # was vacuous; re.search() scans the whole document.
    assert not re.search(r'\d+ rows × \d+ columns', body_html), \
        'table dimensions should be removed'
    with open(BASE_DIR / 'tsv_mixed_types_html_response_head.txt') as expected:
        head = expected.read()
        assert head in body_html, 'unexpected first columns'
def test_tsv(self):
    """test returning HTML previews of TSV (via pandas)"""
    csv = BASE_DIR / 'avengers.tsv'
    responses.add(responses.GET, self.FILE_URL, body=csv.read_bytes(), status=200)
    event = self._make_event({
        'url': self.FILE_URL,
        'input': 'csv',
        'sep': '\t'
    })
    resp = index.lambda_handler(event, None)
    body = json.loads(read_body(resp))
    assert resp['statusCode'] == 200, f'preview failed on {csv}'
    body_html = body['html']
    assert body_html.count('<table') == 1, 'expected one HTML table'
    assert body_html.count('</table>') == 1, 'expected one HTML table'
    assert body_html.count('<thead>') == 1, 'expected one HTML table head'
    assert body_html.count('</thead>') == 1, 'expected one HTML table head'
    assert body_html.count('<p>') == body_html.count(
        '</p>'), 'malformed HTML'
    assert '<td>Nicholas Fury, Jr., Marcus Johnson</td>' in body_html, \
        'Expected Nick to be an Avenger'
    # BUGFIX: re.match() only anchors at the start of the string, so the
    # old check could never find the dimensions text inside the HTML and
    # was vacuous; re.search() scans the whole document.
    assert not re.search(r'\d+ rows × \d+ columns', body_html), \
        'table dimensions should be removed'
    with open(BASE_DIR / 'tsv_html_response_head.txt') as expected:
        head = expected.read()
        assert head in body_html, 'unexpected first columns'
def test_vcf(self):
    """test sending vcf bytes"""
    fixture = BASE_DIR / 'example.vcf'
    responses.add(responses.GET, self.FILE_URL, body=fixture.read_bytes(), status=200)
    resp = index.lambda_handler(
        self._make_event({'url': self.FILE_URL, 'input': 'vcf'}), None)
    assert resp['statusCode'] == 200, 'preview failed on example.vcf'
    # delegate the detailed content checks to the shared helper
    _check_vcf(read_body(resp))
def test_bad_hostname(self):
    """a URL that is not an S3 endpoint must be rejected with a 400"""
    event = self._make_event(
        {'url': 'https://example.com/foo', 'input': 'txt'},
        {'origin': MOCK_ORIGIN})
    resp = index.lambda_handler(event, None)
    assert resp['statusCode'] == 400, 'Expected 400 on event with a non-S3 URL'
    body = json.loads(read_body(resp))
    assert 'S3' in body['title'], 'Expected 400 explanation'
def test_folder_view_paging(self):
    """
    End-to-end test (top-level folder view with a limit & offset)
    """
    bucket = "bucket"
    key = ".quilt/packages/manifest_hash"
    params = dict(
        bucket=bucket,
        manifest=key,
        action="dir",
        params={
            "path": "paging_test/",
            "limit": 10,
            "offset": 10,
        },
    )
    # NOTE(review): unused — presumably documents the S3 Select call the
    # lambda is expected to make; verify against the handler
    expected_args = {
        'Bucket': bucket,
        'Key': key,
        'Expression': "SELECT SUBSTRING(s.logical_key, 1) AS logical_key FROM s3object s",
        'ExpressionType': 'SQL',
        'InputSerialization': {
            'CompressionType': 'NONE',
            'JSON': {'Type': 'LINES'}
        },
        'OutputSerialization': {'JSON': {'RecordDelimiter': '\n'}},
    }
    # 1000 keys so paging has something to page through
    paging_logical_keys = [f"f{i:03d}.csv" for i in range(1000)]
    s3response_paging = self.make_manifest_query(paging_logical_keys)
    mock_s3 = boto3.client('s3')
    select_patch = patch.object(
        mock_s3,
        'select_object_content',
        side_effect=[s3response_paging, self.s3response_meta],
    )
    session_patch = patch('boto3.Session.client', return_value=mock_s3)
    with select_patch, session_patch:
        response = pkgselect.lambda_handler(params, None)
        print(response)
        folder = json.loads(read_body(response))['result']
        # offset=10, limit=10 over 1000 keys -> second page of ten objects
        assert len(folder['prefixes']) == 0
        assert len(folder['objects']) == 10
        assert folder['total'] == 1000
        assert folder['objects'][0]['logical_key'] == 'f010.csv'
def test_fcs(self):
    """test fcs extraction

    for extended testing you can download FCS files here
    https://flowrepository.org/experiments/4/download_ziped_files,
    copy to data/fcs/ and run this unit test
    """
    parent = BASE_DIR / "fcs"
    fcs_files = list(parent.glob("*.fcs"))
    # "extended" mode kicks in when extra FCS files have been dropped into
    # data/fcs/; name-specific assertions are skipped in that case
    base_names = {'accuri-ao1.fcs', 'bad.fcs', '3215apc 100004.fcs'}
    extended = {os.path.split(f)[1] for f in fcs_files} != base_names
    for position, fcs in enumerate(fcs_files):
        _, name = os.path.split(fcs)
        payload = fcs.read_bytes()
        # the first file registers the mock; later files swap the body
        register = responses.add if position == 0 else responses.replace
        register(
            responses.GET,
            self.FILE_URL,
            body=payload,
            status=200,
        )
        event = self._make_event({'url': self.FILE_URL, 'input': 'fcs'})
        resp = index.lambda_handler(event, None)
        assert resp['statusCode'] == 200, \
            f'Expected 200, got {resp["statusCode"]}'
        body = json.loads(read_body(resp))
        assert 'info' in body
        if 'warnings' not in body['info']:
            # clean parse: HTML preview plus metadata
            if not extended:
                assert name == 'accuri-ao1.fcs'
            assert body['html'].startswith('<div>')
            assert body['html'].endswith('</div>')
            assert body['info']['metadata'].keys()
        else:
            # parse produced warnings: no HTML preview
            assert not body['html']
            if 'metadata' not in body['info']:
                assert body['info']['warnings'].startswith('Unable')
                if not extended:
                    assert name == 'bad.fcs'
            else:
                if not extended:
                    assert name == '3215apc 100004.fcs'
def test_empty_manifest(self):
    """
    End-to-end test (folder view without a prefix) for an empty
    package manifest
    """
    bucket = "bucket"
    key = ".quilt/packages/manifest_hash"
    params = dict(
        bucket=bucket,
        manifest=key,
        access_key="TESTKEY",
        secret_key="TESTSECRET",
        session_token="TESTSESSION",
    )
    # NOTE(review): unused — presumably documents the S3 Select call the
    # lambda is expected to make; verify against the handler
    expected_args = {
        'Bucket': bucket,
        'Key': key,
        'Expression': "SELECT SUBSTRING(s.logical_key, 1) AS logical_key FROM s3object s",
        'ExpressionType': 'SQL',
        'InputSerialization': {
            'CompressionType': 'NONE',
            'JSON': {'Type': 'LINES'}
        },
        'OutputSerialization': {'JSON': {'RecordDelimiter': '\n'}},
    }
    # A manifest whose only record is the version header: no entries at all
    jsonl = '{"version": "v0", "message": null}'
    empty_manifest_response = self.make_s3response(jsonl.encode())
    mock_s3 = boto3.client('s3')
    with patch.object(
        mock_s3,
        'select_object_content',
        side_effect=[empty_manifest_response, self.s3response_meta],
    ) as client_patch, patch('boto3.Session.client', return_value=mock_s3):
        response = lambda_handler(self._make_event(params), None)
        print(response)
        assert response['statusCode'] == 200
        folder = json.loads(read_body(response))['contents']
        # an empty manifest yields an empty listing
        assert not folder['prefixes']
        assert not folder['objects']
        assert folder['total'] == 0
def test_generate_thumbnail(
    data_dir, input_file, params, expected_thumb,
    expected_original_size, expected_thumb_size, num_pages, status
):
    """End-to-end thumbnail generation, including the PDF path."""
    # don't actually modify the environment in tests
    with patch.object(index, 'set_pdf_env', return_value=None) as set_env:
        # Register a mocked GET for the fixture file
        source = data_dir / input_file
        url = f"https://example.com/{source}"
        responses.add(
            responses.GET, url=url, body=source.read_bytes(), status=200
        )
        # Invoke the lambda
        event = _make_event({"url": url, **params})
        response = index.lambda_handler(event, None)
        assert response["statusCode"] == 200, f"response: {response}"
        # only check the body and expected image if it's a successful call
        body = json.loads(read_body(response))
        # Basic metadata checks
        assert body["info"]["thumbnail_size"] == expected_thumb_size
        if expected_original_size:
            # PDFs don't have an expected size
            assert body["info"]["original_size"] == expected_original_size
        if "countPages" in params:
            assert body["info"]["page_count"] == num_pages
        raw = base64.b64decode(body['thumbnail'])
        if params.get('input') == 'pdf':
            # rendered PDFs vary slightly across platforms; compare loosely
            actual_array = np.array(Image.open(BytesIO(raw)))
            expected_array = np.array(Image.open(data_dir / expected_thumb))
            assert set_env.call_count == 1
            assert actual_array.shape == expected_array.shape
            assert np.allclose(expected_array, actual_array, atol=15, rtol=0.1)
        else:
            actual = AICSImage(raw).reader.data
            expected = AICSImage(data_dir / expected_thumb).reader.data
            assert np.array_equal(actual, expected)
def test_bad_max_bytes(self):
    """send a known bad max_bytes parameter"""
    garbage = 'gfgfgf'
    event = self._make_event(
        {
            'url': self.FILE_URL,
            'input': 'txt',
            'max_bytes': garbage
        },
        {'origin': MOCK_ORIGIN})
    resp = index.lambda_handler(event, None)
    # BUGFIX: the failure message previously said "line_count", but this
    # test exercises the max_bytes parameter.
    assert resp['statusCode'] == 400, \
        f'Expected 400 on event with max_bytes of {garbage}'
    body = json.loads(read_body(resp))
    assert 'Unexpected max_bytes=' in body['title'], 'Expected 400 explanation'
def test_parquet_empty(self):
    """test a parquet file with columns but no rows"""
    fixture = BASE_DIR / 'onlycolumns-c000'
    responses.add(responses.GET, self.FILE_URL, body=fixture.read_bytes(), status=200)
    resp = index.lambda_handler(
        self._make_event({'url': self.FILE_URL, 'input': 'parquet'}), None)
    assert resp['statusCode'] == 200, f'Expected 200, got {resp["statusCode"]}'
    html = json.loads(read_body(resp))['html']
    # spot-check columns from the start, middle, and end of the schema
    for column in ('column_a', 'column_k', 'column_z'):
        assert f'<th>{column}</th>' in html, f'Missing {column}'
def test_generate_thumbnail(data_dir, input_file, params, expected_thumb,
                            expected_original_size, expected_thumb_size,
                            num_pages, status):
    """End-to-end thumbnail generation; metadata rides in a response header."""
    # Register a mocked GET for the fixture file
    source = data_dir / input_file
    url = f"https://example.com/{source}"
    responses.add(responses.GET, url=url, body=source.read_bytes(), status=200)
    event = _make_event({"url": url, **params})
    if expected_thumb == "I16-mode-128-fallback.png":
        # Note that if this set of params fails, it may be that better resamplers
        # have been added for this mode, and either the image or test will need
        # to be updated.
        with _mock(t4_lambda_thumbnail, '_convert_I16_to_L', Image.fromarray):
            response = t4_lambda_thumbnail.lambda_handler(event, None)
    else:
        response = t4_lambda_thumbnail.lambda_handler(event, None)
    assert response["statusCode"] == 200, f"response: {response}"
    # only check the body and expected image if it's a successful call
    body = read_body(response)
    # metadata is carried in a dedicated response header, not the body
    info = json.loads(response["headers"][QUILT_INFO_HEADER])
    assert info["thumbnail_size"] == expected_thumb_size
    if expected_original_size:
        # PDFs don't have an expected size
        assert info["original_size"] == expected_original_size
    if "countPages" in params:
        assert info["page_count"] == num_pages
    if params.get('input') in ('pdf', "pptx"):
        # rendered documents vary slightly across platforms; compare loosely
        actual_array = np.array(Image.open(BytesIO(body)))
        expected_array = np.array(Image.open(data_dir / expected_thumb))
        assert actual_array.shape == expected_array.shape
        assert np.allclose(expected_array, actual_array, atol=15, rtol=0.1)
    else:
        actual = AICSImage(body)
        expected = AICSImage(data_dir / expected_thumb)
        assert actual.size() == expected.size()
        assert np.array_equal(actual.reader.data, expected.reader.data)
def test_excel(self):
    """test parsing excel files in S3"""
    workbook = BASE_DIR / 'sample.xlsx'
    responses.add(responses.GET, self.FILE_URL, body=workbook.read_bytes(), status=200)
    resp = index.lambda_handler(
        self._make_event({'url': self.FILE_URL, 'input': 'excel'}), None)
    body = json.loads(read_body(resp))
    assert resp['statusCode'] == 200, 'preview failed on sample.xlsx'
    html = body['html']
    # spot-check known value frequencies from the sample workbook
    expected_counts = {
        'Germany': 13,
        'Enterprise': 7,
        'Midmarket': 13,
        'Canada': 9,
    }
    for needle, count in expected_counts.items():
        assert html.count(needle) == count, 'unexpected data contents'
def test_ipynb_chop(self):
    """test that we eliminate output cells when we're in danger of breaking
    Lambda's invocation limit"""
    notebook = BASE_DIR / 'nb_1200727.ipynb'
    responses.add(responses.GET, self.FILE_URL, body=notebook.read_bytes(), status=200)
    resp = index.lambda_handler(
        self._make_event({'url': self.FILE_URL, 'input': 'ipynb'}), None)
    body = json.loads(read_body(resp))
    assert resp['statusCode'] == 200, 'preview failed on nb_1200727.ipynb'
    # isclose bc string sizes differ, e.g. on Linux
    assert math.isclose(len(body['html']), 18084, abs_tol=200), \
        "Hmm, didn't chop nb_1200727.ipynb"
def test_folder_view(self):
    """
    End-to-end test (folder view without a prefix)
    """
    bucket = "bucket"
    key = ".quilt/packages/manifest_hash"
    params = dict(bucket=bucket, manifest=key, access_key="TESTKEY",
                  secret_key="TESTSECRET", session_token="TESTSESSION")
    mock_s3 = boto3.client('s3')
    # BUGFIX: use context managers so the patches are undone even when an
    # assertion fails; the old start()/stop() pattern leaked the patch.
    # select_object_content is consumed twice: listing, then metadata.
    with patch.object(
        mock_s3,
        'select_object_content',
        side_effect=[self.s3response, self.s3response_meta]
    ), patch('boto3.Session.client', return_value=mock_s3):
        response = lambda_handler(self._make_event(params), None)
    print(response)
    assert response['statusCode'] == 200
    folder = json.loads(read_body(response))['contents']
    assert len(folder['prefixes']) == 1
    assert len(folder['objects']) == 1
    assert folder['objects'][0]['logical_key'] == 'foo.csv'
    assert folder['prefixes'][0]['logical_key'] == 'bar/'
def test_parquet(self):
    """test sending parquet bytes"""
    parquet = BASE_DIR / 'atlantic_storms.parquet'
    responses.add(responses.GET, self.FILE_URL, body=parquet.read_bytes(), status=200)
    resp = index.lambda_handler(
        self._make_event({'url': self.FILE_URL, 'input': 'parquet'}), None)
    assert resp['statusCode'] == 200, f'Expected 200, got {resp["statusCode"]}'
    body = json.loads(read_body(resp))
    # the canonical 'info' payload for this fixture lives next to the data
    with open(BASE_DIR / 'parquet_info_response.json', 'r') as info_json:
        expected = json.load(info_json)
    assert body['info'] == expected, f'Unexpected body["info"] for {parquet}'
def test_txt_short(self):
    """test sending txt bytes"""
    txt = BASE_DIR / 'short.txt'
    responses.add(responses.GET, self.FILE_URL, body=txt.read_bytes(), status=200)
    resp = index.lambda_handler(
        self._make_event({'url': self.FILE_URL, 'input': 'txt'}), None)
    body = json.loads(read_body(resp))
    assert resp['statusCode'] == 200, 'preview lambda failed on short.txt'
    head = body['info']['data']['head']
    # the whole 98-line fixture fits in the head; nothing spills to the tail
    assert len(head) == 98, 'unexpected number of lines head'
    assert head[0] == 'Line 1', 'unexpected first line in head'
    assert head[97] == 'Line 98', 'unexpected last line in head'
    assert not body['info']['data']['tail'], 'expected empty tail'
def test_detail_view(self):
    """
    End-to-end test (detail view)
    """
    bucket = "bucket"
    key = ".quilt/packages/manifest_hash"
    logical_key = "bar/file1.txt"
    params = dict(
        bucket=bucket,
        manifest=key,
        logical_key=logical_key,
        access_key="TESTKEY",
        secret_key="TESTSECRET",
        session_token="TESTSESSION",
    )
    # The lambda is expected to issue, roughly:
    #   SELECT s.* FROM s3object s WHERE s.logical_key = 'bar/file1.txt' LIMIT 1
    mock_s3 = boto3.client('s3')
    # BUGFIX: use context managers so the patches are undone even when an
    # assertion fails; the old start()/stop() pattern leaked the patch.
    with patch.object(
        mock_s3, 'select_object_content', return_value=self.s3response_detail
    ), patch('boto3.Session.client', return_value=mock_s3):
        response = lambda_handler(self._make_event(params), None)
    print(response)
    assert response['statusCode'] == 200
    # body must parse and expose a 'contents' payload
    json.loads(read_body(response))['contents']
def test_max_bytes(self):
    """test max bytes"""
    txt = BASE_DIR / 'short.txt'
    responses.add(responses.GET, self.FILE_URL, body=txt.read_bytes(), status=200)
    event = self._make_event({
        'url': self.FILE_URL,
        'input': 'txt',
        'max_bytes': '3',
    })
    resp = index.lambda_handler(event, None)
    body = json.loads(read_body(resp))
    assert resp['statusCode'] == 200, 'preview lambda failed on short.txt'
    head = body['info']['data']['head']
    # a tiny max_bytes truncates the preview to a single partial line
    assert len(head) == 1, 'unexpected number of lines head'
    assert head[0] == 'Line', 'unexpected first line in head'
def test_csv(self):
    """test returning HTML previews of CSV (via pandas)"""
    csv = BASE_DIR / 'sample.csv'
    responses.add(responses.GET, self.FILE_URL, body=csv.read_bytes(), status=200)
    event = self._make_event({'url': self.FILE_URL, 'input': 'csv'})
    resp = index.lambda_handler(event, None)
    body = json.loads(read_body(resp))
    assert resp['statusCode'] == 200, 'preview failed on sample.csv'
    body_html = body['html']
    assert body_html.count('<table') == 1, 'expected one HTML table'
    assert body_html.count('</table>') == 1, 'expected one HTML table'
    assert body_html.count('<p>') == body_html.count(
        '</p>'), 'malformed HTML'
    # BUGFIX: re.match() only anchors at the start of the string, so the
    # old check could never find the dimensions text inside the HTML and
    # was vacuous; re.search() scans the whole document.
    assert not re.search(r'\d+ rows × \d+ columns', body_html), \
        'table dimensions should be removed'
    with open(BASE_DIR / 'csv_html_response_head.txt') as expected:
        head = expected.read()
        assert head in body_html, 'unexpected first columns'
def test_parquet_no_pandas(self):
    """test sending parquet bytes, but with a different metadata format"""
    parquet = BASE_DIR / 'parquet_no_pandas.snappy.parquet'
    responses.add(responses.GET, self.FILE_URL, body=parquet.read_bytes(), status=200)
    resp = index.lambda_handler(
        self._make_event({'url': self.FILE_URL, 'input': 'parquet'}), None)
    assert resp['statusCode'] == 200, f'Expected 200, got {resp["statusCode"]}'
    body = json.loads(read_body(resp))
    html = body['html']
    # cross-check the preview against the file's own parquet schema
    pf = pq.ParquetFile(parquet)
    for col in pf.schema.names:
        assert f'<th>{col}</th>' in html, \
            'missing a column header in the preview'
    assert html.count('<') > 0, 'expected tags in HTML'
    assert html.count('<') == html.count('>'), 'unmatched HTML tags'
    assert set(pf.schema.names) == set(body['info']['schema']['names']), \
        'unexpected difference of columns'
def test_folder_view(self):
    """
    End-to-end test (folder view without a prefix)
    """
    bucket = "bucket"
    key = ".quilt/packages/manifest_hash"
    params = dict(bucket=bucket, manifest=key, action="dir")
    # NOTE(review): unused — presumably documents the S3 Select call the
    # lambda is expected to make; verify against the handler
    expected_args = {
        'Bucket': bucket,
        'Key': key,
        'Expression': "SELECT SUBSTRING(s.logical_key, 1) AS logical_key FROM s3object s",
        'ExpressionType': 'SQL',
        'InputSerialization': {
            'CompressionType': 'NONE',
            'JSON': {'Type': 'LINES'}
        },
        'OutputSerialization': {'JSON': {'RecordDelimiter': '\n'}},
    }
    mock_s3 = boto3.client('s3')
    select_patch = patch.object(
        mock_s3,
        'select_object_content',
        side_effect=[self.s3response, self.s3response_meta],
    )
    with select_patch, patch('boto3.Session.client', return_value=mock_s3):
        response = pkgselect.lambda_handler(params, None)
        print(response)
        folder = json.loads(read_body(response))['result']
        assert len(folder['prefixes']) == 1
        assert len(folder['objects']) == 1
        assert folder['objects'][0]['logical_key'] == 'foo.csv'
        assert folder['prefixes'][0]['logical_key'] == 'bar/'
def test_detail_view(self):
    """
    End-to-end test (detail view)
    """
    bucket = "bucket"
    key = ".quilt/packages/manifest_hash"
    logical_key = "bar/file1.txt"
    params = dict(
        bucket=bucket,
        manifest=key,
        action="file",
        params={"path": logical_key},
    )
    # NOTE(review): expected_sql/expected_args are unused — presumably kept
    # as documentation of the S3 Select calls; verify against the handler
    expected_sql = "SELECT s.* FROM s3object s WHERE s.logical_key = 'bar/file1.txt' LIMIT 1"
    expected_args = {
        'Bucket': bucket,
        'Key': key,
        'Expression': "SELECT SUBSTRING(s.logical_key, 1) AS logical_key FROM s3object s",
        'ExpressionType': 'SQL',
        'InputSerialization': {
            'CompressionType': 'NONE',
            'JSON': {'Type': 'LINES'}
        },
        'OutputSerialization': {'JSON': {'RecordDelimiter': '\n'}},
    }
    mock_s3 = boto3.client('s3')
    detail_patch = patch.object(
        mock_s3, 'select_object_content', return_value=self.s3response_detail)
    with detail_patch, patch('boto3.Session.client', return_value=mock_s3):
        response = pkgselect.lambda_handler(params, None)
        print(response)
        # body must parse and expose a 'result' payload
        json.loads(read_body(response))['result']
def test_anon_access(self):
    """
    Test anonymous call w/ ALLOW_ANONYMOUS_ACCESS
    """
    bucket = "bucket"
    key = ".quilt/packages/manifest_hash"
    params = dict(bucket=bucket, manifest=key)
    # canned head_object reply plus the params the stubber should match
    head_object_response = {
        'ETag': '12345',
        'VersionId': '1.0',
        'ContentLength': 123,
    }
    expected_head_params = {
        'Bucket': bucket,
        'Key': key,
    }
    # BUGFIX: the env patch, client patch, and stubber were started with
    # start()/activate() and never undone when an assertion failed, leaking
    # state into later tests; context managers + try/finally fix that.
    with patch.dict(os.environ, {
        'AWS_ACCESS_KEY_ID': 'test_key',
        'AWS_SECRET_ACCESS_KEY': 'test_secret',
        'ALLOW_ANONYMOUS_ACCESS': '1'
    }):
        # client is created under the patched env, as in the original flow
        mock_s3 = boto3.client('s3')
        with patch.object(
            mock_s3,
            'select_object_content',
            side_effect=[self.s3response, self.s3response_meta]
        ):
            s3_stubber = Stubber(mock_s3)
            s3_stubber.activate()
            try:
                s3_stubber.add_response(
                    'head_object', head_object_response, expected_head_params)
                with patch('boto3.Session.client', return_value=mock_s3):
                    response = lambda_handler(self._make_event(params), None)
                print(response)
                assert response['statusCode'] == 200
                folder = json.loads(read_body(response))['contents']
                print(folder)
                assert len(folder['prefixes']) == 1
                assert len(folder['objects']) == 1
                assert folder['objects'][0]['logical_key'] == 'foo.csv'
                assert folder['prefixes'][0]['logical_key'] == 'bar/'
            finally:
                s3_stubber.deactivate()
def test_non_string_keys(self):
    """
    End-to-end test (folder view without a prefix)
    """
    bucket = "bucket"
    key = ".quilt/packages/manifest_hash"
    params = dict(bucket=bucket, manifest=key, action="dir")
    # NOTE(review): unused — presumably documents the S3 Select call the
    # lambda is expected to make; verify against the handler
    expected_args = {
        'Bucket': bucket,
        'Key': key,
        'Expression': "SELECT SUBSTRING(s.logical_key, 1) AS logical_key FROM s3object s",
        'ExpressionType': 'SQL',
        'InputSerialization': {
            'CompressionType': 'NONE',
            'JSON': {'Type': 'LINES'}
        },
        'OutputSerialization': {'JSON': {'RecordDelimiter': '\n'}},
    }
    # Return a response with keys that are not strings (integers here)
    # The important test case is where all members of a column are
    # non-string
    manifest_rows = [
        json.dumps(dict(logical_key=k, physical_key=k, size=100))
        for k in ("1", "2", "3")
    ]
    streambytes = "\n".join(manifest_rows).encode()
    non_string_s3response = self.make_s3response(streambytes)
    mock_s3 = boto3.client('s3')
    with patch.object(
        mock_s3,
        'select_object_content',
        side_effect=[non_string_s3response, self.s3response_meta],
    ), patch('boto3.Session.client', return_value=mock_s3):
        response = pkgselect.lambda_handler(params, None)
        print(response)
        folder = json.loads(read_body(response))['result']
        assert not folder['prefixes']
        assert len(folder['objects']) == 3
        for position, expected_key in enumerate(('1', '2', '3')):
            assert folder['objects'][position]['logical_key'] == expected_key