def test_sql_select_csv_no_header(client, log_output):
    """Select a column by position from CSV input that has no header row.

    The object holds two CSV records of three fields each.  With
    ``file_header_info="NONE"`` the query addresses the second field as
    ``s._2`` and the expected result is that field from both records.

    Args:
        client: client object forwarded to
            ``test_sql_expressions_custom_input_output``.
        log_output: test log object; its ``json_report()`` is printed once
            the helper returns without raising.

    Any exception raised by the helper propagates unchanged to the caller.
    """
    # The payload is CSV, one record per line.
    csv_testcontent = (
        "val1,val2,val3\n"
        "val4,val5,val6\n"
    )
    tests = [
        ("select_1", "SELECT s._2 FROM S3Object as s", b'val2\nval5\n'),
    ]

    input_serialization = InputSerialization(
        csv=CSVInput(
            file_header_info="NONE",
            allow_quoted_record_delimiter="FALSE",
        ),
    )
    output_serialization = OutputSerialization(csv=CSVOutput())

    # No try/except here: catching an exception only to re-raise it as-is
    # adds nothing, so failures are simply allowed to propagate.
    test_sql_expressions_custom_input_output(client, csv_testcontent,
                                             input_serialization,
                                             output_serialization,
                                             tests, log_output)

    # Test passes
    print(log_output.json_report())
def test_csv_output_custom_quote_char(client, log_output):
    """Run select queries with varying CSV output quote/escape characters.

    Every case is a tuple of
    ``(quote_char, escape_char, input_data, expected_output)``.  The input
    side always uses ``"`` for both quoting and escaping; only the output
    serialization changes between cases.  Each case is handed to
    ``test_sql_api`` under the name ``test_<index>``.

    NOTE(review): the first case passes an ``Exception`` instance as the
    expected output; presumably ``test_sql_api`` treats that as "the request
    must fail" - confirm against that helper.

    Args:
        client: client used to create and remove the bucket and forwarded
            to ``test_sql_api``.
        log_output: test log object; receives the bucket name and provides
            the report printed at the end.
    """
    # Get a unique bucket_name and object_name
    bucket_name = generate_bucket_name()
    log_output.args['bucket_name'] = bucket_name

    cases = [
        # UTF-8 quote character
        ("''", "''", b'col1,col2,col3\n', Exception()),
        ("'", "'", b'col1,col2,col3\n', b"'col1','col2','col3'\n"),
        ("", '"', b'col1,col2,col3\n',
         b'\x00col1\x00,\x00col2\x00,\x00col3\x00\n'),
        ('"', '"', b'col1,col2,col3\n', b'"col1","col2","col3"\n'),
        ('"', '"', b'col"1,col2,col3\n', b'"col""1","col2","col3"\n'),
        ('"', '"', b'""""\n', b'""""\n'),
        ('"', '"', b'\n', b''),
        ("'", "\\", b'col1,col2,col3\n', b"'col1','col2','col3'\n"),
        ("'", "\\", b'col""1,col2,col3\n', b"'col\"\"1','col2','col3'\n"),
        ("'", "\\", b'col\'1,col2,col3\n', b"'col\\'1','col2','col3'\n"),
        ("'", "\\", b'"col\'1","col2","col3"\n',
         b"'col\\'1','col2','col3'\n"),
        ("'", "\\", b'col\'\n', b"'col\\''\n"),
        # Two consecutive escaped quotes
        ("'", "\\", b'"a"""""\n', b"'a\"\"'\n"),
    ]

    client.make_bucket(bucket_name)
    try:
        for case_no, (quote, escape, payload, expected) in enumerate(cases):
            # Input settings are the same for every case.
            csv_in = CSVInput(
                FileHeaderInfo="NONE",
                RecordDelimiter="\n",
                FieldDelimiter=",",
                QuoteCharacter='"',
                QuoteEscapeCharacter='"',
                Comments="#",
                AllowQuotedRecordDelimiter="FALSE",
            )
            source_format = InputSerialization(
                compression_type="NONE",
                csv=csv_in,
            )
            # Output settings carry the quote/escape pair under test.
            csv_out = CSVOutput(
                QuoteFields="ALWAYS",
                RecordDelimiter="\n",
                FieldDelimiter=",",
                QuoteCharacter=quote,
                QuoteEscapeCharacter=escape,
            )
            result_format = OutputSerialization(csv=csv_out)
            sql_opts = SelectObjectOptions(
                expression="select * from s3object",
                input_serialization=source_format,
                output_serialization=result_format,
                request_progress=RequestProgress(enabled="False"),
            )
            test_sql_api(f'test_{case_no}', client, bucket_name, payload,
                         sql_opts, expected)
    finally:
        client.remove_bucket(bucket_name)

    # Test passes
    print(log_output.json_report())
def test_xml_marshal_select(self):
    """Check that ``xml_marshal_select`` emits the expected request XML.

    Builds a ``SelectObjectOptions`` with CSV input and CSV output
    settings plus request progress, marshals it, and compares the result
    with a hand-written ``SelectObjectContentRequest`` document.
    """
    # Expected document, assembled section by section.
    request_head = (b'<SelectObjectContentRequest>'
                    b'<Expression>select * from s3object</Expression>'
                    b'<ExpressionType>SQL</ExpressionType>')
    input_section = (b'<InputSerialization>'
                     b'<CompressionType>NONE</CompressionType>'
                     b'<CSV><FileHeaderInfo>USE</FileHeaderInfo>'
                     b'<RecordDelimiter>\n</RecordDelimiter>'
                     b'<FieldDelimiter>,</FieldDelimiter>'
                     b'<QuoteCharacter>"</QuoteCharacter>'
                     b'<QuoteEscapeCharacter>"</QuoteEscapeCharacter>'
                     b'<Comments>#</Comments>'
                     b'<AllowQuotedRecordDelimiter>false'
                     b'</AllowQuotedRecordDelimiter></CSV>'
                     b'</InputSerialization>')
    output_section = (b'<OutputSerialization><CSV>'
                      b'<QuoteFields>ASNEEDED</QuoteFields>'
                      b'<RecordDelimiter>\n</RecordDelimiter>'
                      b'<FieldDelimiter>,</FieldDelimiter>'
                      b'<QuoteCharacter>"</QuoteCharacter>'
                      b'<QuoteEscapeCharacter>"</QuoteEscapeCharacter>'
                      b'</CSV></OutputSerialization>')
    request_tail = (b'<RequestProgress>'
                    b'<Enabled>true</Enabled>'
                    b'</RequestProgress>'
                    b'</SelectObjectContentRequest>')
    expected_string = (request_head + input_section
                       + output_section + request_tail)

    # Options that should marshal to the document above.
    csv_in = CSVInput(
        FileHeaderInfo="USE",
        RecordDelimiter="\n",
        FieldDelimiter=",",
        QuoteCharacter='"',
        QuoteEscapeCharacter='"',
        Comments="#",
        AllowQuotedRecordDelimiter="FALSE",
    )
    source_format = InputSerialization(compression_type="NONE", csv=csv_in)
    csv_out = CSVOutput(
        QuoteFields="ASNEEDED",
        RecordDelimiter="\n",
        FieldDelimiter=",",
        QuoteCharacter='"',
        QuoteEscapeCharacter='"',
    )
    result_format = OutputSerialization(csv=csv_out)
    options = SelectObjectOptions(
        expression="select * from s3object",
        input_serialization=source_format,
        output_serialization=result_format,
        request_progress=RequestProgress(enabled="TRUE"),
    )

    actual_string = xml_marshal_select(options)
    eq_(expected_string, actual_string)
def test_sql_expressions(client, input_json_bytes, tests, log_output):
    """Run ``tests`` against JSON document input with CSV output.

    Thin wrapper that fixes the serialization settings (uncompressed JSON
    of type ``DOCUMENT`` in, CSV with ``quote_fields="ASNEEDED"`` out) and
    delegates everything else to
    ``test_sql_expressions_custom_input_output``.

    Args:
        client: forwarded unchanged to the helper.
        input_json_bytes: object content, forwarded unchanged.
        tests: test cases, forwarded unchanged.
        log_output: test log object, forwarded unchanged.
    """
    json_document_input = InputSerialization(
        compression_type="NONE",
        json=JSONInput(json_type="DOCUMENT"),
    )
    csv_as_needed_output = OutputSerialization(
        csv=CSVOutput(quote_fields="ASNEEDED"),
    )
    test_sql_expressions_custom_input_output(
        client,
        input_json_bytes,
        json_document_input,
        csv_as_needed_output,
        tests,
        log_output,
    )
def test_csv_output_quote_char(client, log_output):
    """Run select queries with varying CSV output quote characters.

    Every case is a tuple of ``(quote_char, object_content,
    expected_output)``.  ``quote_char`` is sent as both the output
    ``QuoteCharacter`` and ``QuoteEscapeCharacter``; the input side always
    uses ``"``.  An ``Exception`` instance as ``expected_output`` marks a
    case in which ``exec_select`` is expected to raise.

    Args:
        client: client used to create and remove the bucket and forwarded
            to ``exec_select``.
        log_output: test log object; receives the bucket name, is forwarded
            to ``exec_select`` and provides the report printed at the end.

    Raises:
        ValueError: if a case raises when no exception was expected, does
            not raise when one was expected, or returns data that differs
            from ``expected_output``.
    """
    # Get a unique bucket_name and object_name
    log_output.args['bucket_name'] = bucket_name = generate_bucket_name()

    tests = [
        # UTF-8 quote character
        ("''", b'col1,col2,col3\n', Exception()),
        ("'", b'col1,col2,col3\n', b"'col1','col2','col3'\n"),
        ("", b'col1,col2,col3\n',
         b'\x00col1\x00,\x00col2\x00,\x00col3\x00\n'),
        ('"', b'col1,col2,col3\n', b'"col1","col2","col3"\n'),
        ('"', b'col"1,col2,col3\n', b'"col""1","col2","col3"\n'),
        ('"', b'\n', b''),
    ]

    # Create the bucket before entering ``try``: if creation fails there is
    # nothing to clean up, and attempting the removal anyway would raise a
    # second error that hides the real one.
    client.make_bucket(bucket_name)
    try:
        for idx, (quote_char, object_content,
                  expected_output) in enumerate(tests):
            options = SelectObjectOptions(
                expression="select * from s3object",
                input_serialization=InputSerialization(
                    compression_type="NONE",
                    csv=CSVInput(
                        FileHeaderInfo="NONE",
                        RecordDelimiter="\n",
                        FieldDelimiter=",",
                        QuoteCharacter='"',
                        QuoteEscapeCharacter='"',
                        Comments="#",
                        AllowQuotedRecordDelimiter="FALSE",
                    ),
                ),
                output_serialization=OutputSerialization(csv=CSVOutput(
                    QuoteFields="ALWAYS",
                    RecordDelimiter="\n",
                    FieldDelimiter=",",
                    QuoteCharacter=quote_char,
                    QuoteEscapeCharacter=quote_char,
                )),
                request_progress=RequestProgress(enabled="False"))

            try:
                got_output = exec_select(client, bucket_name, object_content,
                                         options, log_output)
            except Exception as select_err:
                # A failure is only acceptable for cases that expect one.
                if not isinstance(expected_output, Exception):
                    raise ValueError(
                        'Test {} unexpectedly failed with: {}'.format(
                            idx + 1, select_err)) from select_err
            else:
                if isinstance(expected_output, Exception):
                    raise ValueError(
                        'Test {}: expected an exception, got {}'.format(
                            idx + 1, got_output))
                if got_output != expected_output:
                    raise ValueError(
                        'Test {}: data mismatch. Expected : {}. Received: {}.'.
                        format(idx + 1, expected_output, got_output))
    finally:
        # Errors are no longer re-wrapped in a bare ``Exception``; they
        # propagate with their original type and traceback.
        client.remove_bucket(bucket_name)

    # Test passes
    print(log_output.json_report())
FileHeaderInfo="USE", RecordDelimiter="\n", FieldDelimiter=",", QuoteCharacter='"', QuoteEscapeCharacter='"', Comments="#", AllowQuotedRecordDelimiter="FALSE", ), # If input is JSON # json=JSONInput(Type="DOCUMENT",) ), output_serialization=OutputSerialization( csv=CSVOutput( QuoteFields="ASNEEDED", RecordDelimiter="\n", FieldDelimiter=",", QuoteCharacter='"', QuoteEscapeCharacter='"', ) # json = JsonOutput( # RecordDelimiter="\n", # ) ), request_progress=RequestProgress(enabled="False")) try: data = client.select_object_content('your-bucket', 'your-object', options) # Get the records with open('my-record-file', 'w') as record_data:
file_header_info="USE", record_delimiter="\n", field_delimiter=",", quote_character='"', quote_escape_character='"', comments="#", allow_quoted_record_delimiter="FALSE", ), # If input is JSON # json=JSONInput(json_type="DOCUMENT") ), output_serialization=OutputSerialization( csv=CSVOutput( quote_fields="ASNEEDED", record_delimiter="\n", field_delimiter=",", quote_character='"', quote_escape_character='"', ), # json = JSONOutput(record_delimiter="\n") ), request_progress=RequestProgress(enabled="False")) try: data = client.select_object_content('your-bucket', 'your-object', options) # Get the records with open('my-record-file', 'w') as record_data: for d in data.stream(10 * 1024): record_data.write(d)