Пример #1
0
def test_sql_select_csv_no_header(client, log_output):
    """Check positional column selection on CSV input without a header row.

    The object content is two CSV rows; the single query selects ``s._2`` and
    the expected output is the second field of each row. The CSV input is
    configured with ``file_header_info="NONE"``.

    Args:
        client: client object forwarded to
            ``test_sql_expressions_custom_input_output``.
        log_output: test log object; its ``json_report()`` is printed on
            success.

    Raises:
        Whatever ``test_sql_expressions_custom_input_output`` raises; the
        exception propagates unchanged.
    """
    csv_testcontent = """val1,val2,val3
val4,val5,val6
"""
    tests = [
        ("select_1", "SELECT s._2 FROM S3Object as s", b'val2\nval5\n'),
    ]

    input_serialization = InputSerialization(
        csv=CSVInput(
            file_header_info="NONE",
            allow_quoted_record_delimiter="FALSE",
        ),
    )
    output_serialization = OutputSerialization(csv=CSVOutput())

    # Deliberately no try/except: catching an exception only to re-raise it
    # adds nothing, so failures are left to propagate to the caller as-is.
    test_sql_expressions_custom_input_output(client, csv_testcontent,
                                             input_serialization,
                                             output_serialization, tests,
                                             log_output)

    # Test passes
    print(log_output.json_report())
Пример #2
0
def test_sql_select_json(client, log_output):
    """Check S3 Select path expressions against JSON document input.

    The object content holds two JSON documents: one with a ``Rules`` array
    and one with ``created``/``modified`` keys. Each entry in ``tests`` is
    ``(name, SQL expression, expected JSON-lines output)``.

    Args:
        client: client object forwarded to
            ``test_sql_expressions_custom_input_output``.
        log_output: test log object; its ``json_report()`` is printed on
            success.

    Raises:
        Whatever ``test_sql_expressions_custom_input_output`` raises; the
        exception propagates unchanged.
    """
    json_testcontent = """{ "Rules": [ {"id": "1"}, {"expr": "y > x"}, {"id": "2", "expr": "z = DEBUG"} ]}
{ "created": "June 27", "modified": "July 6" }
"""
    tests = [
        ("select_1", "SELECT id FROM S3Object[*].Rules[*].id",
         b'{"id":"1"}\n{}\n{"id":"2"}\n{}\n'),
        ("select_2",
         "SELECT id FROM S3Object[*].Rules[*].id WHERE id IS NOT MISSING",
         b'{"id":"1"}\n{"id":"2"}\n'),
        ("select_3", "SELECT d.created, d.modified FROM S3Object[*] d",
         b'{}\n{"created":"June 27","modified":"July 6"}\n'),
        ("select_4", "SELECT _1.created, _1.modified FROM S3Object[*]",
         b'{}\n{"created":"June 27","modified":"July 6"}\n'),
        ("select_5", "Select s.rules[1].expr from S3Object s",
         b'{"expr":"y > x"}\n{}\n'),
    ]

    input_serialization = InputSerialization(
        json=JSONInput(json_type="DOCUMENT"),
    )
    output_serialization = OutputSerialization(json=JSONOutput())

    # Deliberately no try/except: catching an exception only to re-raise it
    # adds nothing, so failures are left to propagate to the caller as-is.
    test_sql_expressions_custom_input_output(client, json_testcontent,
                                             input_serialization,
                                             output_serialization, tests,
                                             log_output)

    # Test passes
    print(log_output.json_report())
Пример #3
0
def test_csv_output_custom_quote_char(client, log_output):
    """Exercise CSV output with custom quote and quote-escape characters.

    Every case is ``(output quote char, output escape char, object content,
    expected output)``. The CSV input settings are the same for all cases;
    only the output ``QuoteCharacter`` / ``QuoteEscapeCharacter`` vary. Each
    case is handed to ``test_sql_api`` under the name ``test_<index>``.

    The bucket is created before the cases run and removed afterwards even
    if a case raises.
    """
    # Generate a unique bucket name and record it in the log arguments.
    bucket_name = generate_bucket_name()
    log_output.args['bucket_name'] = bucket_name

    cases = [
        # Two-character quote string; the expected value is an Exception
        ("''", "''", b'col1,col2,col3\n', Exception()),
        ("'", "'", b'col1,col2,col3\n', b"'col1','col2','col3'\n"),
        ("", '"', b'col1,col2,col3\n',
         b'\x00col1\x00,\x00col2\x00,\x00col3\x00\n'),
        ('"', '"', b'col1,col2,col3\n', b'"col1","col2","col3"\n'),
        ('"', '"', b'col"1,col2,col3\n', b'"col""1","col2","col3"\n'),
        ('"', '"', b'""""\n', b'""""\n'),
        ('"', '"', b'\n', b''),
        ("'", "\\", b'col1,col2,col3\n', b"'col1','col2','col3'\n"),
        ("'", "\\", b'col""1,col2,col3\n', b"'col\"\"1','col2','col3'\n"),
        ("'", "\\", b'col\'1,col2,col3\n', b"'col\\'1','col2','col3'\n"),
        ("'", "\\", b'"col\'1","col2","col3"\n', b"'col\\'1','col2','col3'\n"),
        ("'", "\\", b'col\'\n', b"'col\\''\n"),
        # Two consecutive escaped quotes
        ("'", "\\", b'"a"""""\n', b"'a\"\"'\n"),
    ]

    client.make_bucket(bucket_name)

    try:
        for idx, case in enumerate(cases):
            quote_char, escape_char, input_data, expected_output = case

            # Input side: fixed, standard double-quote CSV dialect.
            csv_in = CSVInput(
                FileHeaderInfo="NONE",
                RecordDelimiter="\n",
                FieldDelimiter=",",
                QuoteCharacter='"',
                QuoteEscapeCharacter='"',
                Comments="#",
                AllowQuotedRecordDelimiter="FALSE",
            )
            in_ser = InputSerialization(compression_type="NONE", csv=csv_in)

            # Output side: quote/escape characters come from the case.
            csv_out = CSVOutput(
                QuoteFields="ALWAYS",
                RecordDelimiter="\n",
                FieldDelimiter=",",
                QuoteCharacter=quote_char,
                QuoteEscapeCharacter=escape_char,
            )
            out_ser = OutputSerialization(csv=csv_out)

            progress = RequestProgress(enabled="False")
            sql_opts = SelectObjectOptions(
                expression="select * from s3object",
                input_serialization=in_ser,
                output_serialization=out_ser,
                request_progress=progress,
            )

            test_sql_api(f'test_{idx}', client, bucket_name, input_data,
                         sql_opts, expected_output)
    finally:
        client.remove_bucket(bucket_name)

    # Test passes
    print(log_output.json_report())
Пример #4
0
    def test_xml_marshal_select(self):
        """Verify the XML that ``xml_marshal_select`` emits for CSV-to-CSV options.

        Builds a ``SelectObjectOptions`` with CSV input, CSV output and
        request progress enabled, then compares the marshalled bytes with the
        expected ``SelectObjectContentRequest`` document.
        """
        # Expected document, assembled section by section.
        header_xml = (b'<SelectObjectContentRequest>'
                      b'<Expression>select * from s3object</Expression>'
                      b'<ExpressionType>SQL</ExpressionType>')
        input_xml = (b'<InputSerialization>'
                     b'<CompressionType>NONE</CompressionType>'
                     b'<CSV><FileHeaderInfo>USE</FileHeaderInfo>'
                     b'<RecordDelimiter>\n</RecordDelimiter>'
                     b'<FieldDelimiter>,</FieldDelimiter>'
                     b'<QuoteCharacter>"</QuoteCharacter>'
                     b'<QuoteEscapeCharacter>"</QuoteEscapeCharacter>'
                     b'<Comments>#</Comments>'
                     b'<AllowQuotedRecordDelimiter>false'
                     b'</AllowQuotedRecordDelimiter></CSV>'
                     b'</InputSerialization>')
        output_xml = (b'<OutputSerialization><CSV>'
                      b'<QuoteFields>ASNEEDED</QuoteFields>'
                      b'<RecordDelimiter>\n</RecordDelimiter>'
                      b'<FieldDelimiter>,</FieldDelimiter>'
                      b'<QuoteCharacter>"</QuoteCharacter>'
                      b'<QuoteEscapeCharacter>"</QuoteEscapeCharacter>'
                      b'</CSV></OutputSerialization>')
        footer_xml = (b'<RequestProgress>'
                      b'<Enabled>true</Enabled>'
                      b'</RequestProgress>'
                      b'</SelectObjectContentRequest>')
        expected = b''.join((header_xml, input_xml, output_xml, footer_xml))

        csv_in = CSVInput(
            FileHeaderInfo="USE",
            RecordDelimiter="\n",
            FieldDelimiter=",",
            QuoteCharacter='"',
            QuoteEscapeCharacter='"',
            Comments="#",
            AllowQuotedRecordDelimiter="FALSE",
        )
        in_ser = InputSerialization(compression_type="NONE", csv=csv_in)

        csv_out = CSVOutput(
            QuoteFields="ASNEEDED",
            RecordDelimiter="\n",
            FieldDelimiter=",",
            QuoteCharacter='"',
            QuoteEscapeCharacter='"',
        )
        out_ser = OutputSerialization(csv=csv_out)

        progress = RequestProgress(enabled="TRUE")
        select_opts = SelectObjectOptions(
            expression="select * from s3object",
            input_serialization=in_ser,
            output_serialization=out_ser,
            request_progress=progress,
        )

        actual = xml_marshal_select(select_opts)
        eq_(expected, actual)
Пример #5
0
def test_sql_expressions(client, input_json_bytes, tests, log_output):
    """Run ``tests`` with JSON document input and CSV output.

    Convenience wrapper around ``test_sql_expressions_custom_input_output``
    that fixes the serialization settings: uncompressed JSON input of type
    ``DOCUMENT`` and CSV output with ``quote_fields="ASNEEDED"``.
    """
    json_in = JSONInput(json_type="DOCUMENT")
    in_ser = InputSerialization(compression_type="NONE", json=json_in)

    csv_out = CSVOutput(quote_fields="ASNEEDED")
    out_ser = OutputSerialization(csv=csv_out)

    test_sql_expressions_custom_input_output(
        client,
        input_json_bytes,
        in_ser,
        out_ser,
        tests,
        log_output,
    )
Пример #6
0
def test_csv_input_custom_quote_char(client, log_output):
    """Exercise CSV input parsing with custom quote and quote-escape characters.

    Every case is ``(input quote char, input escape char, object content,
    expected output)``. Output is always JSON with a newline record
    delimiter; only the input ``QuoteCharacter`` / ``QuoteEscapeCharacter``
    vary. Each case is handed to ``test_sql_api`` under the name
    ``test_<index>``.

    The bucket is created before the cases run and removed afterwards even
    if a case raises.
    """
    # Generate a unique bucket name and record it in the log arguments.
    bucket_name = generate_bucket_name()
    log_output.args['bucket_name'] = bucket_name

    cases = [
        # Invalid quote character, should fail
        ('""', '"', b'col1,col2,col3\n', Exception()),
        # UTF-8 quote character
        ('ع', '"', 'عcol1ع,عcol2ع,عcol3ع\n'.encode(),
         b'{"_1":"col1","_2":"col2","_3":"col3"}\n'),
        # Only one field is quoted
        ('"', '"', b'"col1",col2,col3\n',
         b'{"_1":"col1","_2":"col2","_3":"col3"}\n'),
        ('"', '"', b'"col1,col2,col3"\n', b'{"_1":"col1,col2,col3"}\n'),
        ('\'', '"', b'"col1",col2,col3\n',
         b'{"_1":"\\"col1\\"","_2":"col2","_3":"col3"}\n'),
        ('', '"', b'"col1",col2,col3\n',
         b'{"_1":"\\"col1\\"","_2":"col2","_3":"col3"}\n'),
        ('', '"', b'"col1",col2,col3\n',
         b'{"_1":"\\"col1\\"","_2":"col2","_3":"col3"}\n'),
        ('', '"', b'"col1","col2","col3"\n',
         b'{"_1":"\\"col1\\"","_2":"\\"col2\\"","_3":"\\"col3\\""}\n'),
        ('"', '"', b'""""""\n', b'{"_1":"\\"\\""}\n'),
        ('"', '"', b'A",B\n', b'{"_1":"A\\"","_2":"B"}\n'),
        ('"', '"', b'A"",B\n', b'{"_1":"A\\"\\"","_2":"B"}\n'),
        ('"', '\\', b'A\\B,C\n', b'{"_1":"A\\\\B","_2":"C"}\n'),
        ('"', '"', b'"A""B","CD"\n', b'{"_1":"A\\"B","_2":"CD"}\n'),
        ('"', '\\', b'"A\\B","CD"\n', b'{"_1":"AB","_2":"CD"}\n'),
        ('"', '\\', b'"A\\,","CD"\n', b'{"_1":"A,","_2":"CD"}\n'),
        ('"', '\\', b'"A\\"B","CD"\n', b'{"_1":"A\\"B","_2":"CD"}\n'),
        ('"', '\\', b'"A\\""\n', b'{"_1":"A\\""}\n'),
        ('"', '\\', b'"A\\"\\"B"\n', b'{"_1":"A\\"\\"B"}\n'),
        ('"', '\\', b'"A\\"","\\"B"\n', b'{"_1":"A\\"","_2":"\\"B"}\n'),
    ]

    client.make_bucket(bucket_name)

    try:
        for idx, case in enumerate(cases):
            quote_char, escape_char, data, expected_output = case

            # Input side: quote/escape characters come from the case.
            csv_in = CSVInput(
                FileHeaderInfo="NONE",
                RecordDelimiter="\n",
                FieldDelimiter=",",
                QuoteCharacter=quote_char,
                QuoteEscapeCharacter=escape_char,
                Comments="#",
                AllowQuotedRecordDelimiter="FALSE",
            )
            in_ser = InputSerialization(compression_type="NONE", csv=csv_in)

            # Output side: newline-delimited JSON records.
            json_out = JsonOutput(RecordDelimiter="\n", )
            out_ser = OutputSerialization(json=json_out)

            progress = RequestProgress(enabled="False")
            sql_opts = SelectObjectOptions(
                expression="select * from s3object",
                input_serialization=in_ser,
                output_serialization=out_ser,
                request_progress=progress,
            )

            test_sql_api(f'test_{idx}', client, bucket_name, data, sql_opts,
                         expected_output)
    finally:
        client.remove_bucket(bucket_name)

    # Test passes
    print(log_output.json_report())
Пример #7
0
def test_csv_output_quote_char(client, log_output):
    """Exercise CSV output serialization with different quote characters.

    Every case is ``(quote char, object content, expected output)``. The
    quote char is used for both ``QuoteCharacter`` and
    ``QuoteEscapeCharacter`` of the CSV output; the CSV input settings are
    fixed. An ``Exception`` instance as the expected output means the select
    call itself must fail.

    Args:
        client: client used to create/remove the bucket and passed to
            ``exec_select``.
        log_output: test log object; receives the bucket name in ``args`` and
            its ``json_report()`` is printed on success.

    Raises:
        ValueError: if a case fails unexpectedly, succeeds when a failure was
            expected, or returns data that differs from the expected output.
        Exception: any error from creating or removing the bucket propagates
            with its original type.
    """
    # Get a unique bucket_name and object_name
    log_output.args['bucket_name'] = bucket_name = generate_bucket_name()

    tests = [
        # Two-character quote string, should fail
        ("''", b'col1,col2,col3\n', Exception()),
        ("'", b'col1,col2,col3\n', b"'col1','col2','col3'\n"),
        ("", b'col1,col2,col3\n', b'\x00col1\x00,\x00col2\x00,\x00col3\x00\n'),
        ('"', b'col1,col2,col3\n', b'"col1","col2","col3"\n'),
        ('"', b'col"1,col2,col3\n', b'"col""1","col2","col3"\n'),
        ('"', b'\n', b''),
    ]

    # Create the bucket before entering the try block: if creation fails
    # there is nothing to clean up, and attempting removal in ``finally``
    # could replace the real error with an unrelated one.
    client.make_bucket(bucket_name)

    try:
        for idx, (quote_char, object_content,
                  expected_output) in enumerate(tests):
            options = SelectObjectOptions(
                expression="select * from s3object",
                input_serialization=InputSerialization(
                    compression_type="NONE",
                    csv=CSVInput(
                        FileHeaderInfo="NONE",
                        RecordDelimiter="\n",
                        FieldDelimiter=",",
                        QuoteCharacter='"',
                        QuoteEscapeCharacter='"',
                        Comments="#",
                        AllowQuotedRecordDelimiter="FALSE",
                    ),
                ),
                output_serialization=OutputSerialization(csv=CSVOutput(
                    QuoteFields="ALWAYS",
                    RecordDelimiter="\n",
                    FieldDelimiter=",",
                    QuoteCharacter=quote_char,
                    QuoteEscapeCharacter=quote_char,
                )),
                request_progress=RequestProgress(enabled="False"))

            try:
                got_output = exec_select(client, bucket_name, object_content,
                                         options, log_output)
            except Exception as select_err:
                # A failure is only acceptable when the case expects one.
                if not isinstance(expected_output, Exception):
                    raise ValueError(
                        'Test {} unexpectedly failed with: {}'.format(
                            idx + 1, select_err)) from select_err
            else:
                if isinstance(expected_output, Exception):
                    raise ValueError(
                        'Test {}: expected an exception, got {}'.format(
                            idx + 1, got_output))
                if got_output != expected_output:
                    raise ValueError(
                        'Test {}: data mismatch. Expected : {}. Received: {}.'.
                        format(idx + 1, expected_output, got_output))
    finally:
        # Errors are not re-wrapped in a bare Exception, so the original
        # exception type and traceback reach the caller.
        client.remove_bucket(bucket_name)

    # Test passes
    print(log_output.json_report())
Пример #8
0
def test_csv_input_quote_char(client, log_output):
    """Exercise CSV input parsing with different quote characters.

    Every case is ``(quote char, object content, expected output)``. The
    quote char is used for both ``QuoteCharacter`` and
    ``QuoteEscapeCharacter`` of the CSV input; output is JSON with a newline
    record delimiter. An ``Exception`` instance as the expected output means
    the select call itself must fail.

    Args:
        client: client used to create/remove the bucket and passed to
            ``exec_select``.
        log_output: test log object; receives the bucket name in ``args`` and
            its ``json_report()`` is printed on success.

    Raises:
        ValueError: if a case fails unexpectedly, succeeds when a failure was
            expected, or returns data that differs from the expected output.
        Exception: any error from creating or removing the bucket propagates
            with its original type.
    """
    # Get a unique bucket_name and object_name
    log_output.args['bucket_name'] = bucket_name = generate_bucket_name()

    tests = [
        # Invalid quote character, should fail
        ('""', b'col1,col2,col3\n', Exception()),
        # UTF-8 quote character
        ('ع',
         b'\xd8\xb9col1\xd8\xb9,\xd8\xb9col2\xd8\xb9,\xd8\xb9col3\xd8\xb9\n',
         b'{"_1":"col1","_2":"col2","_3":"col3"}\n'),
        # Only one field is quoted
        ('"', b'"col1",col2,col3\n', b'{"_1":"col1","_2":"col2","_3":"col3"}\n'
         ),
        ('"', b'"col1,col2,col3"\n', b'{"_1":"col1,col2,col3"}\n'),
        ('\'', b'"col1",col2,col3\n',
         b'{"_1":"\\"col1\\"","_2":"col2","_3":"col3"}\n'),
        # NOTE(review): the next two cases are identical; both are kept so
        # the reported test numbers stay the same.
        ('', b'"col1",col2,col3\n',
         b'{"_1":"\\"col1\\"","_2":"col2","_3":"col3"}\n'),
        ('', b'"col1",col2,col3\n',
         b'{"_1":"\\"col1\\"","_2":"col2","_3":"col3"}\n'),
        ('', b'"col1","col2","col3"\n',
         b'{"_1":"\\"col1\\"","_2":"\\"col2\\"","_3":"\\"col3\\""}\n'),
        ('"', b'""""""\n', b'{"_1":"\\"\\""}\n'),
    ]

    # Create the bucket before entering the try block: if creation fails
    # there is nothing to clean up, and attempting removal in ``finally``
    # could replace the real error with an unrelated one.
    client.make_bucket(bucket_name)

    try:
        for idx, (quote_char, object_content,
                  expected_output) in enumerate(tests):
            options = SelectObjectOptions(
                expression="select * from s3object",
                input_serialization=InputSerialization(
                    compression_type="NONE",
                    csv=CSVInput(
                        FileHeaderInfo="NONE",
                        RecordDelimiter="\n",
                        FieldDelimiter=",",
                        QuoteCharacter=quote_char,
                        QuoteEscapeCharacter=quote_char,
                        Comments="#",
                        AllowQuotedRecordDelimiter="FALSE",
                    ),
                ),
                output_serialization=OutputSerialization(
                    json=JsonOutput(RecordDelimiter="\n", )),
                request_progress=RequestProgress(enabled="False"))

            try:
                got_output = exec_select(client, bucket_name, object_content,
                                         options, log_output)
            except Exception as select_err:
                # A failure is only acceptable when the case expects one.
                if not isinstance(expected_output, Exception):
                    raise ValueError(
                        'Test {} unexpectedly failed with: {}'.format(
                            idx + 1, select_err)) from select_err
            else:
                if isinstance(expected_output, Exception):
                    raise ValueError(
                        'Test {}: expected an exception, got {}'.format(
                            idx + 1, got_output))
                if got_output != expected_output:
                    raise ValueError(
                        'Test {}: data mismatch. Expected : {}, Received {}'.
                        format(idx + 1, expected_output, got_output))
    finally:
        # Errors are not re-wrapped in a bare Exception, so the original
        # exception type and traceback reach the caller.
        client.remove_bucket(bucket_name)

    # Test passes
    print(log_output.json_report())
Пример #9
0
                                  InputSerialization, OutputSerialization,
                                  CSVOutput, JsonOutput)

client = Minio('s3.amazonaws.com',
               access_key='YOUR-ACCESSKEY',
               secret_key='YOUR-SECRETKEY')

options = SelectObjectOptions(
    expression="select * from s3object",
    input_serialization=InputSerialization(
        compression_type="NONE",
        csv=CSVInput(
            FileHeaderInfo="USE",
            RecordDelimiter="\n",
            FieldDelimiter=",",
            QuoteCharacter='"',
            QuoteEscapeCharacter='"',
            Comments="#",
            AllowQuotedRecordDelimiter="FALSE",
        ),
        # If input is JSON
        # json=JSONInput(Type="DOCUMENT",)
    ),
    output_serialization=OutputSerialization(
        csv=CSVOutput(
            QuoteFields="ASNEEDED",
            RecordDelimiter="\n",
            FieldDelimiter=",",
            QuoteCharacter='"',
            QuoteEscapeCharacter='"',
        )
Пример #10
0
# from minio.select.options import JsonInput
# from minio.select.options import ParquetInput

client = Minio('s3.amazonaws.com',
               access_key='YOUR-ACCESSKEY',
               secret_key='YOUR-SECRETKEY')

options = SelectObjectOptions(
    expression="select * from s3object",
    input_serialization=InputSerialization(
        compression_type="NONE",
        csv=CSVInput(
            file_header_info="USE",
            record_delimiter="\n",
            field_delimiter=",",
            quote_character='"',
            quote_escape_character='"',
            comments="#",
            allow_quoted_record_delimiter="FALSE",
        ),
        # If input is JSON
        # json=JSONInput(json_type="DOCUMENT")
    ),
    output_serialization=OutputSerialization(
        csv=CSVOutput(
            quote_fields="ASNEEDED",
            record_delimiter="\n",
            field_delimiter=",",
            quote_character='"',
            quote_escape_character='"',
        ),