示例#1
0
def test_fixed_width_input_as_file_object_no_error_allowed(
        schema, fixed_width_file_name):
    """Consuming the fixed-width file parse must raise.

    No ``stop_on_error`` is passed to ``Parser``; the test passes only if
    calling ``parse`` on the open file, or iterating its result, raises
    some ``Exception``.
    """
    strict_parser = Parser(schema=schema, input_row_format='fixed-width')
    with open(fixed_width_file_name) as source:
        with pytest.raises(Exception):
            # Drain the result so a failure during iteration surfaces here.
            for _ in strict_parser.parse(source):
                pass
示例#2
0
def test_json_input_as_file_object_no_error_allowed(schema, json_file_name):
    """Consuming the JSON file parse must raise.

    The parser is built with JSON input and JSON output and no
    ``stop_on_error`` argument; any ``Exception`` satisfies the test.
    """
    strict_parser = Parser(schema=schema,
                           input_row_format='json',
                           parsed_row_format="json")
    with open(json_file_name) as source:
        with pytest.raises(Exception):
            # Drain the result so a failure during iteration surfaces here.
            for _ in strict_parser.parse(source):
                pass
def test_delimited_input_as_lol_all_error_allowed(schema, delimited_bad_lol_details):
    """With ``stop_on_error=-1`` the bad delimited rows yield exactly one ``str``.

    ``delimited_bad_lol_details[0]`` is handed to ``parse`` and
    ``delimited_bad_lol_details[1]`` is used as ``input_row_sep``.
    """
    row_separator = delimited_bad_lol_details[1]
    tolerant_parser = Parser(schema=schema,
                             stop_on_error=-1,
                             input_row_sep=row_separator)
    rows = tolerant_parser.parse(delimited_bad_lol_details[0])
    assert isinstance(rows, types.GeneratorType)

    # Check each row as it is produced and keep a running count.
    row_count = 0
    for row_count, row in enumerate(rows, start=1):
        assert isinstance(row, str)
    assert row_count == 1
def test_delimited_input_as_file_object_one_error_allowed(schema, delimited_file_details):
    """With ``stop_on_error=1`` the delimited file yields exactly two ``str`` rows.

    ``delimited_file_details[0]`` is the path of the file to open.
    """
    with open(delimited_file_details[0]) as source:
        one_error_parser = Parser(schema=schema, stop_on_error=1)
        rows = one_error_parser.parse(source)
        assert isinstance(rows, types.GeneratorType)

        # Iterate while the file is still open; count rows as they arrive.
        row_count = 0
        for row_count, row in enumerate(rows, start=1):
            assert isinstance(row, str)
        assert row_count == 2
示例#5
0
def test_delimited_input_as_lol_all_error_allowed(schema, fixed_width_bad_lol):
    """With ``stop_on_error=-1`` the bad fixed-width rows yield exactly one ``str``.

    NOTE(review): the function name says "delimited" but the parser is built
    with ``input_row_format='fixed-width'`` — confirm the intended name.
    """
    tolerant_parser = Parser(schema=schema,
                             stop_on_error=-1,
                             input_row_format='fixed-width')
    rows = tolerant_parser.parse(fixed_width_bad_lol)
    assert isinstance(rows, types.GeneratorType)

    # Check each row as it is produced and keep a running count.
    row_count = 0
    for row_count, row in enumerate(rows, start=1):
        assert isinstance(row, str)
    assert row_count == 1
示例#6
0
def test_json_input_as_lol_all_error_allowed(schema, json_bad_lol):
    """With ``stop_on_error=-1`` the bad JSON rows yield no output rows.

    The parser is asked for ``dict`` output; any row that is produced must
    be a ``dict``, and the total number of rows must be zero.
    """
    tolerant_parser = Parser(schema=schema,
                             stop_on_error=-1,
                             input_row_format='json',
                             parsed_row_format="dict")
    rows = tolerant_parser.parse(json_bad_lol)
    assert isinstance(rows, types.GeneratorType)

    # Check each row as it is produced and keep a running count.
    row_count = 0
    for row_count, row in enumerate(rows, start=1):
        assert isinstance(row, dict)
    assert row_count == 0
示例#7
0
def test_fixed_width_input_as_file_object_all_error_allowed(
        schema, fixed_width_bad_file_name):
    """With ``stop_on_error=-1`` the bad fixed-width file yields exactly one ``str``."""
    with open(fixed_width_bad_file_name) as source:
        tolerant_parser = Parser(schema=schema,
                                 stop_on_error=-1,
                                 input_row_format='fixed-width')
        rows = tolerant_parser.parse(source)
        assert isinstance(rows, types.GeneratorType)

        # Iterate while the file is still open; count rows as they arrive.
        row_count = 0
        for row_count, row in enumerate(rows, start=1):
            assert isinstance(row, str)
        assert row_count == 1
def test_delimited_input_dict_output(schema, delimited_file_details):
    """Delimited file parsed to ``dict`` rows: two rows, eight keys each.

    ``delimited_file_details[0]`` is the path of the file to open; the parser
    is built with ``stop_on_error=1`` and ``parsed_row_format='dict'``.
    """
    with open(delimited_file_details[0]) as source:
        dict_parser = Parser(schema=schema,
                             stop_on_error=1,
                             parsed_row_format='dict')
        rows = dict_parser.parse(source)
        assert isinstance(rows, types.GeneratorType)

        # Iterate while the file is still open; count rows as they arrive.
        row_count = 0
        for row_count, row in enumerate(rows, start=1):
            assert isinstance(row, dict)
            assert len(row) == 8
        assert row_count == 2
def test_fixed_input_fixed_width_output(fw_schema, fixed_width_file_name):
    """Fixed-width file parsed to fixed-width output yields exactly two ``str`` rows.

    The parser is built from ``fw_schema`` with ``stop_on_error=1`` and
    ``'fixed-width'`` for both the input and the parsed row format.
    """
    with open(fixed_width_file_name) as source:
        fixed_width_parser = Parser(schema=fw_schema,
                                    stop_on_error=1,
                                    input_row_format='fixed-width',
                                    parsed_row_format='fixed-width')
        rows = fixed_width_parser.parse(source)
        assert isinstance(rows, types.GeneratorType)

        # Iterate while the file is still open; count rows as they arrive.
        row_count = 0
        for row_count, row in enumerate(rows, start=1):
            assert isinstance(row, str)
        assert row_count == 2
def test_dict_input_dict_output(schema, json_lol):
    """JSON rows parsed to ``dict`` output: one row with eight keys.

    The parser is built with ``stop_on_error=-1``, ``input_row_format='json'``
    and ``parsed_row_format='dict'``.
    """
    dict_parser = Parser(schema=schema,
                         stop_on_error=-1,
                         input_row_format='json',
                         parsed_row_format='dict')
    rows = dict_parser.parse(json_lol)
    assert isinstance(rows, types.GeneratorType)

    # Check each row as it is produced and keep a running count.
    row_count = 0
    for row_count, row in enumerate(rows, start=1):
        assert isinstance(row, dict)
        assert len(row) == 8
    assert row_count == 1
示例#11
0
def test_json_input_as_file_object_one_error_allowed(schema, json_file_name):
    """JSON file parsed to JSON output yields exactly one ``str`` row.

    NOTE(review): the name says "one error allowed" but the parser is built
    with ``stop_on_error=-1`` — confirm which one is intended.
    """
    with open(json_file_name) as source:
        json_parser = Parser(schema=schema,
                             stop_on_error=-1,
                             input_row_format='json',
                             parsed_row_format="json")
        rows = json_parser.parse(source)
        assert isinstance(rows, types.GeneratorType)

        # Iterate while the file is still open; count rows as they arrive.
        row_count = 0
        for row_count, row in enumerate(rows, start=1):
            assert isinstance(row, str)
        assert row_count == 1
def test_json_input_json_output(schema, json_file_name):
    """JSON file parsed to JSON output: one ``str`` row decoding to eight keys.

    Each produced row is decoded with ``json.loads`` and the decoded object's
    key count is checked.
    """
    with open(json_file_name) as source:
        json_parser = Parser(schema=schema,
                             stop_on_error=-1,
                             input_row_format='json',
                             parsed_row_format='json')
        rows = json_parser.parse(source)
        assert isinstance(rows, types.GeneratorType)

        # Iterate while the file is still open; count rows as they arrive.
        row_count = 0
        for row_count, row in enumerate(rows, start=1):
            assert isinstance(row, str)
            decoded = json.loads(row)
            assert len(decoded.keys()) == 8
        assert row_count == 1
def test_json_input_wrong_output(schema, json_lol):
    """Expect ``UnexpectedSystemException`` from one of several parser setups.

    NOTE(review): every statement shares a single ``pytest.raises`` block, so
    the block exits at the first statement that raises and the statements
    after it never run. If each format combination is meant to be checked
    on its own, give each one its own ``pytest.raises`` block — confirm the
    intent (and whether the final json/json ``parse`` call should raise).
    """
    # (input_row_format, parsed_row_format) pairs, tried in this order.
    format_pairs = (
        ('json', 'delimited'),
        ('json', 'fixed-width'),
        ('dict', ''),
    )
    with pytest.raises(UnexpectedSystemException):
        for input_format, output_format in format_pairs:
            Parser(schema=schema,
                   stop_on_error=-1,
                   input_row_format=input_format,
                   parsed_row_format=output_format)
        json_parser = Parser(schema=schema,
                             stop_on_error=-1,
                             input_row_format='json',
                             parsed_row_format='json')
        json_parser.parse(json_lol)
def test_delimited_input_as_lol_no_error_allowed(schema, delimited_lol_details):
    """Consuming the delimited rows must raise when ``stop_on_error`` is not given.

    ``delimited_lol_details[0]`` is handed to ``parse`` and
    ``delimited_lol_details[1]`` is used as ``input_row_sep``.
    """
    row_separator = delimited_lol_details[1]
    strict_parser = Parser(schema=schema, input_row_sep=row_separator)
    with pytest.raises(Exception):
        # Drain the result so a failure during iteration surfaces here.
        for _ in strict_parser.parse(delimited_lol_details[0]):
            pass
示例#15
0
     StringParser().regex_match(r'\w+_\d{4}-\d{2}-\d{2}').change_case('u')),
    ('CLASS', StringParser(start=1, end=1).value_set(['a', 'b', 'A'])),
    ('INITIATED_ON', DatetimeParser(
        formats=['%Y%m%d', '%Y-%m-%d %H:%M:%S']).convert('%Y/%m/%d').max_value(
            datetime.datetime.now()).min_value(value='20000101',
                                               format='%Y%m%d').not_null(
                                                   datetime.datetime.strptime(
                                                       '19001231', '%Y%m%d'))),
    ('ASKED_AMOUNT',
     IntegerParser().max_value(2000).not_null(default_value=0)),
    ('ADJUSTED_AMOUNT', FloatParser().min_value(10.0).not_null(0.0)),
    ('ROLE_MODEL', ConstantParser('Iron-Man')),
    ('BLOCK_NUMBER', IntegerParser().add_func(_parity_check).range(0, 40))
]

# Parser returning each parsed row as a dict, built with stop_on_error=1.
p = Parser(schema=schema, stop_on_error=1, parsed_row_format='dict')

# Write three pipe-delimited sample rows to a temporary file, then parse them.
# NOTE(review): the temporary file is reopened by name while it is still open;
# the tempfile documentation says this is not possible on every platform.
with tempfile.NamedTemporaryFile() as tf:
    with open(tf.name, 'w') as sf:
        sf.writelines([
            '""|Trig2020-23-12|A|20200123|2000|21.0934||10\n',
            '"DEF"||abc|||||34\n',
            '"DEF"|Manual_2020-23-12||2020-01-23 10:20:23|1200|11||',
        ])

    logging.info('#' * 50 + " DATASET PARSING " + '#' * 50)
    parsed_lines = []
    with open(tf.name, 'r') as sf:
        # Iterate inside the `with` so the file is open while rows are read.
        for parsed_line in p.parse(sf):
            parsed_lines.append(parsed_line)
    logging.info("\n\n")
    logging.info(">>> Parsed Data:")
    logging.info(pprint.pformat(parsed_lines))
示例#16
0
def test_json_input_as_lol_no_error_allowed(schema, json_lol):
    """Consuming the JSON rows must raise when ``stop_on_error`` is not given.

    The parser is built with JSON input and ``dict`` output; any
    ``Exception`` satisfies the test.
    """
    strict_parser = Parser(schema=schema,
                           input_row_format='json',
                           parsed_row_format="dict")
    with pytest.raises(Exception):
        # Drain the result so a failure during iteration surfaces here.
        for _ in strict_parser.parse(json_lol):
            pass
def test_fixed_width_input_wrong_output(fw_schema):
    """Building a parser from ``fw_schema`` with JSON output must raise.

    NOTE(review): no ``input_row_format`` is passed here even though the test
    name mentions fixed-width input — confirm that is intended.
    """
    parser_kwargs = {'schema': fw_schema, 'parsed_row_format': 'json'}
    with pytest.raises(UnexpectedSystemException):
        Parser(**parser_kwargs)
# The schema has to be provided as a list, where each element of the list is a tuple.
# The first element of each tuple is the column name; it is just for reference and has no internal usage.
# The second element of each tuple is the actual parser (a parser object, not a built parser function).

# Example script: parse three fixed-width rows into dicts and log each result.
# Root logger is set to DEBUG so the logging.info calls below are emitted.
logging.basicConfig(format='%(levelname)s:%(asctime)s:: %(message)s',
                    level=logging.DEBUG)
# Each parser takes two positional integers — presumably the start and end
# character positions of the field in the row; confirm against the parser API.
fw_schema = [
    ('ID', StringParser(1, 2)),
    ('NAME',
     StringParser(3, 5).change_case('U').not_null('nan',
                                                  allow_white_space=True)),
    ('GENDER', StringParser(6, 6).value_set(['M', 'F'])),
    # The next two columns are given the same (7, 11) arguments; only the
    # first one chains not_null('dummy').
    ('NOT_NULLABLE_VALUE', StringParser(7, 11).not_null('dummy')),
    ('NULLABLE_VALUE', StringParser(7, 11)),
    ('BIRTH_YEAR', IntegerParser(12, 13).max_value(20)),
    ('BALANCE', FloatParser(14, 17).min_value(10.0))
]
# Fixed-width input, dict output, stop_on_error=1.
p = Parser(schema=fw_schema,
           stop_on_error=1,
           input_row_format='fixed-width',
           parsed_row_format='dict')
logging.info(('#' * 50) + " FIXED WIDTH DATASET PARSING " + ('#' * 50))
# The input is an in-memory list of row strings rather than a file object.
parsed_data = p.parse(
    ['d0sauMvalue191000', 'd0pouM     2090.03', 'd0pouX     2090.03'])
logging.info("\n\n")
logging.info(">>> Parsed Data:")
# Log each parsed row as it is produced by iterating parsed_data.
for data in parsed_data:
    logging.info(pprint.pformat(data))
logging.info('#' * 125)
def test_delimited_input_wrong_output(schema):
    """Building a parser with an unsupported output format must raise.

    Each of ``'json'`` and ``'fixed_width'`` is checked in its own
    ``pytest.raises`` block, with no ``input_row_format`` passed.
    """
    for bad_output_format in ('json', 'fixed_width'):
        with pytest.raises(UnexpectedSystemException):
            Parser(schema=schema, parsed_row_format=bad_output_format)
示例#20
0
def test_fixed_width_input_as_lol_no_error_allowed(schema, fixed_width_lol):
    """Consuming the fixed-width rows must raise when ``stop_on_error`` is not given."""
    strict_parser = Parser(schema=schema, input_row_format='fixed-width')
    with pytest.raises(Exception):
        # Drain the result so a failure during iteration surfaces here.
        for _ in strict_parser.parse(fixed_width_lol):
            pass
def test_delimited_input_as_file_object_no_error_allowed(schema, delimited_file_details):
    """Consuming the delimited file parse must raise when ``stop_on_error`` is not given.

    ``delimited_file_details[0]`` is the path of the file to open.
    """
    with open(delimited_file_details[0]) as source:
        strict_parser = Parser(schema=schema)
        with pytest.raises(Exception):
            # Drain the result so a failure during iteration surfaces here.
            for _ in strict_parser.parse(source):
                pass