def test_fixed_width_input_as_file_object_no_error_allowed(
        schema, fixed_width_file_name):
    with open(fixed_width_file_name, 'r') as sf:
        p_allow_no_error = Parser(schema=schema, input_row_format='fixed-width')
        with pytest.raises(Exception):
            list(p_allow_no_error.parse(sf))

def test_json_input_as_file_object_no_error_allowed(schema, json_file_name):
    with open(json_file_name, 'r') as sf:
        p_allow_no_error = Parser(schema=schema, input_row_format='json',
                                  parsed_row_format="json")
        with pytest.raises(Exception):
            list(p_allow_no_error.parse(sf))

def test_delimited_input_as_lol_all_error_allowed(schema, delimited_bad_lol_details):
    p_allow_all_error = Parser(schema=schema, stop_on_error=-1,
                               input_row_sep=delimited_bad_lol_details[1])
    parsed_data = p_allow_all_error.parse(delimited_bad_lol_details[0])
    assert isinstance(parsed_data, types.GeneratorType)
    parsed_lines = []
    for parsed_line in parsed_data:  # calling data parsing on a list of rows
        assert isinstance(parsed_line, str)
        parsed_lines.append(parsed_line)
    assert len(parsed_lines) == 1

def test_delimited_input_as_file_object_one_error_allowed(schema, delimited_file_details):
    with open(delimited_file_details[0], 'r') as sf:
        p_allow_one_error = Parser(schema=schema, stop_on_error=1)
        parsed_data = p_allow_one_error.parse(sf)
        assert isinstance(parsed_data, types.GeneratorType)
        parsed_lines = []
        for parsed_line in parsed_data:  # calling data parsing on file object
            assert isinstance(parsed_line, str)
            parsed_lines.append(parsed_line)
        assert len(parsed_lines) == 2

def test_fixed_width_input_as_lol_all_error_allowed(schema, fixed_width_bad_lol):
    p_allow_all_error = Parser(schema=schema, stop_on_error=-1,
                               input_row_format='fixed-width')
    parsed_data = p_allow_all_error.parse(fixed_width_bad_lol)
    assert isinstance(parsed_data, types.GeneratorType)
    parsed_lines = []
    for parsed_line in parsed_data:  # calling data parsing on a list of rows
        assert isinstance(parsed_line, str)
        parsed_lines.append(parsed_line)
    assert len(parsed_lines) == 1

def test_json_input_as_lol_all_error_allowed(schema, json_bad_lol):
    p_allow_all_error = Parser(schema=schema, stop_on_error=-1,
                               input_row_format='json', parsed_row_format="dict")
    parsed_data = p_allow_all_error.parse(json_bad_lol)
    assert isinstance(parsed_data, types.GeneratorType)
    parsed_lines = []
    for parsed_line in parsed_data:  # calling data parsing on a list of rows
        assert isinstance(parsed_line, dict)
        parsed_lines.append(parsed_line)
    assert len(parsed_lines) == 0

def test_fixed_width_input_as_file_object_all_error_allowed(
        schema, fixed_width_bad_file_name):
    with open(fixed_width_bad_file_name, 'r') as sf:
        p_allow_all_error = Parser(schema=schema, stop_on_error=-1,
                                   input_row_format='fixed-width')
        parsed_data = p_allow_all_error.parse(sf)
        assert isinstance(parsed_data, types.GeneratorType)
        parsed_lines = []
        for parsed_line in parsed_data:  # calling data parsing on file object
            assert isinstance(parsed_line, str)
            parsed_lines.append(parsed_line)
        assert len(parsed_lines) == 1

def test_delimited_input_dict_output(schema, delimited_file_details):
    with open(delimited_file_details[0], 'r') as sf:
        p_dict_output = Parser(schema=schema, stop_on_error=1, parsed_row_format='dict')
        parsed_data = p_dict_output.parse(sf)
        assert isinstance(parsed_data, types.GeneratorType)
        parsed_lines = []
        for parsed_line in parsed_data:  # calling data parsing on file object
            assert isinstance(parsed_line, dict)
            assert len(parsed_line.keys()) == 8
            parsed_lines.append(parsed_line)
        assert len(parsed_lines) == 2

def test_fixed_input_fixed_width_output(fw_schema, fixed_width_file_name):
    with open(fixed_width_file_name, 'r') as sf:
        p_fixed_width_output = Parser(schema=fw_schema, stop_on_error=1,
                                      input_row_format='fixed-width',
                                      parsed_row_format='fixed-width')
        parsed_data = p_fixed_width_output.parse(sf)
        assert isinstance(parsed_data, types.GeneratorType)
        parsed_lines = []
        for parsed_line in parsed_data:  # calling data parsing on file object
            assert isinstance(parsed_line, str)
            parsed_lines.append(parsed_line)
        assert len(parsed_lines) == 2

def test_dict_input_dict_output(schema, json_lol):
    p_dict_output = Parser(schema=schema, stop_on_error=-1,
                           input_row_format='json', parsed_row_format='dict')
    parsed_data = p_dict_output.parse(json_lol)
    assert isinstance(parsed_data, types.GeneratorType)
    parsed_lines = []
    for parsed_line in parsed_data:  # calling data parsing on a list of rows
        assert isinstance(parsed_line, dict)
        assert len(parsed_line.keys()) == 8
        parsed_lines.append(parsed_line)
    assert len(parsed_lines) == 1

def test_json_input_as_file_object_one_error_allowed(schema, json_file_name):
    with open(json_file_name, 'r') as sf:
        p_allow_one_error = Parser(schema=schema, stop_on_error=-1,
                                   input_row_format='json', parsed_row_format="json")
        parsed_data = p_allow_one_error.parse(sf)
        assert isinstance(parsed_data, types.GeneratorType)
        parsed_lines = []
        for parsed_line in parsed_data:  # calling data parsing on file object
            assert isinstance(parsed_line, str)
            parsed_lines.append(parsed_line)
        assert len(parsed_lines) == 1

def test_json_input_json_output(schema, json_file_name):
    with open(json_file_name, 'r') as sf:
        p_json_output = Parser(schema=schema, stop_on_error=-1,
                               input_row_format='json', parsed_row_format='json')
        parsed_data = p_json_output.parse(sf)
        assert isinstance(parsed_data, types.GeneratorType)
        parsed_lines = []
        for parsed_line in parsed_data:  # calling data parsing on file object
            assert isinstance(parsed_line, str)
            assert len(json.loads(parsed_line).keys()) == 8
            parsed_lines.append(parsed_line)
        assert len(parsed_lines) == 1

def test_json_input_wrong_output(schema, json_lol):
    # Each invalid pairing gets its own raises-block so every case is actually checked.
    with pytest.raises(UnexpectedSystemException):
        Parser(schema=schema, stop_on_error=-1,
               input_row_format='json', parsed_row_format='delimited')
    with pytest.raises(UnexpectedSystemException):
        Parser(schema=schema, stop_on_error=-1,
               input_row_format='json', parsed_row_format='fixed-width')
    with pytest.raises(UnexpectedSystemException):
        Parser(schema=schema, stop_on_error=-1,
               input_row_format='dict', parsed_row_format='')
    p_json_output = Parser(schema=schema, stop_on_error=-1,
                           input_row_format='json', parsed_row_format='json')
    p_json_output.parse(json_lol)

def test_delimited_input_as_lol_no_error_allowed(schema, delimited_lol_details):
    p_allow_no_error = Parser(schema=schema, input_row_sep=delimited_lol_details[1])
    with pytest.raises(Exception):
        list(p_allow_no_error.parse(delimited_lol_details[0]))

        StringParser().regex_match(r'\w+_\d{4}-\d{2}-\d{2}').change_case('u')),
    ('CLASS', StringParser(start=1, end=1).value_set(['a', 'b', 'A'])),
    ('INITIATED_ON', DatetimeParser(formats=['%Y%m%d', '%Y-%m-%d %H:%M:%S'])
        .convert('%Y/%m/%d')
        .max_value(datetime.datetime.now())
        .min_value(value='20000101', format='%Y%m%d')
        .not_null(datetime.datetime.strptime('19001231', '%Y%m%d'))),
    ('ASKED_AMOUNT', IntegerParser().max_value(2000).not_null(default_value=0)),
    ('ADJUSTED_AMOUNT', FloatParser().min_value(10.0).not_null(0.0)),
    ('ROLE_MODEL', ConstantParser('Iron-Man')),
    ('BLOCK_NUMBER', IntegerParser().add_func(_parity_check).range(0, 40))
]

p = Parser(schema=schema, stop_on_error=1, parsed_row_format='dict')

# Creating a temporary file for the example
with tempfile.NamedTemporaryFile() as tf:
    with open(tf.name, 'w') as sf:
        sf.writelines('""|Trig2020-23-12|A|20200123|2000|21.0934||10\n')
        sf.writelines('"DEF"||abc|||||34\n')
        sf.writelines('"DEF"|Manual_2020-23-12||2020-01-23 10:20:23|1200|11||')

    logging.info(('#' * 50) + " DATASET PARSING " + ('#' * 50))
    parsed_lines = []
    with open(tf.name, 'r') as sf:
        for parsed_line in p.parse(sf):  # calling data parsing on file object
            parsed_lines.append(parsed_line)
    logging.info("\n\n")
    logging.info(">>> Parsed Data:")
    logging.info(pprint.pformat(parsed_lines))

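# A minimal sketch, not part of the original example: parse() also accepts an
# in-memory list of delimited rows instead of a file object, as the *_as_lol_*
# tests exercise. The row below is reused from the temporary file above, and
# stop_on_error=-1 tolerates any failing rows, so the sketch never raises.
p_from_list = Parser(schema=schema, stop_on_error=-1, parsed_row_format='dict')
for parsed_row in p_from_list.parse(
        ['"DEF"|Manual_2020-23-12||2020-01-23 10:20:23|1200|11||']):
    logging.info(pprint.pformat(parsed_row))
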
def test_json_input_as_lol_no_error_allowed(schema, json_lol):
    p_allow_no_error = Parser(schema=schema, input_row_format='json',
                              parsed_row_format="dict")
    with pytest.raises(Exception):
        list(p_allow_no_error.parse(json_lol))

def test_fixed_width_input_wrong_output(fw_schema):
    with pytest.raises(UnexpectedSystemException):
        Parser(schema=fw_schema, parsed_row_format='json')

# The parsers have to be provided in a list, where each element of the list is a tuple.
# The first element of each tuple is the column name; it is only for reference and has no internal usage.
# The second element of each tuple is the actual parser (a parser object, not a built parser function).
logging.basicConfig(format='%(levelname)s:%(asctime)s:: %(message)s', level=logging.DEBUG)

fw_schema = [
    ('ID', StringParser(1, 2)),
    ('NAME', StringParser(3, 5).change_case('U').not_null('nan', allow_white_space=True)),
    ('GENDER', StringParser(6, 6).value_set(['M', 'F'])),
    ('NOT_NULLABLE_VALUE', StringParser(7, 11).not_null('dummy')),
    ('NULLABLE_VALUE', StringParser(7, 11)),
    ('BIRTH_YEAR', IntegerParser(12, 13).max_value(20)),
    ('BALANCE', FloatParser(14, 17).min_value(10.0))
]

p = Parser(schema=fw_schema, stop_on_error=1,
           input_row_format='fixed-width', parsed_row_format='dict')

logging.info(('#' * 50) + " FIXED WIDTH DATASET PARSING " + ('#' * 50))
parsed_data = p.parse(
    ['d0sauMvalue191000', 'd0pouM 2090.03', 'd0pouX 2090.03'])
logging.info("\n\n")
logging.info(">>> Parsed Data:")
for data in parsed_data:
    logging.info(pprint.pformat(data))
logging.info('#' * 125)

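# A minimal sketch, not part of the original example: the same fixed-width schema can
# emit fixed-width strings instead of dicts by switching parsed_row_format, mirroring
# test_fixed_input_fixed_width_output in the test suite. The input row is the first
# sample row from above; stop_on_error=-1 tolerates bad rows instead of raising.
p_fw_out = Parser(schema=fw_schema, stop_on_error=-1,
                  input_row_format='fixed-width', parsed_row_format='fixed-width')
for row in p_fw_out.parse(['d0sauMvalue191000']):
    logging.info(row)  # each parsed row comes back as a fixed-width string
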
def test_delimited_input_wrong_output(schema):
    with pytest.raises(UnexpectedSystemException):
        Parser(schema=schema, parsed_row_format='json')
    with pytest.raises(UnexpectedSystemException):
        Parser(schema=schema, parsed_row_format='fixed-width')

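# Note (summary, not an original test): taken together, the wrong-output tests suggest
# each input_row_format pairs only with its own format or 'dict' as parsed_row_format;
# other combinations raise UnexpectedSystemException when the Parser is constructed.
def _sketch_valid_delimited_pairings(schema):
    # Underscore-prefixed so pytest does not collect it; a hedged illustration only.
    Parser(schema=schema, parsed_row_format='delimited')  # assumed-valid default pairing
    Parser(schema=schema, parsed_row_format='dict')       # exercised by the dict-output test
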
def test_fixed_width_input_as_lol_no_error_allowed(schema, fixed_width_lol):
    p_allow_no_error = Parser(schema=schema, input_row_format='fixed-width')
    with pytest.raises(Exception):
        list(p_allow_no_error.parse(fixed_width_lol))

def test_delimited_input_as_file_object_no_error_allowed(schema, delimited_file_details):
    with open(delimited_file_details[0], 'r') as sf:
        p_allow_no_error = Parser(schema=schema)
        with pytest.raises(Exception):
            list(p_allow_no_error.parse(sf))