def test_valid_data_parsing_type_preserved(input_data, input_format):
    """Check that parsing with ``enforce_type=False`` hands back the input as-is.

    Args:
        input_data: raw value passed to the built parser function.
        input_format: datetime format given to the parser through ``formats``;
            when falsy the parser is created without explicit formats.
            (Both arguments are presumably supplied by a pytest
            parametrization that is not visible here - confirm.)
    """
    parser = (
        DatetimeParser(formats=[input_format], enforce_type=False)
        if input_format
        else DatetimeParser(enforce_type=False)
    )
    parse = parser.build()

    # With type enforcement off, the output must compare equal to the input.
    assert parse(input_data) == input_data
def test_enforce_type():
    """Compare parser output with ``enforce_type`` switched off and on.

    With ``enforce_type=False`` the string ``"20200101"`` must come back
    unchanged; with ``enforce_type=True`` it must come back as the
    corresponding ``datetime`` object.
    """
    raw = "20200101"

    # Type enforcement disabled: value is returned as the same string.
    passthrough = DatetimeParser(enforce_type=False).build()
    assert passthrough(raw) == "20200101"

    # Type enforcement enabled: value is converted to a datetime.
    converting = DatetimeParser(enforce_type=True).build()
    expected = datetime.datetime.strptime('20200101', '%Y%m%d')
    assert converting(raw) == expected
def test_invalid_data_parsing(input_data, input_format):
    """Check that unparsable input raises ``DateTimeParsingException``.

    Args:
        input_data: raw value expected to be rejected by the parser.
        input_format: datetime format given to the parser through ``formats``;
            when falsy the parser is created without explicit formats.
            (Both arguments are presumably supplied by a pytest
            parametrization that is not visible here - confirm.)
    """
    if not input_format:
        parser = DatetimeParser(enforce_type=False)
    else:
        parser = DatetimeParser(formats=[input_format], enforce_type=False)
    parse = parser.build()

    with pytest.raises(DateTimeParsingException):
        assert parse(input_data)
def test_min_value_validator():
    """Exercise the ``min_value`` validator of ``DatetimeParser``.

    The lower bound is given as a string in a different format
    (``%d/%m/%Y``) than the data being parsed (``%Y-%m-%d``). Values on or
    after the bound must parse; a value before it must raise
    ``MinimumValueConstraintException``. Finally, configuring a bound with
    the string ``"300"`` and no format is expected to raise
    ``UnexpectedParsingException``.
    """
    parser = DatetimeParser(formats=['%Y-%m-%d'])
    parser = parser.min_value(value='01/01/2020', format='%d/%m/%Y')
    parse = parser.build()

    # Dates at or above the minimum are accepted.
    for accepted in ('2020-12-31', '2020-01-01'):
        assert parse(accepted)

    # A date below the minimum is rejected.
    with pytest.raises(MinimumValueConstraintException):
        assert parse('2019-12-31')

    # NOTE(review): this check uses max_value inside the min_value test -
    # possibly intentional, confirm against the validator implementation.
    with pytest.raises(UnexpectedParsingException):
        DatetimeParser().max_value("300").build()
def test_not_null_validator():
    """Exercise the ``not_null`` validator with and without a default value.

    Without a default, a regular value passes through and ``None`` must raise
    ``NullValueInNotNullFieldException``. With a default (given as a string
    plus its format), both ``None`` and the empty string must be replaced by
    the default datetime.
    """
    strict = DatetimeParser(quoted=0, enforce_type=False).not_null().build()
    assert strict("20200101") == "20200101"

    with pytest.raises(NullValueInNotNullFieldException):
        assert strict(None)
        # NOTE(review): not reached once the call above raises - confirm
        # whether the empty-string case was meant to be checked separately.
        assert strict("") == ""

    # Default value assignment check
    with_default = DatetimeParser().not_null(
        default_value="20200101", format='%Y%m%d').build()
    assert with_default(None) == datetime.datetime.strptime('20200101', '%Y%m%d')
    assert with_default("") == datetime.datetime.strptime('20200101', '%Y%m%d')
def test_valid_data_parsing_type_converted(input_data, input_format):
    """Check that valid input is converted to the matching ``datetime``.

    Args:
        input_data: raw string passed to the built parser function.
        input_format: datetime format given to the parser through ``formats``.
            When falsy, the parser is created without explicit formats and
            the expected value is computed with ``%Y%m%d`` for 8-character
            input and ``%Y%m%d%H%M%S`` otherwise.
            (Both arguments are presumably supplied by a pytest
            parametrization that is not visible here - confirm.)
    """
    if input_format:
        parse = DatetimeParser(formats=[input_format]).build()
        expected_format = input_format
    else:
        parse = DatetimeParser().build()
        # No explicit format: pick the format used to compute the expectation
        # from the length of the input.
        expected_format = '%Y%m%d' if len(input_data) == 8 else '%Y%m%d%H%M%S'

    expected = datetime.datetime.strptime(input_data, expected_format)
    assert parse(input_data) == expected
def test_value_set_validator():
    """Exercise the ``value_set`` validator of ``DatetimeParser``.

    A value inside the allowed set must parse to the matching datetime; a
    value outside it must raise ``ValidValueCheckException``.
    """
    first_allowed = datetime.datetime.strptime('20200101', '%Y%m%d')
    second_allowed = datetime.datetime.strptime('20200102', '%Y%m%d')
    parse = DatetimeParser().value_set([first_allowed, second_allowed]).build()

    # Member of the allowed set.
    assert parse('20200101') == datetime.datetime.strptime('20200101', '%Y%m%d')

    # Not a member of the allowed set.
    with pytest.raises(ValidValueCheckException):
        assert parse('20200103')
def test_range_validator():
    """Exercise the ``range`` validator of ``DatetimeParser``.

    The lower bound is passed as a string (with its own ``format``) and the
    upper bound as a ``datetime`` object. Values on either bound and between
    them must parse; values outside must raise the minimum / maximum
    constraint exceptions respectively.
    """
    upper = datetime.datetime.strptime('20201231', '%Y%m%d')
    parser = DatetimeParser(formats=['%Y-%m-%d'])
    parser = parser.range(
        lower_bound='01/01/2020', upper_bound=upper, format='%d/%m/%Y')
    parse = parser.build()

    # Both bounds and an interior date are accepted.
    for accepted in ('2020-01-01', '2020-12-31', '2020-06-30'):
        assert parse(accepted)

    # One day before the lower bound.
    with pytest.raises(MinimumValueConstraintException):
        assert parse('2019-12-31')

    # One day after the upper bound.
    with pytest.raises(MaximumValueConstraintException):
        assert parse('2021-01-01')
def _schema(custom_function):
    """Build the list of ``(column name, parser)`` pairs used as a schema.

    Args:
        custom_function: callable attached to the ``BLOCK_NUMBER`` parser
            through ``add_func``.

    Returns:
        A list of 2-tuples, one per column, in column order. Each tuple holds
        the column name and the (unbuilt) parser object for that column.
    """
    id_parser = StringParser(quoted=1)
    run_id_parser = (
        StringParser().regex_match(r'\w+_\d{4}-\d{2}-\d{2}').change_case('u')
    )
    class_parser = StringParser(start=1, end=1).value_set(['a', 'b', 'A'])

    # The datetime column is configured step by step, in the same order as
    # the validators are meant to be attached.
    initiated_on_parser = DatetimeParser(formats=['%Y%m%d', '%Y-%m-%d %H:%M:%S'])
    initiated_on_parser = initiated_on_parser.convert('%Y/%m/%d')
    initiated_on_parser = initiated_on_parser.max_value(datetime.datetime.now())
    initiated_on_parser = initiated_on_parser.min_value(
        value='20000101', format='%Y%m%d')
    initiated_on_parser = initiated_on_parser.not_null(
        datetime.datetime.strptime('19001231', '%Y%m%d'))

    asked_amount_parser = IntegerParser().max_value(2000).not_null(default_value=0)
    adjusted_amount_parser = FloatParser().min_value(10.0).not_null(0.0)
    role_model_parser = ConstantParser('Leo Messi')
    block_number_parser = IntegerParser().add_func(custom_function).range(0, 40)

    return [
        ('ID', id_parser),
        ('RUN_ID', run_id_parser),
        ('CLASS', class_parser),
        ('INITIATED_ON', initiated_on_parser),
        ('ASKED_AMOUNT', asked_amount_parser),
        ('ADJUSTED_AMOUNT', adjusted_amount_parser),
        ('ROLE_MODEL', role_model_parser),
        ('BLOCK_NUMBER', block_number_parser),
    ]
def test_single_quoted_data():
    """Check that a parser created with ``quoted=2`` handles single-quoted input.

    The single-quoted string ``'20200101'`` must parse to the datetime for
    2020-01-01.
    """
    parse = DatetimeParser(quoted=2).build()
    quoted_value = "'20200101'"
    expected = datetime.datetime.strptime('20200101', '%Y%m%d')
    assert parse(quoted_value) == expected
def test_double_quoted_data():
    """Check that a parser created with ``quoted=1`` handles double-quoted input.

    The double-quoted string ``"20200101"`` must parse to the datetime for
    2020-01-01.
    """
    parse = DatetimeParser(quoted=1).build()
    quoted_value = '"20200101"'
    expected = datetime.datetime.strptime('20200101', '%Y%m%d')
    assert parse(quoted_value) == expected
def test_non_quoted_data():
    """Check that unquoted input passes through when ``enforce_type=False``.

    Both a string and an integer input must be returned unchanged.
    """
    parse = DatetimeParser(enforce_type=False).build()

    # Each raw value is expected to come back equal to itself.
    for raw, expected in (("20200101", "20200101"), (20200101, 20200101)):
        assert parse(raw) == expected
def test_convert():
    """Check that ``convert`` re-formats the parsed value with the given format.

    Input ``"20200501"`` with target format ``%Y|%m|%d`` must yield
    ``"2020|05|01"``.
    """
    parser = DatetimeParser().convert('%Y|%m|%d')
    parse = parser.build()
    converted = parse("20200501")
    assert converted == "2020|05|01"
def test_add_func_validator():
    """Check that a function attached with ``add_func`` shapes the parser output.

    With ``_get_month_value`` attached, parsing ``"20200501"`` must yield ``5``.
    """
    parser = DatetimeParser().add_func(_get_month_value)
    parse = parser.build()
    result = parse("20200501")
    assert result == 5
# Import required parser class from parseval.parser import DatetimeParser import datetime import logging logging.basicConfig(format='%(levelname)s:%(asctime)s:: %(message)s', level=logging.DEBUG) basic_parser = DatetimeParser(formats=['%Y%m%d']) # Create basic parser object basic_parse_func = basic_parser.build() # Build the parser function input_data = "20200101" # Input Data basic_parsed_output = basic_parse_func(input_data) # Parse data logging.info(('#' * 50) + " DATETIME PARSING " + ('#' * 50)) logging.info("====> Simple Data Parsing example:") logging.info("Input 1: {}".format(input_data)) logging.info("Output 1: {}".format(basic_parsed_output)) logging.info('\n') # Now let's see some available validators, to get the idea of how to use those # Note, we will not go through all available validators, because all validators work in same fashion # Syntax and description of all validators are available in documentation default_date = datetime.datetime.strptime("20200101", "%Y%m%d") min_date = datetime.datetime.strptime("20200101", "%Y%m%d") max_date = datetime.datetime.strptime("20200831", "%Y%m%d") validation_parser = DatetimeParser()\ .range(min_date, max_date)\ .not_null(default_value=default_date) # null check validation and allowed values validation is added validation_parse_func = validation_parser( ) # Yes, you can directly call the object to build the parser more_generic_parser = DatetimeParser(formats=['%Y%m%d', '%Y%m%d %H%M%S'
if i_data % 2 != 0: raise Exception("The data has to be even!") return data # The cursors has to provided in a list, where each element of the list is a tuple. # First element of each element is the column name, this is just for reference, no internal usage # Second element of each tuple is the actual parser (parser objects, not built parser function) schema = [ ('ID', StringParser(quoted=1)), ('RUN_ID', StringParser().regex_match(r'\w+_\d{4}-\d{2}-\d{2}').change_case('u')), ('CLASS', StringParser(start=1, end=1).value_set(['a', 'b', 'A'])), ('INITIATED_ON', DatetimeParser( formats=['%Y%m%d', '%Y-%m-%d %H:%M:%S']).convert('%Y/%m/%d').max_value( datetime.datetime.now()).min_value(value='20000101', format='%Y%m%d').not_null( datetime.datetime.strptime( '19001231', '%Y%m%d'))), ('ASKED_AMOUNT', IntegerParser().max_value(2000).not_null(default_value=0)), ('ADJUSTED_AMOUNT', FloatParser().min_value(10.0).not_null(0.0)), ('ROLE_MODEL', ConstantParser('Iron-Man')), ('BLOCK_NUMBER', IntegerParser().add_func(_parity_check).range(0, 40)) ] p = Parser(schema=schema, stop_on_error=1, parsed_row_format='dict') # Creating temporary file for the example with tempfile.NamedTemporaryFile() as tf: with open(tf.name, 'w') as sf: sf.writelines('""|Trig2020-23-12|A|20200123|2000|21.0934||10\n') sf.writelines('"DEF"||abc|||||34\n')