示例#1
0
    def test_daterange_generator_reverse(self):
        """
        Checks that the date range generator walks backwards when the start
        date is later than the end date
        """

        # daily steps from 1996-01-10 down to 1996-01-01, yyyy-mm-dd format
        first = datetime.date(1996, 1, 10)
        last = datetime.date(1996, 1, 1)
        wanted = ["1996-01-" + str(day).zfill(2) for day in range(10, 0, -1)]
        produced = ParamInjector.generate_daterange("%Y-%m-%d", first, last,
                                                    "D")
        self.assertEqual(wanted, list(produced))

        # monthly steps from 1996-05 down to 1996-01, yyyy/mm format
        first = datetime.date(1996, 5, 1)
        last = datetime.date(1996, 1, 1)
        wanted = ["1996/" + str(month).zfill(2) for month in range(5, 0, -1)]
        produced = ParamInjector.generate_daterange("%Y/%m", first, last, "M")
        self.assertEqual(wanted, list(produced))

        # yearly steps from 2005 down to 1996, two-digit year format; no
        # frequency is passed, so the generator's default one is used
        first = datetime.date(2005, 1, 1)
        last = datetime.date(1996, 1, 1)
        wanted = [str(year)[-2:] for year in range(2005, 1995, -1)]
        produced = ParamInjector.generate_daterange("%y", first, last)
        self.assertEqual(wanted, list(produced))
示例#2
0
    def test_daterange_generator_error_invalid_range(self):
        """
        Tests the error case when no start/end date is supplied, or when either
        is invalid

        A non-empty date format is passed in every call. An empty format is an
        error condition of its own (it has a dedicated test), so using it here
        would let the ValueError come from the format check and hide a missing
        start/end date validation.
        """
        date_format = "%Y-%m-%d"

        # no end date
        start_date = datetime.date(1996, 1, 1)
        date_gen = ParamInjector.generate_daterange(date_format, start_date,
                                                    None)
        self.assertRaises(ValueError, list, date_gen)

        # no start date
        end_date = datetime.date(2005, 1, 1)
        date_gen = ParamInjector.generate_daterange(date_format, None,
                                                    end_date)
        self.assertRaises(ValueError, list, date_gen)

        # invalid start date (a string instead of a datetime.date)
        start_date = "1996-01-01"
        end_date = datetime.date(2005, 1, 1)
        date_gen = ParamInjector.generate_daterange(date_format, start_date,
                                                    end_date)
        self.assertRaises(ValueError, list, date_gen)

        # invalid end date (a string instead of a datetime.date)
        start_date = datetime.date(1996, 1, 1)
        end_date = "2004-12-31"
        date_gen = ParamInjector.generate_daterange(date_format, start_date,
                                                    end_date)
        self.assertRaises(ValueError, list, date_gen)
示例#3
0
    def test_daterange_generator_error_invalid_frequency(self):
        """
        Tests the error case when the supplied frequency is invalid

        The date format and both dates are valid in every call, so the only
        thing that can trigger the ValueError is the frequency argument. An
        empty format is an error condition of its own (it has a dedicated
        test) and would otherwise mask a missing frequency validation.
        """
        date_format = "%Y-%m-%d"
        start_date = datetime.date(1996, 1, 1)
        end_date = datetime.date(1996, 1, 10)

        # frequency = None
        date_gen = ParamInjector.generate_daterange(date_format, start_date,
                                                    end_date, None)
        self.assertRaises(ValueError, list, date_gen)

        # frequency = "YEAR"
        date_gen = ParamInjector.generate_daterange(date_format, start_date,
                                                    end_date, "YEAR")
        self.assertRaises(ValueError, list, date_gen)

        # frequency = ""
        date_gen = ParamInjector.generate_daterange(date_format, start_date,
                                                    end_date, "")
        self.assertRaises(ValueError, list, date_gen)
示例#4
0
    def test_number_generator_corner_cases(self):
        """
        Exercises boundary inputs of the number sequence generator: empty
        ranges, a single element, and two-element ranges in both directions
        """
        # each entry is (positional arguments, expected sequence)
        scenarios = (
            # empty range: first > last and no step given
            ((1, 0), []),
            # empty range: first < last with a negative step
            ((0, 1, -1), []),
            # single element
            ((0, 0), ['0']),
            # two elements, ascending
            ((0, 1), ['0', '1']),
            # two elements, descending
            ((1, 0, -1), ['1', '0']),
        )

        for arguments, wanted in scenarios:
            sequence = ParamInjector.generate_num_sequence(*arguments)
            self.assertEqual(wanted, list(sequence))
示例#5
0
    def test_process_code_generator(self):
        """
        Tests generation of process codes

        Three scenarios are covered: codes generated with the placeholders in
        their natural order, the same codes generated with explicitly indexed
        placeholders (which changes the iteration order), and a constant
        format combined with an empty list of parameter limits.
        """
        code_format = "{:07d}{:02d}{:04d}{:03d}{:04d}"
        param_limits = [(0, 5), (2018, 2019), [402], [0, 9999]]

        # 6 first sequential entries for each year, for each origin
        expected_output = ['00000007520184020000', '00000005120184029999',
                           '00000004120194020000', '00000001720194029999',
                           '00000016020184020000', '00000013620184029999',
                           '00000012620194020000', '00000010220194029999',
                           '00000024520184020000', '00000022120184029999',
                           '00000021120194020000', '00000028420194029999',
                           '00000033020184020000', '00000030620184029999',
                           '00000039320194020000', '00000036920194029999',
                           '00000041520184020000', '00000048820184029999',
                           '00000047820194020000', '00000045420194029999',
                           '00000059720184020000', '00000057320184029999',
                           '00000056320194020000', '00000053920194029999']

        proc_gen = ParamInjector.generate_format(code_format, param_limits,
                                                 self.verif_code, 1)
        self.assertEqual(expected_output, list(proc_gen))

        # Generate the same codes, but in a different order (first vary the
        # sequential number, then the origin, then the year)
        code_format = "{4:07d}{1:02d}{0:04d}{2:03d}{3:04d}"
        param_limits = [(2018, 2019), [402], [0, 9999], (0, 5)]

        # 6 first sequential entries for each year, for each origin
        expected_output = ['00000007520184020000', '00000016020184020000',
                           '00000024520184020000', '00000033020184020000',
                           '00000041520184020000', '00000059720184020000',
                           '00000005120184029999', '00000013620184029999',
                           '00000022120184029999', '00000030620184029999',
                           '00000048820184029999', '00000057320184029999',
                           '00000004120194020000', '00000012620194020000',
                           '00000021120194020000', '00000039320194020000',
                           '00000047820194020000', '00000056320194020000',
                           '00000001720194029999', '00000010220194029999',
                           '00000028420194029999', '00000036920194029999',
                           '00000045420194029999', '00000053920194029999']

        proc_gen = ParamInjector.generate_format(code_format, param_limits,
                                                 self.verif_code_switched, 1)
        self.assertEqual(expected_output, list(proc_gen))

        # Use an empty param_limits with a fixed code_format
        code_format = "test"
        param_limits = []

        # NOTE(review): the empty expectation is carried over from the
        # previous version of this test, where it could never fail - confirm
        # it against what generate_format yields for an empty limit list
        expected_output = []

        # The generator must be consumed exactly once: draining it before the
        # assertion leaves it exhausted, and comparing [] with an exhausted
        # generator passes no matter what was actually produced
        proc_gen = ParamInjector.generate_format(code_format, param_limits)
        self.assertEqual(expected_output, list(proc_gen))
示例#6
0
    def test_alphabetic_generator_all(self):
        """
        Checks the alphabetic search pattern generator when its third
        argument is False, where lowercase and uppercase letters are expected
        """
        # one word with a single letter: all lowercase patterns followed by
        # all uppercase patterns
        lower = [chr(code) for code in range(97, 123)]
        upper = [chr(code) for code in range(65, 91)]
        wanted = [letter + "*" for letter in lower]
        wanted += [letter + "*" for letter in upper]
        patterns = ParamInjector.generate_alpha(1, 1, False)
        self.assertEqual(wanted, list(patterns))

        # one word with two letters; the full output is long, so only both
        # ends of it are compared
        head_wanted = ["aa*", "ab*", "ac*", "ad*", "ae*"]
        tail_wanted = ["ZV*", "ZW*", "ZX*", "ZY*", "ZZ*"]
        patterns = ParamInjector.generate_alpha(2, 1, False)

        # leading 5 entries
        head = [next(patterns) for _ in range(5)]
        self.assertEqual(head_wanted, head)

        # trailing 5 entries: a bounded deque keeps only the last items seen
        window = deque(patterns, maxlen=5)
        tail = [window.popleft() for _ in range(5)]
        self.assertEqual(tail_wanted, tail)

        # two words with two letters each, again comparing only both ends
        head_wanted = [
            "aa* aa*", "aa* ab*", "aa* ac*", "aa* ad*", "aa* ae*"
        ]
        tail_wanted = [
            "ZZ* ZV*", "ZZ* ZW*", "ZZ* ZX*", "ZZ* ZY*", "ZZ* ZZ*"
        ]
        patterns = ParamInjector.generate_alpha(2, 2, False)

        # leading 5 entries
        head = [next(patterns) for _ in range(5)]
        self.assertEqual(head_wanted, head)

        # trailing 5 entries
        window = deque(patterns, maxlen=5)
        tail = [window.popleft() for _ in range(5)]
        self.assertEqual(tail_wanted, tail)
示例#7
0
    def test_process_code_error_invalid_param(self):
        """
        Tests the error case when the list of parameter limits contains invalid
        values

        The verification function is passed together with its index. Passing
        it without an index is an error condition of its own (it has a
        dedicated test) and would raise the ValueError regardless of what
        param_limits contains, hiding a missing validation of the limits.
        """
        code_format = "{:07d}{:02d}{:04d}{:03d}{:04d}"

        # invalid param_limits (one of the entries is an integer)
        param_limits = [(0, 5), (2018, 2019), [402], 9999]
        proc_gen = ParamInjector.generate_format(code_format, param_limits,
                                                 self.verif_code, 1)
        self.assertRaises(ValueError, next, proc_gen)

        # invalid param_limits (one of the entries is a tuple with 3 elements)
        param_limits = [(0, 5), (2018, 2019), [402], (1, 2, 3)]
        proc_gen = ParamInjector.generate_format(code_format, param_limits,
                                                 self.verif_code, 1)
        self.assertRaises(ValueError, next, proc_gen)
示例#8
0
    def test_number_generator_simple(self):
        """
        Checks ascending number sequences generated with a step of 1 and
        False as the fourth argument, which are expected without padding
        """
        # sequences 0..9, 0..99 and 0..100, each compared with the plain
        # string form of its numbers
        for upper_bound in (9, 99, 100):
            wanted = [str(number) for number in range(upper_bound + 1)]
            sequence = ParamInjector.generate_num_sequence(0, upper_bound, 1,
                                                           False)
            self.assertEqual(wanted, list(sequence))
示例#9
0
    def test_number_generator_padding(self):
        """
        Checks ascending number sequences generated with only the first and
        last values supplied, which are expected to be zero-padded to the
        width of the largest number
        """
        # (last value, expected width): 0..9 with 1 digit, 0..99 with
        # 2 digits and 0..100 with 3 digits
        scenarios = ((9, 1), (99, 2), (100, 3))

        for upper_bound, width in scenarios:
            wanted = [str(number).zfill(width)
                      for number in range(upper_bound + 1)]
            sequence = ParamInjector.generate_num_sequence(0, upper_bound)
            self.assertEqual(wanted, list(sequence))
示例#10
0
    def test_const_generator(self):
        """
        Checks that the constant value generator produces exactly one entry,
        equal to the value it was given
        """

        # a regular string first, then the empty string
        for value in ("test", ""):
            wanted = [value]
            produced = ParamInjector.generate_constant(value)
            self.assertEqual(wanted, list(produced))
示例#11
0
    def test_daterange_generator_error_empty_format(self):
        """
        Checks that consuming a date range generator created with an empty
        date format raises ValueError
        """

        first = datetime.date(1996, 1, 1)
        last = datetime.date(2005, 1, 1)
        produced = ParamInjector.generate_daterange("", first, last)

        # the error is expected while the generator is being consumed
        with self.assertRaises(ValueError):
            list(produced)
示例#12
0
    def test_alphabetic_generator_error_corner_cases(self):
        """
        Tests some corner cases for the alphabetic generator

        generate_alpha is called positionally as (word length, number of
        words), the same order used by the other alphabetic generator tests.
        Each case makes exactly one of the two arguments invalid and keeps the
        other one valid, so the ValueError can only come from the argument
        under test.
        """
        # zero words with length two
        pattern_gen = ParamInjector.generate_alpha(2, 0)
        self.assertRaises(ValueError, list, pattern_gen)

        # two words with zero length
        pattern_gen = ParamInjector.generate_alpha(0, 2)
        self.assertRaises(ValueError, list, pattern_gen)

        # negative value for word count
        pattern_gen = ParamInjector.generate_alpha(2, -2)
        self.assertRaises(ValueError, list, pattern_gen)

        # negative value for length
        pattern_gen = ParamInjector.generate_alpha(-2, 2)
        self.assertRaises(ValueError, list, pattern_gen)
示例#13
0
    def test_alphabetic_generator_lower(self):
        """
        Checks the alphabetic search pattern generator when only the length
        and the number of words are given, where all-lowercase patterns are
        expected
        """
        # one word with a single letter
        lower = [chr(code) for code in range(97, 123)]
        wanted = [letter + "*" for letter in lower]
        patterns = ParamInjector.generate_alpha(1, 1)
        self.assertEqual(wanted, list(patterns))

        # one word with two letters; the full output is long, so only both
        # ends of it are compared
        head_wanted = ["aa*", "ab*", "ac*", "ad*", "ae*"]
        tail_wanted = ["zv*", "zw*", "zx*", "zy*", "zz*"]
        patterns = ParamInjector.generate_alpha(2, 1)

        # leading 5 entries
        head = [next(patterns) for _ in range(5)]
        self.assertEqual(head_wanted, head)

        # trailing 5 entries: a bounded deque keeps only the last items seen
        window = deque(patterns, maxlen=5)
        tail = [window.popleft() for _ in range(5)]
        self.assertEqual(tail_wanted, tail)

        # two words with two letters each, again comparing only both ends
        head_wanted = [
            "aa* aa*", "aa* ab*", "aa* ac*", "aa* ad*", "aa* ae*"
        ]
        tail_wanted = [
            "zz* zv*", "zz* zw*", "zz* zx*", "zz* zy*", "zz* zz*"
        ]
        patterns = ParamInjector.generate_alpha(2, 2)

        # leading 5 entries
        head = [next(patterns) for _ in range(5)]
        self.assertEqual(head_wanted, head)

        # trailing 5 entries
        window = deque(patterns, maxlen=5)
        tail = [window.popleft() for _ in range(5)]
        self.assertEqual(tail_wanted, tail)
示例#14
0
    def test_number_generator_reverse(self):
        """
        Checks descending number sequences generated with a step of -1 and
        False as the fourth argument
        """
        # sequences 9..0, 99..0 and 100..0, each compared with the plain
        # string form of its numbers
        for start in (9, 99, 100):
            wanted = [str(number) for number in range(start, -1, -1)]
            sequence = ParamInjector.generate_num_sequence(start, 0, -1,
                                                           False)
            self.assertEqual(wanted, list(sequence))
示例#15
0
    def test_list_generator(self):
        """
        Checks the predefined list generator against several comma-separated
        inputs
        """

        # each entry is (input string, expected elements)
        scenarios = (
            # plain comma-separated values
            ("a,b,c,d,e", ["a", "b", "c", "d", "e"]),
            ("alice,bob,charlie,david,emily",
             ["alice", "bob", "charlie", "david", "emily"]),
            # spaces around the elements are not part of the result
            ("a  ,  b  ,c, d, e", ["a", "b", "c", "d", "e"]),
            # an empty input still produces one (empty) element
            ("", [""]),
            # input without any separator
            ("singletest", ["singletest"]),
        )

        for raw_list, wanted in scenarios:
            produced = ParamInjector.generate_list(raw_list)
            self.assertEqual(wanted, list(produced))
示例#16
0
    def test_process_code_error_verif_index(self):
        """
        Checks that supplying a verification function without the index of
        its position makes the generator raise ValueError
        """
        limits = [(0, 5), (2018, 2019), [402], [0, 9999]]
        codes = ParamInjector.generate_format(
            "{:07d}{:02d}{:04d}{:03d}{:04d}", limits, self.verif_code)

        # asking for the first process code is enough to trigger the error
        with self.assertRaises(ValueError):
            next(codes)
示例#17
0
    def test_daterange_generator_limits(self):
        """
        Checks the date range generator with end dates that fall on the last
        day of a period, where an off-by-one error would add an extra entry
        """

        # monthly steps from 1996-01-01 to 1996-04-30, yyyy/mm format:
        # January to April are expected, May is not
        first = datetime.date(1996, 1, 1)
        last = datetime.date(1996, 4, 30)
        wanted = ["1996/" + str(month).zfill(2) for month in range(1, 5)]
        produced = ParamInjector.generate_daterange("%Y/%m", first, last, "M")
        self.assertEqual(wanted, list(produced))

        # yearly steps from 1996-01-01 to 2004-12-31, two-digit year format:
        # 1996 to 2004 are expected, 2005 is not; no frequency is passed, so
        # the generator's default one is used
        first = datetime.date(1996, 1, 1)
        last = datetime.date(2004, 12, 31)
        wanted = [str(year)[-2:] for year in range(1996, 2005)]
        produced = ParamInjector.generate_daterange("%y", first, last)
        self.assertEqual(wanted, list(produced))
示例#18
0
def create_parameter_generators(probe, parameter_handlers, filter_limits=True):
    """
    Builds one ParamInjector generator for each entry of parameter_handlers
    and returns them in a list, in the same order as the handlers.

    The handlers are walked up to two times. The first walk creates every
    generator from its configured limits and stores the first value each one
    produces. The second walk only happens when filter_limits is true; it
    revisits the handlers whose 'filter_range' entry is truthy, asks
    RangeInference for a filtered upper limit (process code, number and date
    parameters) and replaces the generator created during the first walk.

    Raises ValueError when a handler has an unknown 'parameter_type'.
    """

    generators = []
    first_values = []

    # The filtering walk needs the first value of every parameter, so it can
    # only start after a complete walk over all handlers
    walks = (1, 2) if filter_limits else (1,)

    for walk in walks:
        filtering = walk == 2

        for position, handler in enumerate(parameter_handlers):
            kind = handler['parameter_type']
            generator = None

            if filtering and not handler['filter_range']:
                # This parameter keeps the generator from the first walk
                continue

            other_entries = []
            misses = None
            if filtering:
                # Extra arguments for the range inference: the first value
                # of every parameter, with None in the slot of the
                # parameter currently being filtered
                other_entries = first_values.copy()
                other_entries[position] = None
                misses = int(handler['cons_misses'])

            if kind == "process_code":
                process_format = '{:07d}-{:02d}.{:04d}.{}.{:02d}.{:04d}'

                year_from = int(handler['first_year_proc_param'])
                year_to = int(handler['last_year_proc_param'])
                raw_segments = handler['segment_ids_proc_param'].split(",")
                raw_courts = handler['court_ids_proc_param'].split(",")
                raw_origins = handler['origin_ids_proc_param'].split(",")

                # the identifiers arrive as text and are used as integers
                segments = [int(item) for item in raw_segments]
                courts = [int(item) for item in raw_courts]
                origins = [int(item) for item in raw_origins]

                highest_seq = 9999999
                if filtering:
                    highest_seq = RangeInference.filter_process_code(
                        year_from,
                        year_to,
                        segments,
                        courts,
                        origins,
                        probe,
                        other_entries,
                        cons_misses=misses)

                # sequential identifier, year, segment identifiers, court
                # identifiers and origin identifiers, in this order
                limits = [
                    (0, highest_seq),
                    (year_from, year_to),
                    segments,
                    courts,
                    origins
                ]

                generator = ParamInjector.generate_format(
                    code_format=process_format,
                    param_limits=limits,
                    verif=ParamInjector.process_code_verification,
                    verif_index=1)

            elif kind == "number_seq":
                lowest = int(handler['first_num_param'])
                highest = int(handler['last_num_param'])

                if filtering:
                    highest = RangeInference.filter_numeric_range(
                        lowest,
                        highest,
                        probe,
                        other_entries,
                        cons_misses=misses)

                generator = ParamInjector.generate_num_sequence(
                    first=lowest,
                    last=highest,
                    step=int(handler['step_num_param']),
                    leading=handler['leading_num_param'],
                )
            elif kind == 'date_seq':
                earliest = datetime.date.fromisoformat(
                    handler['start_date_date_param'])
                latest = datetime.date.fromisoformat(
                    handler['end_date_date_param'])
                step_unit = handler['frequency_date_param']
                layout = handler['date_format_date_param']

                if filtering:
                    latest = RangeInference.filter_daterange(
                        earliest,
                        latest,
                        probe,
                        step_unit,
                        layout,
                        other_entries,
                        cons_misses=misses)

                generator = ParamInjector.generate_daterange(
                    date_format=layout,
                    start_date=earliest,
                    end_date=latest,
                    frequency=step_unit,
                )
            elif kind == 'alpha_seq':
                # Alphabetic sequences are built the same way in both
                # walks, no range filtering is applied to them
                word_length = int(handler['length_alpha_param'])
                word_count = int(handler['num_words_alpha_param'])
                lowercase_only = handler['no_upper_alpha_param']

                generator = ParamInjector.generate_alpha(
                    length=word_length,
                    num_words=word_count,
                    no_upper=lowercase_only)
            elif kind == 'value_list':
                # Built the same way in both walks
                generator = ParamInjector.generate_list(
                    elements=handler['value_list_param'])
            elif kind == 'const_value':
                # Built the same way in both walks
                generator = ParamInjector.generate_constant(
                    value=handler['value_const_param'])
            else:
                raise ValueError(f"Invalid parameter type: {kind}")

            if filtering and generator is not None:
                # Swap the generator stored during the first walk for the
                # one created with the filtered range
                generators[position] = generator
            else:
                # Duplicate the generator so that its first value can be
                # read without consuming the copy handed to the caller
                generator, peek = itertools.tee(generator)
                first_values.append(next(peek))
                generators.append(generator)

    return generators
示例#19
0
 def preprocess(entry):
     """
     Formats the parameters obtained by placing entry at position index of a
     copy of initial_values.

     initial_values, index, code_format, verif and verif_index are read from
     the enclosing scope; initial_values itself is left unchanged.
     """
     candidate = initial_values.copy()
     candidate[index] = entry
     formatted = ParamInjector.format_params(code_format, tuple(candidate),
                                             verif, verif_index)
     return formatted
示例#20
0
 def verif(seq):
     """
     Forwards seq to ParamInjector.process_code_verification together with
     the year, segment, court and origin read from the enclosing scope, and
     returns its result.
     """
     fixed_fields = (year, segment, court, origin)
     return ParamInjector.process_code_verification(seq, *fixed_fields)