Пример #1
0
    def test_numeric_corner_cases(self):
        """
        Checks the numeric range inference against degenerate search spaces:
        no entries at all, a completely filled range, and a single entry at
        or near the start of the range
        """

        # No entries in the search space: nothing should be reported as the
        # last entry
        never_hits = RangeInferenceTest.dummy_hit_check(0, -1)
        found = RangeInference.filter_numeric_range(0, 100, never_hits, 10)
        self.assertIsNone(found)

        # Each pair is the (begin, end) of the interval that contains
        # entries; the inferred last entry must match the interval end
        filled_intervals = (
            (0, 123),  # entire search space is filled
            (0, 0),    # only one entry, at the beginning
            (5, 5),    # only one entry, near the beginning
        )
        for interval_begin, interval_end in filled_intervals:
            in_interval = RangeInferenceTest.dummy_hit_check(interval_begin,
                                                             interval_end)
            found = RangeInference.filter_numeric_range(0, 123, in_interval,
                                                        10)
            self.assertEqual(found, interval_end)
Пример #2
0
    def test_formatted_code_corner_cases(self):
        """
        Checks the formatted code inference in corner cases: a completely
        filled search space and a single entry at the beginning
        """

        lower = 1
        upper = 100

        # Last existing value of each sub-parameter for every scenario:
        # first the fully filled search space, then a single entry at the
        # very beginning
        for last_first, last_second in ((upper, upper), (1, 1)):
            code_probe = RangeInferenceTest.dummy_code_probe(
                "-", [last_first, last_second])
            filtered = RangeInference.filter_formatted_code(
                "{:02}-{:02}", [(lower, upper), (lower, upper)],
                [True, True], code_probe)
            # Only the upper limit of each filtered sub-parameter matters
            upper_limits = [limits[-1] for limits in filtered]
            self.assertEqual(upper_limits, [last_first, last_second])
Пример #3
0
    def test_numeric_range_inference(self):
        """
        Checks the numeric range inference on contiguous intervals and on a
        pair of intervals separated by a gap
        """

        # Contiguous intervals: entries from 0 to 50, then from 25 to 48.
        # The end of the interval must be found in both cases
        for interval_begin, interval_end in ((0, 50), (25, 48)):
            in_interval = RangeInferenceTest.dummy_hit_check(interval_begin,
                                                             interval_end)
            found = RangeInference.filter_numeric_range(0, 200, in_interval,
                                                        10)
            self.assertEqual(found, interval_end)

        # Two separate intervals, combined into a single check which accepts
        # a value belonging to either of them
        in_lower_interval = RangeInferenceTest.dummy_hit_check(25, 48)
        in_upper_interval = RangeInferenceTest.dummy_hit_check(90, 95)

        def in_either_interval(x):
            return (in_lower_interval(x) or in_upper_interval(x))

        # Known limitation: the gap between the intervals is larger than the
        # cons_misses parameter, so only the end of the first interval is
        # found
        found = RangeInference.filter_numeric_range(0, 200,
                                                    in_either_interval, 10)
        self.assertEqual(found, 48)

        # With a higher value for cons_misses the gap is crossed and the
        # correct answer is found
        found = RangeInference.filter_numeric_range(0, 200,
                                                    in_either_interval, 50)
        self.assertEqual(found, 95)
Пример #4
0
    def test_numeric_range_inference(self):
        """
        Checks the numeric range inference on contiguous intervals, with
        extra parameters, and on a pair of intervals separated by a gap
        """

        # Contiguous intervals: entries from 0 to 50, then from 25 to 48.
        # The end of the interval must be found in both cases
        for interval_begin, interval_end in ((0, 50), (25, 48)):
            probe = RangeInferenceTest.dummy_entry_probe(interval_begin,
                                                         interval_end)
            found = RangeInference.filter_numeric_range(
                0, 200, probe, cons_misses=10)
            self.assertEqual(found, interval_end)

        # Same as the first case, but supplying the list of extra parameters
        probe = RangeInferenceTest.dummy_entry_probe(0, 50)
        found = RangeInference.filter_numeric_range(
            0, 200, probe, [None, 2, 'test'], cons_misses=10)
        self.assertEqual(found, 50)

        # Two separate intervals (25 to 48 and 90 to 95): an entry exists if
        # the first element of the probed value belongs to either of them
        def in_either_interval(x):
            return 25 <= x[0] <= 48 or 90 <= x[0] <= 95

        # A specific mock of EntryProbing is required for this check
        gapped_probe = mock.Mock(spec=EntryProbing,
                                 check_entry=in_either_interval)

        # Known limitation: the gap between the intervals is larger than the
        # cons_misses parameter, so only the end of the first interval is
        # found
        found = RangeInference.filter_numeric_range(
            0, 200, gapped_probe, cons_misses=10)
        self.assertEqual(found, 48)

        # With a higher value for cons_misses the gap is crossed and the
        # correct answer is found
        found = RangeInference.filter_numeric_range(
            0, 200, gapped_probe, cons_misses=50)
        self.assertEqual(found, 95)
Пример #5
0
    def test_date_corner_cases(self):
        """
        Checks the date range inference against degenerate search spaces:
        no entries at all, a completely filled range, and a single entry at
        or near the start of the range
        """

        search_begin = date(2000, 1, 1)
        search_end = date(2020, 1, 1)

        # No entries in the search space (the interval ends before it
        # begins): nothing should be found at yearly, monthly or daily
        # resolution
        empty_probe = RangeInferenceTest.dummy_entry_probe(
            date(2010, 1, 1), date(2009, 1, 1))
        for resolution in ('Y', 'M', 'D'):
            found = RangeInference.filter_daterange(
                search_begin, search_end, empty_probe, resolution, 10)
            self.assertIsNone(found)

        # Entire search space is filled
        full_probe = RangeInferenceTest.dummy_entry_probe(search_begin,
                                                          search_end)
        # Yearly resolution: only the year is compared
        found = RangeInference.filter_daterange(
            search_begin, search_end, full_probe, 'Y', 10)
        self.assertEqual(found.year, search_end.year)
        # Monthly resolution: year and month are compared
        found = RangeInference.filter_daterange(
            search_begin, search_end, full_probe, 'M', 10)
        self.assertEqual(found.year, search_end.year)
        self.assertEqual(found.month, search_end.month)
        # Daily resolution: the whole date is compared
        found = RangeInference.filter_daterange(
            search_begin, search_end, full_probe, 'D', 10)
        self.assertEqual(found, search_end)

        # A single entry, first at the very beginning of the search space
        # and then a few days after it
        for only_entry in (search_begin, date(2000, 1, 5)):
            single_probe = RangeInferenceTest.dummy_entry_probe(only_entry,
                                                                only_entry)
            found = RangeInference.filter_daterange(
                search_begin, search_end, single_probe, 'D', 10)
            self.assertEqual(found, only_entry)
0
    def test_formatted_code_inference(self):
        """
        Checks the formatted code inference with every sub-parameter
        filtered, and with only one of the two sub-parameters filtered
        """

        # Both sub-parameters range from 1 to 100, but the entries only go
        # from 01-01 to 10-10
        lower = 1
        upper = 100
        last_existing = 10
        code_probe = RangeInferenceTest.dummy_code_probe(
            "-", [last_existing, last_existing])

        # Each scenario pairs the filtering flags with the expected upper
        # limits: a filtered sub-parameter ends at the last entry, while an
        # unfiltered one keeps its original end
        scenarios = (
            ([True, True], [last_existing, last_existing]),
            ([True, False], [last_existing, upper]),
            ([False, True], [upper, last_existing]),
        )
        for filter_flags, expected_limits in scenarios:
            filtered = RangeInference.filter_formatted_code(
                "{:02}-{:02}", [(lower, upper), (lower, upper)],
                filter_flags, code_probe)
            upper_limits = [limits[-1] for limits in filtered]
            self.assertEqual(upper_limits, expected_limits)
0
    def test_process_code_inference(self):
        """
        Checks the inference of the last sequential value of process codes
        """

        LAST_VAL = 20

        def sequential_within_limit(x):
            # The probed code is the first element of x, and the text before
            # its first dash is compared against LAST_VAL
            sequential = x[0].partition("-")[0]
            return int(sequential) <= LAST_VAL

        probe = mock.Mock(spec=EntryProbing,
                          check_entry=sequential_within_limit)

        year_begin = 2010
        year_end = 2020
        segments = [4]
        courts = [2]
        origins = [0, 9999]

        inferred = RangeInference.filter_process_code(
            year_begin, year_end, segments, courts, origins, probe)
        self.assertEqual(inferred, LAST_VAL)
0
    def test_daterange_inference(self):
        """
        Checks the date range inference at yearly, monthly and daily
        resolution
        """

        interval_begin = date(2010, 1, 1)
        search_end = date(2020, 1, 1)

        # The last entry is first placed at 01/01/2012 and then at
        # 31/12/2011, the latter to catch off-by-one errors
        for interval_end in (date(2012, 1, 1), date(2011, 12, 31)):
            in_interval = RangeInferenceTest.dummy_hit_check(interval_begin,
                                                             interval_end)

            # With yearly resolution only the year value matters
            found = RangeInference.filter_daterange(
                interval_begin, search_end, in_interval, 'Y', 10)
            self.assertEqual(found.year, interval_end.year)

            # With monthly resolution, check the year and the month
            found = RangeInference.filter_daterange(
                interval_begin, search_end, in_interval, 'M', 10)
            self.assertEqual(found.year, interval_end.year)
            self.assertEqual(found.month, interval_end.month)

            # With daily resolution the dates can be compared directly
            found = RangeInference.filter_daterange(
                interval_begin, search_end, in_interval, 'D', 10)
            self.assertEqual(found, interval_end)
Пример #9
0
def create_parameter_generators(probe, parameter_handlers, filter_limits=True):
    """
    Builds one ParamInjector generator for each entry in parameter_handlers,
    optionally filtering the end of the ranges through RangeInference.

    Arguments:
        probe: object forwarded to the RangeInference filters, which use it
            to check for entries
        parameter_handlers: sequence of dict-like parameter descriptions.
            Each one has a 'parameter_type' key and the keys specific to
            that type; 'filter_range' and 'cons_misses' are read only when
            filtering
        filter_limits: when false, no range is filtered

    Returns a list with one generator per parameter, in the same order as
    parameter_handlers.

    Raises ValueError if a parameter has an unknown 'parameter_type'.
    """

    url_injectors = []
    initial_values = []

    # The parameters are processed in up to two passes: the first one
    # creates every generator and collects the first value each of them
    # yields; the second one uses those values to filter the end of the
    # limits, and only happens if filtering was requested
    passes = (1, 2) if filter_limits else (1,)

    for pass_number in passes:
        filtering = pass_number == 2

        for param_index, handler in enumerate(parameter_handlers):
            param_type = handler['parameter_type']
            param_gen = None

            if filtering and not handler['filter_range']:
                # This parameter must keep the generator created in the
                # first pass
                continue

            if filtering:
                # Extra parameters for the range inference: the first value
                # of every other parameter, with None in the position of
                # the parameter being filtered
                entries_list = list(initial_values)
                entries_list[param_index] = None
                cons_misses = int(handler['cons_misses'])
            else:
                entries_list = []
                cons_misses = None

            # NOTE(review): the values returned by the RangeInference
            # filters below are handed to the generators unchanged --
            # confirm what they return when no entry is found

            if param_type == "process_code":
                PROCESS_FORMAT = '{:07d}-{:02d}.{:04d}.{}.{:02d}.{:04d}'

                first_year = int(handler['first_year_proc_param'])
                last_year = int(handler['last_year_proc_param'])

                # The identifiers arrive as comma-separated strings
                raw_segments = handler['segment_ids_proc_param'].split(",")
                raw_courts = handler['court_ids_proc_param'].split(",")
                raw_origins = handler['origin_ids_proc_param'].split(",")
                segment_ids = [int(value) for value in raw_segments]
                court_ids = [int(value) for value in raw_courts]
                origin_ids = [int(value) for value in raw_origins]

                if filtering:
                    # Infer the upper limit of the sequential identifier
                    max_seq = RangeInference.filter_process_code(
                        first_year,
                        last_year,
                        segment_ids,
                        court_ids,
                        origin_ids,
                        probe,
                        entries_list,
                        cons_misses=cons_misses)
                else:
                    max_seq = 9999999

                # In order: sequential identifier, year, segment
                # identifiers, court identifiers and origin identifiers
                subparam_list = [
                    (0, max_seq),
                    (first_year, last_year),
                    segment_ids,
                    court_ids,
                    origin_ids,
                ]

                param_gen = ParamInjector.generate_format(
                    code_format=PROCESS_FORMAT,
                    param_limits=subparam_list,
                    verif=ParamInjector.process_code_verification,
                    verif_index=1)

            elif param_type == "number_seq":
                first_number = int(handler['first_num_param'])
                last_number = int(handler['last_num_param'])

                if filtering:
                    # Infer the end of the number range
                    last_number = RangeInference.filter_numeric_range(
                        first_number,
                        last_number,
                        probe,
                        entries_list,
                        cons_misses=cons_misses)

                param_gen = ParamInjector.generate_num_sequence(
                    first=first_number,
                    last=last_number,
                    step=int(handler['step_num_param']),
                    leading=handler['leading_num_param'],
                )

            elif param_type == 'date_seq':
                first_date = datetime.date.fromisoformat(
                    handler['start_date_date_param'])
                last_date = datetime.date.fromisoformat(
                    handler['end_date_date_param'])
                frequency = handler['frequency_date_param']
                date_format = handler['date_format_date_param']

                if filtering:
                    # Infer the end of the date range
                    last_date = RangeInference.filter_daterange(
                        first_date,
                        last_date,
                        probe,
                        frequency,
                        date_format,
                        entries_list,
                        cons_misses=cons_misses)

                param_gen = ParamInjector.generate_daterange(
                    date_format=date_format,
                    start_date=first_date,
                    end_date=last_date,
                    frequency=frequency,
                )

            elif param_type == 'alpha_seq':
                # Nothing different is done here in the second pass, since
                # alphabetic sequences can't be filtered
                word_length = int(handler['length_alpha_param'])
                word_count = int(handler['num_words_alpha_param'])
                lowercase_only = handler['no_upper_alpha_param']

                param_gen = ParamInjector.generate_alpha(
                    length=word_length,
                    num_words=word_count,
                    no_upper=lowercase_only)

            elif param_type == 'value_list':
                # No filtering is applied to this type of parameter
                param_gen = ParamInjector.generate_list(
                    elements=handler['value_list_param'])

            elif param_type == 'const_value':
                # No filtering is applied to this type of parameter
                param_gen = ParamInjector.generate_constant(
                    value=handler['value_const_param'])

            else:
                raise ValueError(f"Invalid parameter type: {param_type}")

            if not filtering or param_gen is None:
                # Duplicate the generator so that its first value can be
                # extracted without consuming the copy which is stored in
                # the list of parameter generators
                param_gen, first_value_source = itertools.tee(param_gen)
                initial_values.append(next(first_value_source))
                url_injectors.append(param_gen)
            else:
                # The range of this parameter was filtered: replace the
                # generator created in the first pass
                url_injectors[param_index] = param_gen

    return url_injectors