예제 #1
0
    def test_process_context(self):
        """Check `Parameters._process_context` on single and split ranges.

        Every case feeds a context mapping plus a list of header coordinates
        and compares the result with the expected list of context names, one
        inner list per header.
        """
        nested_context = {
            "GDP": ["A5-A6", "A8-A9"],
            "Agricultural": "A5-A6",
            "Industrial": "A8-A9"
        }
        cases = [
            # (context, headers_coord, expected context per header)
            ({"GDP": "A5-A8"},
             ["A5", "A6", "A7", "A8"],
             [["GDP"], ["GDP"], ["GDP"], ["GDP"]]),
            ({"GDP": "A5-A8"},
             ["B5", "B6", "B7", "B8"],
             [["GDP"], ["GDP"], ["GDP"], ["GDP"]]),
            ({"GDP": ["A5-A6", "A8-A9"]},
             ["B5", "B6", "B7", "B8", "B9"],
             [["GDP"], ["GDP"], [], ["GDP"], ["GDP"]]),
            (nested_context,
             ["B5", "B6", "B7", "B8", "B9"],
             [["GDP", "Agricultural"], ["GDP", "Agricultural"], [],
              ["GDP", "Industrial"], ["GDP", "Industrial"]]),
        ]

        for context, headers_coord, exp_context in cases:
            result = Parameters._process_context(context, headers_coord)
            self.assertEqual(result, exp_context)
예제 #2
0
    def test_generate_attempts(self):
        """Check the attempts generated for an undiscovered "missings".

        With only "missings" left undiscovered, two attempts are expected:
        the first one matching "missings" False and the second one matching
        "missings" True.
        """

        def build_params(missings):
            # A fresh dict literal per call, so no state is shared between
            # the Parameters objects built here.
            return Parameters({
                "alignment": "vertical",
                "headers_coord": ["B1", "C1"],
                "data_starts": 2,
                "data_ends": 256,
                "frequency": "M",
                "time_header_coord": "A1",
                "time_multicolumn": True,
                "time_composed": True,
                "time_alignment": 0,
                "continuity": True,
                "blank_rows": True,
                "missings": missings,
                "missing_value": None,
                "series_names": None
            })

        params = build_params(None)
        attempts = ParameterDiscovery._generate_attempts(["missings"], params)

        with_missings = build_params(True)
        without_missings = build_params(False)

        self.assertEqual(len(attempts), 2)

        # attempts[0] is compared with the False variant, attempts[1] with
        # the True variant.
        for index, expected in enumerate((without_missings, with_missings)):
            attempt = attempts[index]
            for param_name in attempt:
                self.assertEqual(expected[param_name], attempt[param_name])
예제 #3
0
    def test_unpack_composed_header_ranges(self):
        """Check unpacking of composed header ranges like "(A1_B1)-(A3_B3)".

        Covers a plain string range and a nested list of ranges; both mix
        lower and upper case column letters in the input.
        """
        unpacked = Parameters._unpack_header_ranges("(a5_B5)-(A8_B8)")
        self.assertEqual(unpacked, ["A5_B5", "A6_B6", "A7_B7", "A8_B8"])

        nested_ranges = [["(A1_B1)-(A3_b3)"], ["(A1_B1)-(A3_b3)"]]
        nested_expected = [["A1_B1", "A2_B2", "A3_B3"],
                           ["A1_B1", "A2_B2", "A3_B3"]]
        self.assertEqual(Parameters._unpack_header_ranges(nested_ranges),
                         nested_expected)
예제 #4
0
    def test_separate_composed_headers(self):
        """Check splitting of composed headers into prefixes and last cell.

        The expected result is a tuple: the list of leading coordinates for
        each header, and the list with the last coordinate of each header.
        """
        cases = (
            (["A1_B1", "A2_B2", "A3_B3"],
             ([["A1"], ["A2"], ["A3"]], ["B1", "B2", "B3"])),
            (["A1_B1_C1", "A2_B2_C2", "A3_B3_C3"],
             ([["A1", "B1"], ["A2", "B2"], ["A3", "B3"]],
              ["C1", "C2", "C3"])),
        )

        for headers_coord, exp in cases:
            separated = Parameters._separate_composed_headers(headers_coord)
            self.assertEqual(separated, exp)
예제 #5
0
    def test_remove_series(self):
        """Check that removing a series shrinks the per-series parameters.

        Three series are declared and the one at index 1 is removed; the
        remaining header coordinates and the length of the other per-series
        parameters must reflect two series.
        """

        params = Parameters({
            "headers_coord": ["A1", "B1", "C1"],
            "data_starts": 2,
            "data_ends": 256,
            "frequency": "m",
            "time_header_coord": "A1",
        })
        params.remove_series(1)
        self.assertEqual(params.headers_coord, ["A1", "C1"])
        # assertEqual, not assertTrue: assertTrue(x, 2) takes 2 as the
        # failure message and passes for any non-zero length.
        self.assertEqual(len(params.data_starts), 2)
        self.assertEqual(len(params.time_header_coord), 2)
예제 #6
0
    def check_critical_dict_params(self, case_num):
        """Compare parameters built from a critical dict with the expected.

        Args:
            case_num (int): Number of the test case to check.
        """
        critical_dict = self.CRITICAL_PARAMS[case_num].copy()
        params = Parameters(critical_dict)
        exp_params = load_critical_parameters_case(case_num)

        # Drop "alignment" from the loaded parameters so a guessed value
        # does not take part in the comparison.
        params.remove("alignment")

        self.assertEqual(params, exp_params)
예제 #7
0
    def test_check_consistency(self):
        """Check that inconsistent parameters raise AssertionError.

        Both header layouts are combined with a "data_starts" of 1 and must
        be rejected by `Parameters._check_consistency`.
        """
        inconsistent_headers = (
            ["A2", "B2", "C2", "D2"],
            ["B1", "B2", "B3", "B4"],
        )

        for headers_coord in inconsistent_headers:
            params_dict = {
                "data_starts": 1,
                "headers_coord": headers_coord
            }
            with self.assertRaises(AssertionError):
                Parameters._check_consistency(params_dict)
예제 #8
0
    def test_get_series_params(self):
        """Check access to parameters by name and by series index.

        Uses the "test_params_time_multicolumn.json" fixture, where each of
        the three series is expected to have two time header coordinates.
        """
        fixture = get_orig_params_path("test_params_time_multicolumn.json")
        params = Parameters(fixture)

        exp_all_series = [["A1", "A2"], ["A1", "A2"], ["A1", "A2"]]
        self.assertEqual(params["time_header_coord"], exp_all_series)

        # Indexing with an integer first selects the parameters of a series.
        first_series = params[0]
        self.assertEqual(first_series["time_header_coord"], ["A1", "A2"])
예제 #9
0
 def test_freq_translation(self):
     """Check that "Y" in a composed frequency is reported as "A".

     The frequency "YQQQQ" given for three headers is expected back as
     "AQQQQ" for each of the three series.
     """
     raw_params = {
         "headers_coord": ["A1", "B1", "C1"],
         "data_starts": 2,
         "data_ends": 256,
         "frequency": "YQQQQ",
         "time_header_coord": "A1",
     }
     params = Parameters(raw_params)

     exp_frequency = ["AQQQQ", "AQQQQ", "AQQQQ"]
     self.assertEqual(params["frequency"], exp_frequency)
예제 #10
0
    def __init__(self, wb, params_path_or_obj=None, ws_name=None,
                 headers_validation=False):
        """Store the workbook, pick the worksheet and load the parameters.

        Args:
            wb: Workbook to scrape. It is indexed by `ws_name` when a name
                is given, otherwise its `active` worksheet is used.
            params_path_or_obj: A `Parameters` instance, used as is, or any
                value accepted by the `Parameters` constructor.
            ws_name: Name of the worksheet to use, if any.
            headers_validation (bool): When true, header coordinates whose
                cell has no value in the worksheet are removed.
        """
        self.wb = wb
        self.ws_name = ws_name
        self.ws = self.wb[self.ws_name] if self.ws_name else self.wb.active

        # Reuse a ready Parameters object; build one from anything else.
        self.params = (
            params_path_or_obj
            if isinstance(params_path_or_obj, Parameters)
            else Parameters(params_path_or_obj)
        )

        if not headers_validation:
            return

        # Drop header coordinates pointing to blank cells.
        self.params.remove_blank_headers(self.ws)
예제 #11
0
    def test_case_external1(self):
        """Check "alignment" for a single header, with and without a hint.

        Without an explicit "alignment" the parameter is expected to stay
        None; when "horizontal" is given, the first series must report it.
        """
        base = {
            'data_starts': 2,
            'frequency': 'Q',
            'headers_coord': 'A53',
            'time_header_coord': 'A52'
        }
        params = Parameters(base)
        self.assertIsNone(params["alignment"])

        with_alignment = {
            'alignment': 'horizontal',
            'data_starts': 2,
            'frequency': 'Q',
            'headers_coord': 'A53',
            'time_header_coord': 'A52'
        }
        params = Parameters(with_alignment)
        first_alignment = params["alignment"][0]
        self.assertEqual(first_alignment, "horizontal")
예제 #12
0
class BaseXlSeriesScraper(object):
    """Common base for the top level scraping algorithms of `xlseries`.

    Subclasses are expected to provide `_accepts` and `_get_data_frames`,
    which the public methods of this class delegate to.

    Attributes:
        wb (Workbook): An openpyxl workbook loaded with "data_only=True"
            parameter (this avoids reading formulae).
        ws_name (str): Name of the worksheet to scrape, or None.
        ws: Worksheet taken from `wb` by name, or its active worksheet when
            no name was given.
        params (Parameters): Parameters used to parse the worksheet. Built
            from the constructor argument unless a `Parameters` object was
            passed directly.
    """

    def __init__(self,
                 wb,
                 params_path_or_obj=None,
                 ws_name=None,
                 headers_validation=False):
        """Store the workbook, pick the worksheet and load the parameters.

        Args:
            wb (Workbook): Workbook to scrape.
            params_path_or_obj: A `Parameters` instance, used as is, or any
                value accepted by the `Parameters` constructor.
            ws_name (str): Name of the worksheet to use, if any.
            headers_validation (bool): When true, header coordinates whose
                cell has no value in the worksheet are removed.
        """
        self.wb = wb
        self.ws_name = ws_name
        self.ws = self.wb[self.ws_name] if self.ws_name else self.wb.active

        # Reuse a ready Parameters object; build one from anything else.
        self.params = (
            params_path_or_obj
            if isinstance(params_path_or_obj, Parameters)
            else Parameters(params_path_or_obj)
        )

        if not headers_validation:
            return

        # Drop header coordinates pointing to blank cells.
        self.params.remove_blank_headers(self.ws)

    # PUBLIC INTERFACE
    @classmethod
    def accepts(cls, wb):
        """Return whatever the class level `_accepts` hook says about wb."""
        verdict = cls._accepts(wb)
        return verdict

    def get_data_frames(self, safe_mode):
        """Delegate to `_get_data_frames` with the stored sheet and params.

        Args:
            safe_mode: Passed through unchanged to `_get_data_frames`.

        Returns:
            The value returned by `_get_data_frames`.
        """
        worksheet, parameters = self.ws, self.params
        return self._get_data_frames(worksheet, parameters, safe_mode)
예제 #13
0
def load_parameters_case(case_num=1, special_case=None):
    """Build the `Parameters` of an integration test case from its json.

    Args:
        case_num (int): Number of the case to load.
        special_case (str): Name of a special version of the test case, if
            any.

    Returns:
        Parameters: Parameters loaded from the json file of the case.
    """
    json_name = _gen_filename(case_num, special_case, "json")
    return Parameters(os.path.join(get_param_cases_dir(), json_name))
예제 #14
0
    def test_load_from_dict(self):
        """Check that loading from a dict matches the expected parameters.

        The original json fixture is read into a dict and used to build the
        parameters; every attribute is then compared with the expected
        object, walking the attribute names of both sides.
        """
        with open(get_orig_params_path("test_params.json")) as f:
            params_dict = json.load(f)
        loaded = Parameters(params_dict).__dict__
        expected = self.params_exp.__dict__

        # First walk the expected names, then the loaded ones, so a name
        # present on only one side is noticed either way.
        for names in (expected, loaded):
            for name in names:
                self.assertEqual(loaded[name], expected[name])
예제 #15
0
    def test_context(self):
        """Check that a context range is expanded to every header.

        The "GDP" context covers the same range as the headers, so each of
        the four series is expected to carry it.
        """
        raw_params = {
            'data_starts': 2,
            'frequency': 'Q',
            'headers_coord': 'A2-A5',
            'time_header_coord': 'A1',
            'context': {
                "GDP": "A2-A5"
            }
        }
        context = Parameters(raw_params)["context"]

        self.assertEqual(context, [["GDP"], ["GDP"], ["GDP"], ["GDP"]])
예제 #16
0
    def test_composed_headers(self):
        """Check that a composed header range is split in two parameters.

        For "(A2_B2)-(A5_B5)" the last coordinate of each header is expected
        in "headers_coord" and the leading one in "composed_headers_coord".
        """
        raw_params = {
            'data_starts': 4,
            'frequency': 'Q',
            'headers_coord': '(A2_B2)-(A5_B5)',
            'time_header_coord': 'A1'
        }
        params = Parameters(raw_params)

        self.assertEqual(params["headers_coord"], ["B2", "B3", "B4", "B5"])
        self.assertEqual(params["composed_headers_coord"],
                         [["A2"], ["A3"], ["A4"], ["A5"]])
예제 #17
0
    def test_apply_to_all_missing_value(self):
        """Check replication of a missing value setting over three series.

        A bare string and a one element list give the same result; lists,
        including the empty one, are repeated once per series.
        """
        num_series = 3
        cases = (
            # (missing_value, expected value per series)
            ("-", [["-"], ["-"], ["-"]]),
            (["-"], [["-"], ["-"], ["-"]]),
            (["-", "."], [["-", "."], ["-", "."], ["-", "."]]),
            ([], [[], [], []]),
        )

        for missing_value, exp in cases:
            res = Parameters._apply_to_all_missing_value(missing_value,
                                                         num_series)
            self.assertEqual(res, exp)
예제 #18
0
    def test_get_missings(self):
        """Check which parameters `get_missings` reports as undiscovered.

        The comparison ignores order: only the set of reported names counts.
        """
        raw_params = {
            "alignment": None,
            "headers_coord": ["B1", "C1"],
            "data_starts": 2,
            "data_ends": 256,
            "frequency": "m",
            "time_header_coord": "A1",
            "time_multicolumn": None,
            "time_composed": None,
            "time_alignment": 0,
            "continuity": None,
            "blank_rows": None,
            "missings": None,
            "missing_value": None,
            "series_names": None,
            "composed_headers_coord": None,
            "context": None
        }
        params = Parameters(raw_params)

        exp_missings = {
            "time_composed", "continuity", "blank_rows", "missings"
        }
        reported = set(params.get_missings())
        self.assertEqual(exp_missings, reported)
예제 #19
0
    def test_guess_alignment(self):
        """Check the alignment guessed from different header layouts.

        Each case pairs a list of header coordinates with the alignment
        expected from `Parameters._guess_alignment`, None included.
        """
        cases = (
            (["A1", "B1", "C1"], "vertical"),
            (["A1", "B1", "D1", "E1"], "vertical"),
            (["A1", "A2"], "horizontal"),
            (["A1", "A3", "A5", "A7"], "horizontal"),
            (["A1", "A3", "A5"], None),
            (["A1", "A3", "A5", "B7"], None),
        )

        for headers, exp_alignment in cases:
            guessed = Parameters._guess_alignment(headers)
            self.assertEqual(guessed, exp_alignment)
예제 #20
0
    def test_unpack_header_ranges(self):
        """Check unpacking of header ranges given as strings or nested lists.

        Covers row and column ranges, a single cell, the "None" string and
        nested lists with and without ranges inside.
        """
        cases = (
            # (original value, expected unpacked value)
            ("a5-A8", ["A5", "A6", "A7", "A8"]),
            ("A5-c5", ["A5", "B5", "C5"]),
            ("a5", ["A5"]),
            ("None", None),
            ([["A1", "A2"], ["A1", "A2"]], [["A1", "A2"], ["A1", "A2"]]),
            ([["A1-A3"], ["A1-A3"]],
             [["A1", "A2", "A3"], ["A1", "A2", "A3"]]),
        )

        for orig, exp in cases:
            self.assertEqual(Parameters._unpack_header_ranges(orig), exp)
예제 #21
0
 def test_ensure_critical_parameters_exception(self):
     """Check that a critical parameter set to None is reported.

     "data_starts" is declared critical but given as None, so
     `Parameters._check_has_critical` must raise CriticalParameterMissing.
     """
     critical_names = ["data_starts"]
     given_params = {"data_starts": None}
     accepted_values = {"data_starts": [int]}

     with self.assertRaises(CriticalParameterMissing):
         Parameters._check_has_critical(given_params, critical_names,
                                        accepted_values)
예제 #22
0
 def setUp(self):
     """Load the original and the expected "test_params.json" fixtures."""
     fixture_name = "test_params.json"
     orig_path = get_orig_params_path(fixture_name)
     self.params = Parameters(orig_path)
     exp_path = get_exp_params_path(fixture_name)
     self.params_exp = Parameters(exp_path)
예제 #23
0
class ParametersTest(unittest.TestCase):
    """Unit tests for loading, validating and editing `Parameters`."""

    def setUp(self):
        """Load the original and the expected "test_params.json" fixtures."""
        self.params = Parameters(get_orig_params_path("test_params.json"))
        self.params_exp = Parameters(get_exp_params_path("test_params.json"))

    def tearDown(self):
        """Drop the parameters loaded in `setUp`."""
        del self.params

    def test_load_from_json(self):
        """Check that the json fixture loads into the expected attributes."""
        self.assertEqual(self.params.__dict__, self.params_exp.__dict__)

    def test_load_from_dict(self):
        """Check that loading from a dict matches the expected parameters.

        Attribute names are walked from both sides so a name present on
        only one of the objects is noticed.
        """
        with open(get_orig_params_path("test_params.json")) as f:
            params_dict = json.load(f)
        params = Parameters(params_dict)

        for exp_params_name in self.params_exp.__dict__:
            self.assertEqual(params.__dict__[exp_params_name],
                             self.params_exp.__dict__[exp_params_name])

        for orig_params_name in params.__dict__:
            self.assertEqual(params.__dict__[orig_params_name],
                             self.params_exp.__dict__[orig_params_name])

    def test_get_num_series(self):
        """Check the number of series found in a parameters dict."""
        self.assertEqual(self.params._get_num_series(self.params.__dict__), 3)
        self.assertEqual(self.params._get_num_series({"param": None}), None)

    def test_get_series_params(self):
        """Check access to parameters by name and by series index."""
        params = Parameters(
            get_orig_params_path("test_params_time_multicolumn.json"))

        self.assertEqual(params["time_header_coord"],
                         [["A1", "A2"], ["A1", "A2"], ["A1", "A2"]])

        self.assertEqual(params[0]["time_header_coord"], ["A1", "A2"])

    def test_valid_param_value(self):
        """Check validation of a value against a list of valid values."""
        self.assertTrue(self.params._valid_param_value(True, [True, False]))
        self.assertTrue(self.params._valid_param_value(True, []))
        self.assertFalse(self.params._valid_param_value("A1", [True, False]))
        self.assertFalse(self.params._valid_param_value(None, [True, False]))

    def test_valid_freq(self):
        """Check validation of simple and composed frequency strings."""
        valid_freqs = ["Y", "Q", "M", "W", "D"]
        self.assertTrue(self.params._valid_freq("YQQQQ", valid_freqs))
        self.assertTrue(self.params._valid_freq("D", valid_freqs))
        self.assertFalse(self.params._valid_freq("YQQX", valid_freqs))

    def test_freq_translation_lowercase(self):
        """Check that a lowercase "y" frequency is reported as "A".

        This test used to share its name with the composed frequency test
        below, which shadowed it; the distinct name lets both of them run.
        """
        params = Parameters({
            "headers_coord": ["A1", "B1", "C1"],
            "data_starts": 2,
            "data_ends": 256,
            "frequency": "y",
            "time_header_coord": "A1",
        })
        self.assertEqual(params["frequency"], ["A", "A", "A"])

    def test_freq_translation(self):
        """Check that "Y" in a composed frequency is reported as "A"."""
        params = Parameters({
            "headers_coord": ["A1", "B1", "C1"],
            "data_starts": 2,
            "data_ends": 256,
            "frequency": "YQQQQ",
            "time_header_coord": "A1",
        })
        self.assertEqual(params["frequency"], ["AQQQQ", "AQQQQ", "AQQQQ"])

    def test_get_missings(self):
        """Check which parameters `get_missings` reports as undiscovered.

        The comparison ignores order: only the set of reported names counts.
        """
        params = Parameters({
            "alignment": None,
            "headers_coord": ["B1", "C1"],
            "data_starts": 2,
            "data_ends": 256,
            "frequency": "m",
            "time_header_coord": "A1",
            "time_multicolumn": None,
            "time_composed": None,
            "time_alignment": 0,
            "continuity": None,
            "blank_rows": None,
            "missings": None,
            "missing_value": None,
            "series_names": None,
            "composed_headers_coord": None,
            "context": None
        })
        exp_missings = [
            "time_composed", "continuity", "blank_rows", "missings"
        ]

        self.assertEqual(set(exp_missings), set(params.get_missings()))

    def test_validate_parameters_exception(self):
        """Check that a value outside the valid ones raises InvalidParameter."""
        params = {"continuity": "A1"}
        valid_values = {"continuity": [True, False]}
        with self.assertRaises(InvalidParameter):
            self.params._validate_parameters(params, valid_values)

    def test_remove_blank_headers(self):
        """Check removal of headers whose worksheet cell is blank.

        The same worksheet is reused by the three scenarios, so values
        written in one of them are still there for the next ones.
        """

        wb = Workbook()
        ws = wb.active

        # C1 is still blank here, so its header must be removed.
        params = Parameters({
            "headers_coord": ["A1", "B1", "C1"],
            "data_starts": 2,
            "data_ends": 256,
            "frequency": "m",
            "time_header_coord": "A1",
        })
        ws["A1"].value = "Importaciones"
        ws["B1"].value = "Exportaciones"
        params.remove_blank_headers(ws)

        self.assertEqual(params["headers_coord"], ["A1", "B1"])
        self.assertEqual(params["data_starts"], [2, 2])
        self.assertEqual(params["data_ends"], [256, 256])

        # Composed headers: the three of them are expected to be kept.
        params = Parameters({
            "headers_coord": ["A1_A2", "B1", "C1_C2"],
            "data_starts": 2,
            "data_ends": 256,
            "frequency": "m",
            "time_header_coord": "A1",
        })
        ws["A1"].value = "Importaciones"
        ws["B1"].value = "Exportaciones"
        ws["C1"].value = "Saldo"
        params.remove_blank_headers(ws)

        self.assertEqual(params["headers_coord"], ["A2", "B1", "C2"])
        self.assertEqual(params["data_starts"], [2, 2, 2])
        self.assertEqual(params["data_ends"], [256, 256, 256])

        # Only E4 has a value in column E, so E1 to E3 must be removed.
        ws["E4"].value = "dont remove!"
        params = Parameters({
            "headers_coord": ["A1", "E1", "E2", "E3", "E4"],
            "data_starts": 2,
            "data_ends": 256,
            "frequency": "m",
            "time_header_coord": "A1",
        })
        ws["A1"].value = "Importaciones"
        ws["B1"].value = "Exportaciones"
        ws["C1"].value = "Saldo"
        params.remove_blank_headers(ws)

        self.assertEqual(params["headers_coord"], ["A1", "E4"])
        self.assertEqual(params["data_starts"], [2, 2])
        self.assertEqual(params["data_ends"], [256, 256])

    def test_remove_series(self):
        """Check that removing a series shrinks the per-series parameters."""

        params = Parameters({
            "headers_coord": ["A1", "B1", "C1"],
            "data_starts": 2,
            "data_ends": 256,
            "frequency": "m",
            "time_header_coord": "A1",
        })
        params.remove_series(1)
        self.assertEqual(params.headers_coord, ["A1", "C1"])
        # assertEqual, not assertTrue: assertTrue(x, 2) takes 2 as the
        # failure message and passes for any non-zero length.
        self.assertEqual(len(params.data_starts), 2)
        self.assertEqual(len(params.time_header_coord), 2)
예제 #24
0
    def test_remove_blank_headers(self):
        """Check removal of headers whose worksheet cell is blank.

        The same worksheet is reused by the three scenarios, so values
        written in one of them are still there for the next ones.
        """

        def build_params(headers_coord):
            # Only the header coordinates change between scenarios.
            return Parameters({
                "headers_coord": headers_coord,
                "data_starts": 2,
                "data_ends": 256,
                "frequency": "m",
                "time_header_coord": "A1",
            })

        def check(params, exp_headers):
            count = len(exp_headers)
            self.assertEqual(params["headers_coord"], exp_headers)
            self.assertEqual(params["data_starts"], [2] * count)
            self.assertEqual(params["data_ends"], [256] * count)

        book = Workbook()
        sheet = book.active

        # C1 is still blank here, so its header must be removed.
        plain = build_params(["A1", "B1", "C1"])
        sheet["A1"].value = "Importaciones"
        sheet["B1"].value = "Exportaciones"
        plain.remove_blank_headers(sheet)
        check(plain, ["A1", "B1"])

        # Composed headers: the three of them are expected to be kept.
        composed = build_params(["A1_A2", "B1", "C1_C2"])
        sheet["A1"].value = "Importaciones"
        sheet["B1"].value = "Exportaciones"
        sheet["C1"].value = "Saldo"
        composed.remove_blank_headers(sheet)
        check(composed, ["A2", "B1", "C2"])

        # Only E4 has a value in column E, so E1 to E3 must be removed.
        sheet["E4"].value = "dont remove!"
        sparse = build_params(["A1", "E1", "E2", "E3", "E4"])
        sheet["A1"].value = "Importaciones"
        sheet["B1"].value = "Exportaciones"
        sheet["C1"].value = "Saldo"
        sparse.remove_blank_headers(sheet)
        check(sparse, ["A1", "E4"])