示例#1
0
class BaseXlSeriesScraper(object):
    """Common foundation for the top-level scraping strategies of `xlseries`.

    The public methods delegate to the `_accepts` and `_get_data_frames`
    hooks, which are not defined in this class as shown here (presumably
    supplied by concrete subclasses).

    Attributes:
        wb (Workbook): An openpyxl workbook loaded with "data_only=True"
            parameter (this avoids reading formulae).
        ws_name (str): Name of the worksheet requested by the caller, or
            None when no name was given.
        ws: Worksheet taken from `wb`: the one called `ws_name` when a name
            was given, otherwise the workbook's active sheet.
        params (Parameters): Parameters ready to be used in parsing wb. If
            none are passed, the strategy will have to discover them or
            adopt a different approach to parse wb.
    """

    def __init__(self, wb, params_path_or_obj=None, ws_name=None,
                 headers_validation=False):
        """Store the workbook, select the worksheet and set up parameters.

        Args:
            wb (Workbook): Workbook to be scraped.
            params_path_or_obj: Either a `Parameters` instance, used as is,
                or any other value, which is handed to `Parameters(...)`.
            ws_name (str): Optional name of the worksheet to use.
            headers_validation (bool): When true, header coordinates whose
                cells hold no value are removed from the parameters.
        """
        self.wb = wb
        self.ws_name = ws_name
        # A falsy name (None, "") falls back to the workbook's active sheet.
        self.ws = self.wb[self.ws_name] if self.ws_name else self.wb.active

        # Anything that is not already a Parameters instance gets wrapped.
        if not isinstance(params_path_or_obj, Parameters):
            params_path_or_obj = Parameters(params_path_or_obj)
        self.params = params_path_or_obj

        if headers_validation:
            # Drop header coordinates pointing at blank cells.
            self.params.remove_blank_headers(self.ws)

    # PUBLIC INTERFACE
    @classmethod
    def accepts(cls, wb):
        """Return the result of the class-level `_accepts` hook for `wb`."""
        verdict = cls._accepts(wb)
        return verdict

    def get_data_frames(self, safe_mode):
        """Run the `_get_data_frames` hook on the selected worksheet.

        Args:
            safe_mode: Passed through unchanged to `_get_data_frames`.

        Returns:
            Whatever `_get_data_frames(ws, params, safe_mode)` returns.
        """
        worksheet, parameters = self.ws, self.params
        return self._get_data_frames(worksheet, parameters, safe_mode)
示例#2
0
    def test_remove_blank_headers(self):
        """Exercise `Parameters.remove_blank_headers` on one shared worksheet.

        Three scenarios run in sequence against the same sheet, so values
        written by an earlier scenario are still present in the later ones.
        After each call the test checks the surviving "headers_coord" and
        that "data_starts" / "data_ends" are lists with one entry per
        surviving header.
        """
        workbook = Workbook()
        sheet = workbook.active

        def make_params(headers_coord):
            # Only the header coordinates differ between the scenarios.
            return Parameters({
                "headers_coord": headers_coord,
                "data_starts": 2,
                "data_ends": 256,
                "frequency": "m",
                "time_header_coord": "A1",
            })

        def write_cells(pairs):
            for coord, text in pairs:
                sheet[coord].value = text

        def check(parameters, surviving_headers):
            count = len(surviving_headers)
            self.assertEqual(parameters["headers_coord"], surviving_headers)
            self.assertEqual(parameters["data_starts"], [2] * count)
            self.assertEqual(parameters["data_ends"], [256] * count)

        # Scenario 1: C1 is never filled in, so it is expected to be dropped.
        first = make_params(["A1", "B1", "C1"])
        write_cells([("A1", "Importaciones"), ("B1", "Exportaciones")])
        first.remove_blank_headers(sheet)
        check(first, ["A1", "B1"])

        # Scenario 2: composite coordinates; all three headers are expected
        # to survive, reported as "A2", "B1" and "C2".
        second = make_params(["A1_A2", "B1", "C1_C2"])
        write_cells([("A1", "Importaciones"),
                     ("B1", "Exportaciones"),
                     ("C1", "Saldo")])
        second.remove_blank_headers(sheet)
        check(second, ["A2", "B1", "C2"])

        # Scenario 3: of the E-column headers only E4 holds a value, so E1,
        # E2 and E3 are expected to be removed while E4 is kept.
        write_cells([("E4", "dont remove!")])
        third = make_params(["A1", "E1", "E2", "E3", "E4"])
        write_cells([("A1", "Importaciones"),
                     ("B1", "Exportaciones"),
                     ("C1", "Saldo")])
        third.remove_blank_headers(sheet)
        check(third, ["A1", "E4"])