def parse_results_line(chars):
    """Split the characters of one results line into text, affiliation and votes.

    Characters are bucketed by their ``x0rel`` value:

    * below 125           -> ``"text"``
    * between 125 and 155 -> ``"aff"``
    * above 155           -> ``"votes"`` (converted with ``int``)

    Every non-empty bucket is joined with ``collate_chars``; an empty bucket
    is reported as ``None``.

    NOTE(review): all comparisons are strict, so a character whose ``x0rel``
    is exactly 125 or 155 lands in no bucket -- confirm this is intended.

    Args:
        chars: Character table supporting boolean-mask indexing on an
            ``"x0rel"`` column (presumably a pandas DataFrame -- confirm
            against the caller).

    Returns:
        dict with the keys ``"text"``, ``"aff"`` and ``"votes"``.
    """
    x_rel = chars["x0rel"]

    def collated_or_none(bucket):
        # Empty buckets are never passed to collate_chars.
        return collate_chars(bucket) if len(bucket) else None

    text_part = collated_or_none(chars[x_rel < 125])

    vote_chars = chars[x_rel > 155]
    if len(vote_chars):
        vote_count = int(collate_chars(vote_chars))
    else:
        vote_count = None

    aff_part = collated_or_none(chars[(x_rel > 125) & (x_rel < 155)])

    return {"text": text_part, "aff": aff_part, "votes": vote_count}
    def test_pandas(self):
        """Check the first page's table and month heading via a DataFrame.

        The table is extracted from a cropped region of page 0 using the
        "text" horizontal strategy plus one explicit vertical line at the
        smallest ``x0`` of the cropped characters. For every column after the
        first, the column sum must equal twice the value in the last row
        (presumably a totals row -- confirm against the fixture).
        """
        first_page = self.pdf.pages[0]
        region = first_page.crop((0, 80, self.PDF_WIDTH, 485))

        leftmost_x0 = min(ch["x0"] for ch in region.chars)
        table_settings = {
            "horizontal_strategy": "text",
            "explicit_vertical_lines": [leftmost_x0],
            "intersection_tolerance": 5,
        }
        frame = pd.DataFrame(region.extract_table(table_settings))

        def to_int_or_none(cell):
            # Null and empty cells become None; anything else has its commas
            # stripped and is converted with int().
            if pd.isnull(cell) or cell == "":
                return None
            return int(cell.replace(",", ""))

        frame.columns = COLUMNS
        value_columns = frame.columns[1:]
        frame[value_columns] = frame[value_columns].applymap(to_int_or_none)

        # [1:] because first column is state name
        for column in COLUMNS[1:]:
            values = frame[column]
            last_value = values.iloc[-1]
            assert values.sum() == (last_value * 2)

        heading_chars = within_bbox(first_page.chars,
                                    (0, 35, self.PDF_WIDTH, 65))
        heading = collate_chars(heading_chars)
        assert heading == "November - 2015"
    def test_plain(self):
        """Check the first page's table and month heading without pandas.

        The table is extracted from a cropped region of page 0 using the
        "text" horizontal strategy plus one explicit vertical line at the
        smallest ``x0`` of the cropped characters. Each row is turned into a
        dict keyed by ``COLUMNS``; for every column after the first, the
        column sum must equal twice the value in the last row (presumably a
        totals row -- confirm against the fixture).
        """
        page = self.pdf.pages[0]
        cropped = page.crop((0, 80, self.PDF_WIDTH, 485))
        table = cropped.extract_table({
            "horizontal_strategy": "text",
            "explicit_vertical_lines": [
                min(map(itemgetter("x0"), cropped.chars))
            ],
            "intersection_tolerance": 5,
        })

        def parse_value(k, x):
            # Column 0 is returned untouched; other columns are parsed as
            # integers after stripping commas, with blanks mapped to None.
            if k == 0:
                return x
            if x in (None, ""):
                return None
            return int(x.replace(",", ""))

        def parse_row(row):
            return {COLUMNS[i]: parse_value(i, v) for i, v in enumerate(row)}

        parsed_table = [parse_row(row) for row in table]

        # [1:] because first column is state name
        for c in COLUMNS[1:]:
            total = parsed_table[-1][c]
            # None cells count as 0; the sum includes the last row itself.
            colsum = sum(row[c] or 0 for row in parsed_table)
            assert colsum == (total * 2)

        month_chars = within_bbox(page.chars, (0, 35, self.PDF_WIDTH, 65))
        month_text = collate_chars(month_chars)
        assert month_text == "November - 2015"
    def test_pandas(self):
        """Check the first page's table and month heading via a DataFrame.

        The table is extracted from a cropped region of page 0 with the
        "gutters" option and tolerances of 5. For every column after the
        first, the column sum must equal twice the value in the last row
        (presumably a totals row -- confirm against the fixture).
        """
        first_page = self.pdf.pages[0]
        region = first_page.crop((0, 80, self.PDF_WIDTH, 485))

        extract_options = {
            "h": "gutters",
            "x_tolerance": 5,
            "y_tolerance": 5,
            "gutter_min_height": 5,
        }
        frame = pd.DataFrame(region.extract_table(**extract_options))

        def to_int_or_none(cell):
            # Null cells stay None; other cells have commas stripped before int().
            return None if pd.isnull(cell) else int(cell.replace(",", ""))

        frame.columns = COLUMNS
        value_columns = frame.columns[1:]
        frame[value_columns] = frame[value_columns].applymap(to_int_or_none)

        # [1:] because first column is state name
        for column in COLUMNS[1:]:
            values = frame[column]
            last_value = values.iloc[-1]
            assert values.sum() == (last_value * 2)

        heading_box = (0, 35, self.PDF_WIDTH, 65)
        heading = collate_chars(within_bbox(first_page.chars, heading_box), x_tolerance=2)
        assert heading == "November - 2015"
    def test_pandas(self):
        """Validate the page-0 table and its month heading using pandas.

        A cropped region of the first page is run through ``extract_table``
        with the "gutters" option; the rows are loaded into a DataFrame whose
        columns are named from ``COLUMNS``. Every column after the first must
        sum to twice its last-row value (presumably the last row holds the
        totals -- confirm against the fixture).
        """
        pg = self.pdf.pages[0]
        clip = pg.crop((0, 80, self.PDF_WIDTH, 485))

        rows = clip.extract_table(
            h="gutters",
            x_tolerance=5,
            y_tolerance=5,
            gutter_min_height=5,
        )
        df = pd.DataFrame(rows)

        def as_number(raw):
            # Null cells map to None.
            if pd.isnull(raw):
                return None
            # Remove commas before converting to int.
            digits = raw.replace(",", "")
            return int(digits)

        df.columns = COLUMNS
        numeric = df.columns[1:]
        df[numeric] = df[numeric].applymap(as_number)

        # [1:] because first column is state name
        for name in COLUMNS[1:]:
            final_row_value = df[name].iloc[-1]
            column_sum = df[name].sum()
            assert column_sum == final_row_value * 2

        title_chars = within_bbox(pg.chars, (0, 35, self.PDF_WIDTH, 65))
        title = collate_chars(title_chars, x_tolerance=2)
        assert title == "November - 2015"
    def test_plain(self):
        """Check the first page's table and month heading without pandas.

        The table is extracted from a cropped region of page 0 with the
        "gutters" option and tolerances of 5. Each row is turned into a dict
        keyed by ``COLUMNS``; for every column after the first, the column sum
        must equal twice the value in the last row (presumably a totals row
        -- confirm against the fixture).
        """
        page = self.pdf.pages[0]
        cropped = page.crop((0, 80, self.PDF_WIDTH, 485))
        table = cropped.extract_table(
            h="gutters",
            x_tolerance=5,
            y_tolerance=5,
            gutter_min_height=5,
        )

        def parse_value(k, x):
            # Column 0 is returned untouched; other columns are parsed as
            # integers after stripping commas, with None passed through.
            if k == 0:
                return x
            if x is None:
                return None
            return int(x.replace(",", ""))

        def parse_row(row):
            return {COLUMNS[i]: parse_value(i, v) for i, v in enumerate(row)}

        parsed_table = [parse_row(row) for row in table]

        # [1:] because first column is state name
        for c in COLUMNS[1:]:
            total = parsed_table[-1][c]
            # None cells count as 0; the sum includes the last row itself.
            colsum = sum(row[c] or 0 for row in parsed_table)
            assert colsum == (total * 2)

        month_chars = within_bbox(page.chars, (0, 35, self.PDF_WIDTH, 65))
        month_text = collate_chars(month_chars, x_tolerance=2)
        assert month_text == "November - 2015"
def _collate_chars(x):
    """Collate ``x`` through ``collate_chars`` with ``x_tolerance`` fixed at 1."""
    fixed_tolerance = 1
    return collate_chars(x, x_tolerance=fixed_tolerance)
def _collate_chars(x):
    """Forward ``x`` to ``collate_chars``, always passing ``x_tolerance=1``."""
    collated = collate_chars(x, x_tolerance=1)
    return collated
    def test_plain(self):
        """Check the first page's table and month heading without pandas.

        The table is extracted from a cropped region of page 0 with the
        "gutters" option and tolerances of 5. Each row is turned into a dict
        keyed by ``COLUMNS``; for every column after the first, the column sum
        must equal twice the value in the last row (presumably a totals row
        -- confirm against the fixture).
        """
        page = self.pdf.pages[0]
        cropped = page.crop((0, 80, self.PDF_WIDTH, 485))
        table = cropped.extract_table(h="gutters", x_tolerance=5, y_tolerance=5, gutter_min_height=5)

        def parse_value(k, x):
            # Column 0 is returned untouched; other columns are parsed as
            # integers after stripping commas, with None passed through.
            if k == 0:
                return x
            if x is None:
                return None
            return int(x.replace(",", ""))

        def parse_row(row):
            return {COLUMNS[i]: parse_value(i, v) for i, v in enumerate(row)}

        parsed_table = [parse_row(row) for row in table]

        # [1:] because first column is state name
        for c in COLUMNS[1:]:
            total = parsed_table[-1][c]
            # None cells count as 0; the sum includes the last row itself.
            colsum = sum(row[c] or 0 for row in parsed_table)
            assert colsum == (total * 2)

        month_chars = within_bbox(page.chars, (0, 35, self.PDF_WIDTH, 65))
        month_text = collate_chars(month_chars, x_tolerance=2)
        assert month_text == "November - 2015"