Пример #1
0
 def real_build_front_layout(self, page):
     regionlist = []
     n = 0
     columns = ask(
         """Enter the column offsets of the vote columns, separated by commas""",
         CSV(int))
     for cnum, column in enumerate(columns):
         print "Contests for Column", cnum, "at x offset", column
         while True:
             contest = ask(
                 """Enter a contest name.  When done entering contests, \ntype 'x' and the <enter> key to continue."""
             )
             if contest.strip().lower() == "x":
                 break
             choices = ask("Enter a comma separated list of choices", CSV())
             # values are the x1,y1,x2,y2 of the bounding box of the contest
             # bounding box, 0 for regular contest or 1 for proposition,
             # and the text of the contest; we'll just dummy them here
             regionlist.append(
                 Ballot.Contest(column, 1, 199, 5 * const.dpi, 0, contest))
             for choice in choices:
                 x_offset = ask(
                     "Enter the x offset of the upper left hand corner \nof the printed vote target for "
                     + choice, int)
                 y_offset = ask(
                     "Enter the y offset of the upper left hand corner \nof the printed vote target for "
                     + choice, int)
                 # values are the x,y of the upper left corner
                 # of the printed vote opportunity,
                 # and the text of the choice
                 #TODO add x2,y2
                 regionlist[-1].append(
                     Ballot.Choice(x_offset, y_offset, choice))
     return regionlist
Пример #2
0
    def build_layout(self, page):
        """ get layout and ocr information from Demo ballot

        Building the layout will be the largest task for registering
        a new ballot brand which uses a different layout style.

        Here, we'll ask the user to enter column x-offsets, 
        then contests and their regions,
        and choices belonging to the contest.
        """
        print """Entering build_layout.

You will need to provide a comma separated list of column offsets,
then you will need to provide, for each column, information about
each contest in that column: its contest text, its starting y offset,
and the same for each choice in the contest.
"""
        regionlist = []
        n = 0
        columns = ask(
            """Enter the column offsets of the vote columns, separated by commas""",
            CSV(int)
        )
        for cnum, column in enumerate(columns):
            print "Contests for Column", cnum, "at x offset", column
            while True:
                contest = ask("""Enter a contest name.  When done entering contests, \ntype 'x' and the <enter> key to continue.""")
                if contest.strip().lower() == "x":
                    break
                choices = ask("Enter a comma separated list of choices",
                    CSV())
                # values are the x1,y1,x2,y2 of the bounding box of the contest
                # bounding box, 0 for regular contest or 1 for proposition,
                # and the text of the contest; we'll just dummy them here
                regionlist.append(Ballot.Contest(column, 1, 199, 5*const.dpi, 0, contest))
                for choice in choices:
                    x_offset = ask("Enter the x offset of the upper left hand corner \nof the printed vote target for " + choice, int)
                    y_offset = ask("Enter the y offset of the upper left hand corner \nof the printed vote target for " + choice, int)
                    # values are the x,y of the upper left corner
                    # of the printed vote opportunity, 
                    # and the text of the choice
                    #TODO add x2,y2
                    regionlist[-1].append(Ballot.Choice(x_offset, y_offset, choice))
        return regionlist
Пример #3
0
    def get_contests_and_votes_from(self, image, regionlist, croplist):
        """Split a mixed description/vote area into contests and choices.

        The area ``croplist`` of ``image`` holds contest descriptions and
        vote lines, and the descriptions are not indented away from the
        ovals.  Every dark zone reported by ``self.get_dark_zones`` is
        therefore classified with two tests:

        * the strip between the end of the oval and the start of the
          choice text is nearly white (first-band mean above 244);
        * OCR of the ``const.dpi`` pixels following that strip yields
          fewer than six characters (a lone YES or NO, for instance).

        Both tests pass: the zone is a vote line.  A ``Ballot.Contest``
        named after the description text gathered so far is appended to
        ``regionlist`` if none is open, and a ``Ballot.Choice`` is appended
        to the last contest.  Both fail: the zone is description text and
        is added to the pending description.  Only one passes: the text is
        added to the pending description when a contest is open, otherwise
        the ambiguity is logged.

        Returns the list of dark zones (not the contests, which are
        delivered through ``regionlist``).
        """
        def to_pixels(inches):
            return int(round(const.dpi * inches))

        oval_start = to_pixels(0.14)
        oval_end = to_pixels(0.39)
        # choice text starts past the oval, a gap as wide as the oval's
        # own inset, and a further 0.02"
        text_start = oval_end + oval_start + to_pixels(0.02)

        pending_description = ""
        area = image.crop(croplist)
        zones = self.get_dark_zones(area)
        contest_open = False
        for zone in zones:
            top = zone[0]
            bottom = zone[1]
            # whole line, trimmed by a tenth of const.dpi at both sides
            # to stay clear of the edges
            line_crop = area.crop((const.dpi / 10, top,
                                   area.size[0] - (const.dpi / 10), bottom))
            # strip between the oval and the choice text
            gap_crop = area.crop((oval_end, top, text_start, bottom))
            gap_stat = ImageStat.Stat(gap_crop)
            # where a choice label would be printed
            label_crop = area.crop(
                (text_start, top, text_start + const.dpi, bottom))
            line_text = self.extensions.ocr_engine(line_crop)
            line_text = self.extensions.ocr_cleaner(line_text)
            label_text = self.extensions.ocr_engine(label_crop)
            label_text = self.extensions.ocr_cleaner(label_text)

            gap_is_white = gap_stat.mean[0] > 244
            label_is_short = len(label_text) < 6

            if gap_is_white and label_is_short:
                # vote line: open a contest first if none is open
                if not contest_open:
                    contest_open = True
                    self.log.debug(
                        "Creating contest %s" % (pending_description, ))
                    regionlist.append(
                        Ballot.Contest(croplist[0], croplist[1] + top,
                                       croplist[2], croplist[1] + bottom, 0,
                                       pending_description))
                    pending_description = ""
                self.log.debug("Adding choice %s" % (label_text, ))
                regionlist[-1].append(
                    Ballot.Choice(croplist[0] + oval_start,
                                  croplist[1] + top, label_text))
            elif gap_is_white or label_is_short:
                # the two tests disagree
                if contest_open:
                    pending_description += line_text.replace("\n", "//")
                else:
                    self.log.debug(
                        "Problem determining whether contest or choice")
                    self.log.debug("Gap mean values %s" % (gap_stat.mean, ))
                    self.log.debug("Zone3 text %s" % (label_text, ))
                    self.log.debug(
                        "Contest string: %s" % (pending_description, ))
            else:
                # description text: the next vote line starts a new contest
                contest_open = False
                pending_description += line_text.replace("\n", "/")
        return zones
Пример #4
0
    def generate_transition_list_from_zones(self, image, regionlist,
                                            column_bounds, left, middle):
        """Walk one column's zone list and append the contests found.

        ``left`` is a sequence of zones; element 0 of a zone is its starting
        y and element 1 a one-letter kind: 'B' (black), 'G' (gray) or
        'W' (white).  A zone ends where the next zone starts.

        * A black zone taller than a quarter of ``const.dpi`` is OCR'd as
          the current jurisdiction and marks the zone that follows it as a
          Yes/No vote area.
        * A gray zone taller than half of ``const.dpi`` is OCR'd (after
          ``elim_halftone``) as a contest title; a ``Ballot.Contest`` is
          appended to ``regionlist`` and the zone that follows is marked
          as that contest's vote area.
        * A marked white zone is handed to ``get_only_votes_from`` and/or
          ``get_contests_and_votes_from``.

        A mark only applies to the zone immediately after the one that set
        it, whatever the kind of that zone.

        ``column_bounds`` supplies the left and right x of the column at
        indices 0 and 1.  ``middle`` is accepted but not used.

        Returns ``regionlist`` (the same object, extended in place).
        """
        contest_instance = None
        next_white_is_votearea = False
        next_white_is_yesno = False
        for n, zone in enumerate(left):
            # marks set by the previous zone are consumed here, even when
            # this zone turns out not to be white
            this_white_is_votearea = next_white_is_votearea
            this_white_is_yesno = next_white_is_yesno
            next_white_is_votearea = False
            next_white_is_yesno = False

            this_y = zone[0]
            kind = zone[1]
            # the last zone has no successor; a placeholder whose y is 0
            # stands in for it
            if n + 1 < len(left):
                next_zone = left[n + 1]
            else:
                next_zone = [0, 'X']
            next_y = next_zone[0]

            if kind == 'B':
                self.log.debug("Black zone at %d to %d %s" %
                               (this_y, next_y, next_zone))
                # if it's a legitimate black zone and the next zone is white,
                # that white zone is a Yes/No Vote Area (or empty)
                if (next_y - this_y) > (const.dpi / 4):
                    next_white_is_yesno = True
                    # this zone becomes the current Jurisdiction; the
                    # contest itself is set later from the descriptive
                    # text at the start of the Yes/No vote area
                    crop = image.crop(
                        (column_bounds[0], this_y, column_bounds[1], next_y))
                    cjurisdiction = self.extensions.ocr_engine(crop)
                    self.log.debug("Jurisdiction %s" % (cjurisdiction, ))
                    cjurisdiction = self.extensions.ocr_cleaner(cjurisdiction)
                    cjurisdiction = cjurisdiction.replace("\n", "//").strip()
                    self.log.debug("Cleaned Jurisdiction %s" %
                                   (cjurisdiction, ))
            elif kind == 'G':
                self.log.debug("Gray zone at %d to %d %s" %
                               (this_y, next_y, next_zone))
                # if it's a legitimate gray zone and the next zone is white,
                # that white zone is a voting area (or empty)
                if (next_y - this_y) > (const.dpi / 2):
                    next_white_is_votearea = True
                    crop = image.crop(
                        (column_bounds[0], this_y, column_bounds[1], next_y))
                    crop = Image.eval(crop, elim_halftone)
                    ccontest = self.extensions.ocr_engine(crop)
                    ccontest = ccontest.replace("\n", "//").strip()
                    self.log.debug("Contest %s" % (ccontest, ))
                    ccontest = self.extensions.ocr_cleaner(ccontest)
                    self.log.debug("Cleaned Contest %s" % (ccontest, ))
                    # NOTE(review): the lower bound is this_y + next_y,
                    # not next_y; kept as found - confirm what
                    # Ballot.Contest expects here
                    contest_instance = Ballot.Contest(column_bounds[0], this_y,
                                                      column_bounds[1],
                                                      this_y + next_y, 0,
                                                      ccontest)
                    regionlist.append(contest_instance)
            elif kind == 'W':
                if this_white_is_votearea:
                    # no descriptive text anticipated
                    self.get_only_votes_from(
                        image, contest_instance,
                        (column_bounds[0], this_y, column_bounds[1], next_y))
                if this_white_is_yesno:
                    # descriptive text sets current contest,
                    # votes are in stretches where the middle is white
                    self.get_contests_and_votes_from(
                        image, regionlist,
                        (column_bounds[0], this_y, column_bounds[1], next_y))
                self.log.debug("White zone at %d to %d %s" %
                               (this_y, next_y, next_zone))
        return regionlist
Пример #5
0
    def build_layout(self, page, back=False):
        """Build the contest list for a deskewed Diebold ballot page.

        Column boundaries are not detected; they are pre-filled at fixed
        offsets from the x of the first landmark, matching an 8.5" wide
        three column ballot.

        For every column, horizontal lines are searched at the column's
        center; pairs of lines at least half of ``const.dpi`` apart are
        potential contests.  Vote targets are then searched between each
        pair, and every potential contest holding at least one target
        becomes a ``Ballot.Contest`` whose text is OCR'd from the area
        above its first target.  Every target becomes a ``Ballot.Choice``
        whose text is OCR'd from the area to its right.

        ``back`` is accepted but not consulted.

        Returns the list of contests.
        """
        hairline = adj(0.01)
        margin = adj(0.03)
        found_contests = []
        # columns begin 1/32" from inboard side of first dash,
        # and the first two columns of a three column Diebold ballot
        # are each 2.75" wide
        anchor_x = page.landmarks[0][0]
        bounds = tuple(anchor_x + adj(inches)
                       for inches in (.03, 2.78, 5.53, 8.03))
        target_inset = adj(const.vote_target_horiz_offset_inches)
        target_width = adj(const.target_width_inches)

        # each boundary but the last starts a column, and each column
        # ends where the next boundary lies
        for col_left, col_right in zip(bounds[:-1], bounds[1:]):
            # config file vote offset plus half the configured oval width
            # gives the x of the oval centers
            target_mid_x = col_left + target_inset + (target_width / 2)
            col_mid_x = (col_left + col_right) / 2

            hlines = find_horizontal_lines(page, col_mid_x, const.dpi)
            candidates = find_potential_contests(hlines, const.dpi / 2)
            for top_y, bottom_y in candidates:
                self.log.debug("Searching targets from %d,%d to %d,%d" %
                               (col_left, top_y, col_right, bottom_y))
                targets = find_untinted_voteops(page, target_mid_x, top_y,
                                                bottom_y, const.dpi)
                # a pair of lines without vote targets is not a contest
                if len(targets) == 0:
                    continue

                # contest text sits between the top line and a tenth of
                # const.dpi above the first target
                title_box = (col_left + hairline,
                             top_y + hairline,
                             col_right - hairline,
                             targets[0][1] - const.dpi / 10)
                title = self.extensions.ocr_engine(
                    page.image.crop(title_box))
                title = self.extensions.ocr_cleaner(title)
                contest = Ballot.Contest(col_left, top_y, col_right,
                                         bottom_y, 0, title)
                found_contests.append(contest)

                last_index = len(targets) - 1
                for index, (target_x, target_y) in enumerate(targets):
                    label_x = target_x + target_width + margin
                    label_y = target_y - margin
                    if index < last_index:
                        next_x, next_y = targets[index + 1]
                    else:
                        # after the last target the contest's own right
                        # and bottom edges act as the next target
                        next_x = col_right - hairline
                        next_y = bottom_y - hairline
                    if abs(next_x - target_x) > (const.dpi / 4):
                        # next target is on the same row: text runs up
                        # to it and down to the contest bottom
                        label_box = (label_x, label_y,
                                     next_x - margin,
                                     bottom_y - hairline)
                    else:
                        # next target is underneath: text runs to the
                        # column edge and down to the next target
                        label_box = (label_x, label_y,
                                     col_right - margin,
                                     next_y - margin)
                    label = self.extensions.ocr_engine(
                        page.image.crop(label_box))
                    label = self.extensions.ocr_cleaner(label)
                    contest.choices.append(
                        Ballot.Choice(target_x, target_y, label))
        return found_contests
Пример #6
0
    def get_title_and_votes_from(self,
                                 image,
                                 regionlist,
                                 croplist,
                                 last_title="NO TITLE"):
        """Extract one contest (title plus choices) from a cropped area.

        The area ``croplist`` of ``image`` is expected to hold a title at
        the top followed by vote lines with ovals in the oval column.
        Every dark zone reported by ``self.get_dark_zones`` is first given
        a one-letter style:

        * "T" - no oval has been seen yet and ``oval_pattern`` finds none
          in this zone: title text;
        * "V" - at least 80% of the oval height tall and dark within
          ``adj(0.02)`` of the oval column's left edge: vote line;
        * "W" - not that tall, with its first dark pixel no further left
          than ``adj(0.02)`` before the choice text column (presumably a
          write-in line);
        * "-" - anything else.

        Title zones are OCR'd and joined with "/" to name a
        ``Ballot.Contest``, which is appended to ``regionlist`` when the
        first vote line is reached.  A title shorter than four characters
        is replaced by ``last_title``; titles are cut to 80 characters.
        Every vote line becomes a ``Ballot.Choice`` appended to the
        contest.  A "W" zone followed by a "W" or "-" zone relabels the
        most recent choice "Writein".

        Returns ``regionlist``, or a new empty list when the right or
        bottom edge of ``croplist`` is 0.
        """
        ov_off = adj(const.vote_target_horiz_offset_inches)
        ov_ht = adj(const.target_height_inches)
        ov_wd = adj(const.target_width_inches)
        ov_end = ov_off + ov_wd
        txt_off = adj(const.candidate_text_horiz_offset_inches)

        # nothing to scan in a degenerate area; checked before cropping
        if croplist[2] == 0 or croplist[3] == 0:
            return []
        crop = image.crop(croplist)

        def read_text(box):
            # OCR a region of the crop and flatten it to a single line
            text = ocr.tesseract(crop.crop(box))
            text = ocr.clean_ocr_text(text)
            return text.strip().replace("\n", "//").strip()

        dark_zones = self.get_dark_zones(crop)

        # first pass: give every dark zone a style
        encountered_oval = False
        dzstyle = []
        for dz in dark_zones:
            # crop each dark strip
            # losing the area to the left of the possible vote target
            # and an equivalent area on the right
            dzcrop = crop.crop((ov_off, dz[0], crop.size[0] - ov_off, dz[1]))

            # x of the leftmost dark pixel (strip width when there is
            # none); each row only needs scanning up to the best x so far
            firstx = dzcrop.size[0]
            for y in range(dzcrop.size[1]):
                for x in range(firstx):
                    if dzcrop.getpixel((x, y))[0] < 192:
                        firstx = x
                        break

            # unfortunately, it is hard to tell a filled oval from a title
            # that begins about the same x offset as ovals; we rely on
            # titles coming first, on ovals starting at a defined offset
            # with a minimum height and, if empty, matching a particular
            # dark/light pattern
            tall_enough = (dz[1] - dz[0] >= int(ov_ht * .8))
            ov_pat = oval_pattern(dzcrop, ov_ht, ov_wd, txt_off - ov_off)

            if not encountered_oval and not ov_pat:
                dzstyle.append("T")
            elif tall_enough and firstx <= adj(0.02):
                dzstyle.append("V")
                encountered_oval = True
            elif ((firstx >= (txt_off - ov_off - adj(0.02)))
                  and not tall_enough):
                dzstyle.append("W")
            else:
                dzstyle.append("-")

        # second pass: turn the styled zones into a contest and choices
        contest_instance = None
        choice = None
        # collected title lines; None once the title has been used up
        title_array = []
        contest_created = False
        min_oval_rows = adj(0.03)
        for index, style in enumerate(dzstyle):
            zone_top = dark_zones[index][0]
            zone_bottom = dark_zones[index][1]
            if style == "T":
                zonetext = read_text(
                    (adj(0.1), zone_top, crop.size[0] - adj(0.1),
                     zone_bottom))
                # NOTE(review): title_array is None here if a "W" zone
                # was handled before any "V" zone; that case raises
                # AttributeError, as it always has - confirm intent
                title_array.append(zonetext)
            elif style == "V":
                if title_array is not None:
                    zonetext = "/".join(title_array)
                    title_array = None
                    if len(zonetext) < 4:
                        zonetext = last_title
                    contest_instance = Ballot.Contest(croplist[0], croplist[1],
                                                      croplist[2], croplist[3],
                                                      0, zonetext[:80])
                    contest_created = True
                    regionlist.append(contest_instance)
                if not contest_created:
                    # a choice without a contest cannot be stored; warn
                    # and move on instead of stopping in the debugger
                    print("WARNING: Choice but no contest.")
                    continue
                zonetext = read_text(
                    (txt_off, zone_top, crop.size[0] - adj(0.1),
                     zone_bottom))

                # find the y at which the actual oval begins
                # which may be lower than the dark_zone start:
                # look up to 0.2 inches beneath beginning of dark zone
                # for an oval darkening the oval region
                choice_y = zone_top
                contig = 0
                for dy in range(adj(0.2)):
                    ovalcrop = crop.crop((ov_off, choice_y + dy, ov_end,
                                          choice_y + dy + 1))
                    ovalstat = ImageStat.Stat(ovalcrop)
                    if ovalstat.extrema[0][0] < 240:
                        contig += 1
                        if contig > min_oval_rows:
                            # enough contiguous dark rows: step back to
                            # roughly where the run began
                            choice_y += (dy - min_oval_rows)
                            break
                    else:
                        contig = 0

                choice = Ballot.Choice(croplist[0] + ov_off,
                                       croplist[1] + choice_y, zonetext)
                contest_instance.append(choice)
            elif style == "W" and len(dzstyle) > (
                    index + 1) and dzstyle[index + 1] in "W-":
                # from here on no further title text is expected
                title_array = None
                # relabel the latest choice, if there is one yet
                if choice is not None:
                    choice.description = "Writein"
        return regionlist
Пример #7
0
    def build_regions(self, page, tm_list, dpi, stop=True, verbose=False):
        """Build regions returns a list of Contests found on the page.

        For every entry of ``page.top_columns`` the ovals and text zones of
        the column are located (``column_oval_search`` and
        ``column_textzone_search``).  Text zones lying close to an oval
        belong to that oval; a run of text zones not belonging to any oval
        is OCR'd as contest text once the next oval-related text zone is
        reached, producing a ``Ballot.Contest``.  The text beside each oval
        is OCR'd once and becomes a ``Ballot.Choice`` appended to the most
        recent contest (ovals met before any contest exists are only
        printed).

        ``tm_list``, ``stop`` and ``verbose`` are accepted but not used.
        The column width is the distance between the first two entries of
        ``page.top_columns``, or ``2 * dpi`` when that cannot be computed.

        Returns the list of contests.
        """
        regionlist = []
        top_columns = page.top_columns
        try:
            column_width = top_columns[1][0] - top_columns[0][0]
        except (IndexError, TypeError):
            # fewer than two usable column entries
            column_width = 2 * dpi
        for top_xy in top_columns:
            column_x = top_xy[0]
            matched = []
            ovals = self.column_oval_search(page, column_x)
            textzones = self.column_textzone_search(
                page, column_x + (column_width / 2))
            ovals.sort()
            textzones.sort()
            # bounds of the current run of text not related to an oval;
            # 0 means no run is open
            zonestart = 0
            zoneend = 0
            for textzone in textzones:
                match = 0
                # any text beginning from 1/16" above the oval
                # to 1/4" below
                # is associated with the oval; the last such oval wins
                for oval in ovals:
                    if (oval - dpi / 16) < textzone < (oval + dpi / 4):
                        match = oval
                if match > 0:
                    if zonestart > 0 and zoneend > zonestart:
                        # output last nonmatching textzone
                        croplist = (column_x - dpi / 8, zonestart,
                                    column_x + column_width - dpi / 4,
                                    zoneend)
                        crop = page.image.crop(croplist)

                        # The extensions object offers the ability
                        # to provide the ocr and text cleanup functions
                        # of your choice.
                        text = self.extensions.ocr_engine(crop)
                        text = self.extensions.ocr_cleaner(text)

                        print("Contest Text: %s" % (text, ))
                        # the contest must be built before the run is
                        # closed, or its y would always be 0
                        regionlist.append(
                            Ballot.Contest(column_x, zonestart, column_width,
                                           dpi, 0, text))
                        zonestart = 0
                        zoneend = 0
                    # get text for ovals only once
                    if match not in matched:
                        croplist = (column_x + dpi / 4, match - (dpi / 50),
                                    column_x + column_width - dpi / 4,
                                    match + (dpi / 3))
                        crop = page.image.crop(croplist)
                        text = self.extensions.ocr_engine(crop)
                        text = self.extensions.ocr_cleaner(text)
                        print("Oval (%d, %d): %s" % (column_x, match,
                                                     text.strip()))
                        if regionlist:
                            regionlist[-1].append(
                                #TODO add x2, y2, remove text
                                Ballot.Choice(column_x, match, text))
                        # now enter the just matched oval into a list
                        # of already printed ovals
                        matched.append(match)
                else:
                    if zonestart == 0:
                        zonestart = textzone
                    # textzone includes both the 1/32 which may have contributed
                    # a dark pixel into the triggering crop,
                    # and an additional 16th inch to take into account the chance
                    # there was another dark zone not quite long enough
                    # to become a text zone
                    zoneend = textzone + (dpi / 32) + (dpi / 16)
        return regionlist
Пример #8
0
def build_template(im, dpi, code, xoff, yoff, tilt, front=True):
    """Build a template of arrow locations from a ballot image.

    When a ballot image is used for template construction,
    it is assumed that the caller has derotated it first!

    This code is not yet general; it assumes two arrow columns
    at set locations (column1_offset and column2_offset inches to the
    right of xoff).  Within each column it first looks for a thick
    horizontal black bar, then scans downward for arrows: rows in which
    both the arrow-head strip and the arrow-tail strip are dark
    (first channel < 128 in range 0..255), accumulated over more than
    0.05".  The scan starts 1.5" below yoff on the front and stops 1.2"
    above the bottom of the image.

    Arguments:
    im -- image exposing getpixel((x, y)) and size; pixels must be
          indexable because only the first channel is inspected
    dpi -- resolution used for the front-side start offset and for the
           vertical offset handed to get_text_for_arrow_at; all other
           inch conversions use const.dpi
    code -- unused in this function
    xoff, yoff -- pixel offsets the column positions are relative to
    tilt -- unused in this function (the image is assumed derotated)
    front -- True if the image is the front of a ballot

    Returns a list of Ballot.Contest objects, each with one
    Ballot.Choice appended per arrow found.
    """
    # convert inches to whole pixels at the configured resolution
    adj = lambda f: int(round(const.dpi * f))
    regionlist = []
    # NOTE(review): n is never incremented, so every column is handled
    # as if it were the first one -- confirm whether that is intended
    # before changing it.
    n = 0
    for x in (xoff + adj(column1_offset), xoff + adj(column2_offset)):
        # skip the code block if you're on a front
        if n == 0 and front:
            starty = int(yoff + int(1.5 * dpi))
        else:
            starty = int(yoff - 1)
        adjx, adjy = x, starty  # XXX assuming ballot derotated by here
        # turn search on only once a thick black bar is encountered:
        # more than 0.05" of consecutive rows that are dark all the way
        # from 0.1" to 0.5" right of the column position
        contig = 0
        for y in range(adjy, im.size[1]):
            all_black_line = True
            for x2 in range(int(adjx + adj(0.1)), int(adjx + adj(0.5))):
                pix = im.getpixel((x2, y))
                if pix[0] > 128:
                    all_black_line = False
                    break
            if all_black_line:
                contig = contig + 1
            else:
                contig = 0
            if contig > adj(0.05):
                if n == 0:
                    starty = y
                break
        if n == 0:
            starty = starty + adj(0.2)
        # starty is now 0.2 inches below the bar (or below the initial
        # start row if no bar was found); arrows may be encountered from
        # here until 1.2 inches above the bottom of the image
        contig = 0
        # search at .15 inches in for first half of arrow
        searchx1 = x + adj(0.15)
        # search at .55 inches in for second half of arrow
        searchx2 = x + adj(0.55)
        skip = 0
        contest_x = 0
        contest_y = 0
        # Contest that arrows in this column are currently attached to.
        # Tracked per column so a choice can never be appended to an
        # empty regionlist or to a contest of the previous column.
        current_contest = None
        # stop looking for arrows at 1.2 inches up from the bottom
        for y in range(int(starty), int(im.size[1] - adj(1.2))):
            if skip > 0:
                skip = skip - 1
                continue
            pix1 = im.getpixel((searchx1, y))
            pix2 = im.getpixel((searchx2, y))
            # look for .05 vertical inches of dark
            # in vertical strips that contain left
            # and right halves of arrow
            # NOTE(review): contig is not reset on a light row, so the
            # dark rows need not be strictly contiguous -- confirm.
            if not (pix1[0] < 128 and pix2[0] < 128):
                continue
            contig = contig + 1
            if contig <= adj(0.05):
                continue

            # this is an arrow; clamp its location into the image,
            # upper bounds first and lower bounds second
            ll_x = max(min(x, im.size[0] - 5), adj(2.5))
            ll_y = max(min(y, im.size[1] - adj(0.5)), adj(0.5))

            text, contest_text, contest_loc = get_text_for_arrow_at(
                im, ll_x, ll_y - contig - (0.04 * dpi), const.dpi)

            # A contest is new when there is none yet for this column
            # or when either coordinate of its location changed.
            is_new_contest = (current_contest is None
                              or contest_x != contest_loc[0]
                              or contest_y != contest_loc[1])
            if is_new_contest:
                # record the location first so the Contest is created
                # at its own position, not at the previous contest's
                contest_x = contest_loc[0]
                contest_y = contest_loc[1]
                current_contest = Ballot.Contest(
                    contest_x, contest_y, 199, adj(5), 0, contest_text)
                regionlist.append(current_contest)
            else:
                # update the bottom of the contest's bounding box
                current_contest.h = ll_y + adj(0.2)
            #TODO add x2, y2
            current_contest.append(Ballot.Choice(ll_x, ll_y, text))

            # skip past arrow
            skip = adj(0.2)
            # reset contig
            contig = 0
    return regionlist
Пример #9
0
def hart_build_contests(image,
                        pot_hlines,
                        vboxes,
                        column_start,
                        column_width,
                        dpi=300,
                        extensions=None):
    """Combine horizontal lines and vote boxes into contests with choices.

    Arguments:
    image -- image exposing crop(box); only used when zones are found
    pot_hlines -- y values of potential horizontal lines; the last one
                  (in iteration order) lying above a vote box starts
                  that box's contest description zone
    vboxes -- indexable vote boxes; [0] is used as x and [1] as y
    column_start -- x of the column's left edge
    column_width -- width of the column
    dpi -- resolution used to size the crop to the right of a vote box
    extensions -- object providing ocr_engine(crop) and ocr_cleaner(text)

    Returns the list of Ballot.Contest objects created, each holding
    the Ballot.Choice objects assigned to it.  Every contest and choice
    is also written to the root logger at info level.
    """
    contests = []

    # Pass 1: a description zone runs from the line above a vote box
    # down to that box; open a new zone whenever that line changes.
    zones = []
    previous_line = 0
    line_above = 0
    for box in vboxes:
        box_top = box[1]
        lines_above = [line for line in pot_hlines if line < box_top]
        if lines_above:
            line_above = lines_above[-1]
        if line_above == previous_line:
            continue
        previous_line = line_above
        zones.append((line_above, box_top))

    # Pass 2: OCR each description zone and turn it into a Contest.
    for zone_top, zone_bottom in zones:
        right_edge = column_start + column_width
        description_crop = image.crop(
            (column_start, zone_top, right_edge, zone_bottom))
        description = extensions.ocr_cleaner(
            extensions.ocr_engine(description_crop))
        contests.append(
            Ballot.Contest(column_start, zone_top, right_edge, zone_bottom,
                           0, description))

    # Pass 3: give every vote box to the lowest zone that starts above it.
    zones_bottom_up = zones[::-1]
    for box in vboxes:
        owner = next(
            (zone for zone in zones_bottom_up if zone[0] < box[1]), None)
        if owner is None:
            continue
        # the choice text sits in the area to the right of the vote box
        text_area = (
            box[0] + dpi / 3 + dpi / 30,
            box[1] - dpi / 100,
            box[0] + column_width - (dpi / 2),
            box[1] + (dpi / 2),
        )
        raw_text = extensions.ocr_engine(image.crop(text_area))
        # take only first line of choice
        choice_text = extensions.ocr_cleaner(raw_text).split("/")[0]

        # first Contest registered at this zone's top in this column
        target = next(
            (candidate for candidate in contests
             if candidate.y == owner[0] and candidate.x == column_start),
            None)
        if target is not None:
            target.append(Ballot.Choice(box[0], box[1], choice_text))

    logger = logging.getLogger('')
    for contest in contests:
        contest_line = "%d %d %s" % (contest.x, contest.y,
                                     contest.description)
        logger.info(contest_line)
        for choice in contest.choices:
            choice_line = " %d %d %s" % (choice.x, choice.y,
                                         choice.description)
            logger.info(choice_line)

    return contests