Пример #1
0
 def getTransitions(self, frm):
     """Return the transitions reported by the wrapped component for `frm`.

     Four scratch arrays are handed to ``self.comp.getTransitions`` together
     with `frm` (presumably the source state -- confirm against the
     component's API), and are then converted with ``iulib.numpy``.

     Returns a 4-tuple in the order (targets, symbols, costs, inputs); the
     three integer arrays are converted with type code 'i', the costs with
     the converter's default.
     """
     targets = iulib.intarray()
     syms = iulib.intarray()
     weights = iulib.floatarray()
     ins = iulib.intarray()
     self.comp.getTransitions(targets, syms, weights, ins, frm)

     # convert in the same order in which the tuple is returned
     np_targets = iulib.numpy(targets, 'i')
     np_syms = iulib.numpy(syms, 'i')
     np_weights = iulib.numpy(weights)
     np_ins = iulib.numpy(ins, 'i')
     return (np_targets, np_syms, np_weights, np_ins)
Пример #2
0
def beam_search(lattice, lmodel, beam):
    """Run the native beam search over a lattice and a language model.

    `lattice` and `lmodel` are each passed through native_fst() before the
    call; `beam` is forwarded unchanged as the beam size.

    Returns (v1,v2,input_symbols,output_symbols,costs); the four integer
    arrays are converted with iulib.numpy(..., 'i') and the costs with
    iulib.numpy(..., 'f')."""
    path1 = iulib.intarray()
    path2 = iulib.intarray()
    in_syms = iulib.intarray()
    out_syms = iulib.intarray()
    path_costs = iulib.floatarray()
    ocrofstll.beam_search(path1, path2, in_syms, out_syms, path_costs,
                          native_fst(lattice), native_fst(lmodel), beam)

    # integer outputs first, in return order, then the float costs
    converted = [iulib.numpy(arr, 'i')
                 for arr in (path1, path2, in_syms, out_syms)]
    converted.append(iulib.numpy(path_costs, 'f'))
    return tuple(converted)
Пример #3
0
def write_line_segmentation(file, seg_):
    """Write a line segmentation to `file` as a packed image.

    The segmentation passed in as `seg_` is first copied into a scratch
    intarray; the copy is converted with make_line_segmentation_white
    (black background becomes white) and that copy is what gets written."""
    white_seg = iulib.intarray()
    white_seg.copy(seg_)
    ocropus.make_line_segmentation_white(white_seg)
    iulib.write_image_packed(file, white_seg)
Пример #4
0
def read_page_segmentation(name, black=1):
    """Read a page segmentation from the packed image file `name`.

    Raises IOError(name) if the file does not exist.  Unless `black` is
    false, the loaded data is passed through make_page_segmentation_black.
    The result is converted with narray2pseg (a numpy page segmentation:
    rank 3, type='B' RGB image)."""
    if not os.path.exists(name):
        raise IOError(name)
    packed = iulib.intarray()
    iulib.read_image_packed(packed, name)
    if black:
        iulib.make_page_segmentation_black(packed)
    return narray2pseg(packed)
Пример #5
0
def read_line_segmentation(name, black=1):
    """Read a line segmentation from the packed image file `name`.

    Raises IOError(name) if the file does not exist.  Unless `black` is
    false, the loaded data is passed through make_line_segmentation_black.
    The result is converted with narray2lseg (a numpy line segmentation)."""
    if not os.path.exists(name):
        raise IOError(name)
    packed = iulib.intarray()
    iulib.read_image_packed(packed, name)
    if black:
        iulib.make_line_segmentation_black(packed)
    return narray2lseg(packed)
Пример #6
0
def pseg2narray(pseg):
    """Pack a numpy page segmentation (rank 3, RGB, dtype 'B') into an intarray.

    The input is validated with checknp and an assertion on dtype/rank.
    Each of the three colour planes is made contiguous, converted with
    numpy2narray, and the planes are combined with iulib.pack_rgb."""
    checknp(pseg)
    assert pseg.dtype=='B' and pseg.ndim==3
    # planes[0], planes[1], planes[2] are the R, G and B channels
    planes = [numpy2narray(ascontiguousarray(pseg[:,:,c])) for c in range(3)]
    packed = iulib.intarray()
    iulib.pack_rgb(packed,planes[0],planes[1],planes[2])
    return packed
Пример #7
0
def pseg2narray(pseg):
    """Convert a page segmentation (rank 3, RGB, dtype 'B') to a packed narray.

    After checknp and a dtype/rank assertion, the red, green and blue
    planes are converted one by one and merged with iulib.pack_rgb into a
    freshly allocated intarray, which is returned."""
    checknp(pseg)
    assert pseg.dtype == 'B' and pseg.ndim == 3

    def plane(index):
        # one colour channel as a contiguous narray
        return numpy2narray(ascontiguousarray(pseg[:, :, index]))

    red = plane(0)
    green = plane(1)
    blue = plane(2)
    packed = iulib.intarray()
    iulib.pack_rgb(packed, red, green, blue)
    return packed
def recognize_and_align(image,linerec,lmodel,beam=1000,nocseg=0):
    """Recognize a text line and align the result with its segmentation.

    The line recognizer `linerec` produces a lattice and a raw
    segmentation (rseg) for `image`; a beam search of width `beam` is
    then run through the lattice and the language model `lmodel`.  The
    recognition lattice needs to have rseg's segment numbers as inputs
    (pairs of 16 bit numbers); SimpleGrouper produces such lattices.

    Returns a Record with the fields image, output (the result as a
    Python string), raw (the output symbols), costs, rseg, cseg, lattice
    and cost (the sum of the costs).

    cseg is None when `nocseg` is set or when the map computed by
    rseg_map has at most one entry.  If the renumbering itself hits an
    IndexError, an Exception("renumbering failed") is raised."""

    # recognition: fills the lattice and the raw segmentation
    lattice = ocropus.make_OcroFST()
    rseg = iulib.intarray()
    linerec.recognizeLineSeg(lattice,rseg,image)

    # beam search through the lattice and the language model
    v1 = iulib.intarray()
    v2 = iulib.intarray()
    ins = iulib.intarray()
    outs = iulib.intarray()
    costs = iulib.floatarray()
    ocropus.beam_search(v1,v2,ins,outs,costs,lattice,lmodel,beam)

    # output symbols -> Python string
    result = intarray_as_string(outs,skip0=1)

    # character segmentation: relabel a copy of rseg through the map
    # derived from the input symbols of the best path
    cseg = None
    if not nocseg:
        rmap = rseg_map(ins)
        if len(rmap)>1:
            cseg = iulib.intarray()
            cseg.copy(rseg)
            try:
                for pos in range(cseg.length()):
                    cseg.put1d(pos,int(rmap[rseg.at1d(pos)]))
            except IndexError:
                raise Exception("renumbering failed")

    # bundle up everything that was computed
    return Record(image=image,
                  output=result,
                  raw=outs,
                  costs=costs,
                  rseg=rseg,
                  cseg=cseg,
                  lattice=lattice,
                  cost=iulib.sum(costs))
Пример #9
0
def load_gt(file):
    """Load the character segmentation and transcript belonging to a line image.

    For "line.png" the ground-truth pair "line.cseg.gt.png" / "line.gt.txt"
    is tried first; if the ".cseg.gt.png" file is missing, BOTH names fall
    back to "line.cseg.png" / "line.txt".

    Returns (cseg,text):
      cseg -- an iulib.intarray read from the cseg image and passed through
              make_line_segmentation_black, or None if no cseg file exists
      text -- the complete contents of the transcript file, or None if it
              does not exist"""
    cfile = re.sub(r'\.png$','.cseg.gt.png',file)
    tfile = re.sub(r'\.png$','.gt.txt',file)
    if not os.path.exists(cfile):
        # no ground-truth segmentation: use the plain cseg/txt pair instead
        cfile = re.sub(r'\.png$','.cseg.png',file)
        tfile = re.sub(r'\.png$','.txt',file)

    cseg = None
    if os.path.exists(cfile):
        cseg = iulib.intarray()
        iulib.read_image_packed(cseg,cfile)
        ocropus.make_line_segmentation_black(cseg)

    text = None
    if os.path.exists(tfile):
        # close the transcript deterministically instead of leaking the handle
        with open(tfile) as stream:
            text = stream.read()
    return cseg,text
Пример #10
0
def chars_no_gt(files,segmenter=default_segmenter,grouper=default_grouper):
    for file in files:
        print "# loading",file
        image = iulib.bytearray()
        try:
            iulib.read_image_gray(image,file)
            segmentation = iulib.intarray()
            segmenter.charseg(segmentation,image)
            ocropus.make_line_segmentation_black(segmentation)
            iulib.renumber_labels(segmentation,1)
            grouper.setSegmentation(segmentation)
            iulib.sub(255,image)
            for i in range(grouper.length()):
                cls = None
                raw = iulib.bytearray()
                mask = iulib.bytearray()
                grouper.extractWithMask(raw,mask,image,i,1)
                yield raw,mask,cls
        except NoException:
            print "FAILED",sys.exc_info()[0]
            continue
Пример #11
0
def cseg_chars(files,suffix="gt",segmenter=None,grouper=None,has_gt=1,verbose=0):
    """Iterate through the characters contained in a cseg file.
    Argument should be a list of image files.  Given "line.png",
    uses "line.cseg.gt.png" and "line.gt.txt" if suffix="gt".
    Returns an iterator of raw,mask,cls. Attempts to align
    with ground truth unless has_gt=0."""
    # also accept individual files
    if type(files)==type(""):
        files = [files]
    # if no grouper is given, just instantiate a simple grouper
    if not grouper:
        grouper = ocropus.make_IGrouper("SimpleGrouper")
        grouper.pset("maxrange",1)
    # allow empty suffix as a special case
    if suffix is None:
        suffix = ""
    if suffix!="":
        suffix = "."+suffix
    # now iterate through all the image files
    for file in files:
        if verbose:
            print "# loading",file
        try:
            # load the text line
            image = iulib.bytearray()
            iulib.read_image_gray(image,file)
            base = re.sub("\.png$","",file)
            # load segmentation ground truth
            cseg_file = base+".cseg"+suffix+".png"
            print file,cseg_file
            cseg = iulib.intarray()
            if not os.path.exists(cseg_file):
                raise IOError(cseg_file)
            iulib.read_image_packed(cseg,cseg_file)
            ocropus.make_line_segmentation_black(cseg)
            # load text
            if has_gt:
                text_file = base+suffix+".txt"
                text = open(text_file).read()
                if text[-1]=="\n": text = text[:-1]
                if len(text)>iulib.max(cseg):
                    text = re.sub(r'\s+','',text)
                utext = iulib.ustrg()
                utext.assign(text) # FIXME should do UTF8 or u""
                if utext.length()!=iulib.max(cseg):
                    raise BadTranscript("mismatch transcript %d maxseg %d"%(utext.length(),iulib.max(cseg)))
                if verbose:
                    print "#",utext.length(),iulib.max(cseg)
            # perform the segmentation
            segmentation = iulib.intarray()
            if segmenter:
                segmenter.charseg(segmentation,image)
                ocropus.make_line_segmentation_black(segmentation)
                iulib.renumber_labels(segmentation,1)
            else:
                segmentation.copy(cseg)

            # invert the image, since that's the way we're doing
            # all the remaining processing
            iulib.sub(255,image)

            # set the segmentation in preparation for loading
            if has_gt:
                grouper.setSegmentationAndGt(segmentation,cseg,utext)
            else:
                grouper.setSegmentation(segmentation)

            # now iterate through the segments of the line
            for i in range(grouper.length()):
                cls = None
                if has_gt:
                    cls = grouper.getGtClass(i)
                    if cls==-1:
                        cls = ""
                    else:
                        cls = chr(cls)
                raw = iulib.bytearray()
                mask = iulib.bytearray()
                grouper.extractWithMask(raw,mask,image,i,1)
                # print "component",i,N(segments),amax(N(raw)),raw.dim(0),raw.dim(1)
                # imshow(NI(raw)); gray(); show()
                yield Record(raw=raw,mask=mask,cls=cls,index=i,
                             bbox=grouper.boundingBox(i))
        except IOError,e:
            raise e
        except:
Пример #12
0
 def recognizeLine(self, lattice, image):
     """Recognize a line, outputting a recognition lattice.

     Delegates to recognizeLineSeg with a throw-away intarray for the raw
     segmentation and returns whatever recognizeLineSeg returns."""
     scratch_rseg = iulib.intarray()
     result = self.recognizeLineSeg(lattice, scratch_rseg, image)
     return result
Пример #13
0
    def recognizeLineSeg(self, lattice, rseg, image):
        """Recognize a text line and build its recognition lattice.

        lattice: result of recognition (filled in at the end through
            self.grouper.getLattice)
        rseg: intarray where the raw segmentation will be put
        image: line image to be recognized; the inversion performed below
            is applied to a copy, not to the caller's array

        Side effects: the median segment height is stored in self.mheight
        and the list of candidate Records in self.chars.

        Returns rseg."""

        if self.debug: print "starting"

        ## increase segmentation scale for large lines
        # h is image.dim(1); presumably the line height in pixels -- TODO confirm
        h = image.dim(1)
        s = max(2.0, h / 15.0)
        try:
            self.segmenter.pset("cost_smooth", s)
            if s > 2.0: print "segmentation scale", s
        except:
            # best effort: anything raised while setting the parameter is
            # ignored (NOTE(review): the bare except hides every error type)
            pass

        ## compute the raw segmentation
        if self.debug: print "segmenting"
        self.segmenter.charseg(rseg, image)
        if self.debug: print "done"
        ocropus.make_line_segmentation_black(rseg)
        if self.debug:
            # debugging display of the segmentation (first of four subplots)
            print "here"
            clf()
            subplot(4, 1, 1)
            show_segmentation(rseg)
            draw()
            print "there"
        iulib.renumber_labels(rseg, 1)
        self.grouper.setSegmentation(rseg)

        # compute the median segment height
        heights = []
        for i in range(self.grouper.length()):
            bbox = self.grouper.boundingBox(i)
            heights.append(bbox.height())
        mheight = median(array(heights))
        self.mheight = mheight

        # invert the input image (make a copy first)
        old = image
        image = iulib.bytearray()
        image.copy(old)
        iulib.sub(255, image)

        # allocate working arrays
        # NOTE(review): segs is not used anywhere below in this method
        segs = iulib.intarray()
        raw = iulib.bytearray()
        mask = iulib.bytearray()

        # now iterate through the characters and collect candidates
        inputs = []
        for i in range(self.grouper.length()):
            # get the bounding box for the character (used later)
            bbox = self.grouper.boundingBox(i)
            # NOTE(review): aspect is computed but not used below
            aspect = bbox.height() * 1.0 / bbox.width()

            # extract the character image (and optionally display it)
            self.grouper.extractWithMask(raw, mask, image, i, 1)
            char = NI(raw)
            # scale by the maximum value
            # NOTE(review): an all-zero character image would divide by zero here
            char = char / float(amax(char))
            if self.debug:
                subplot(4, 1, 2)
                print i, (bbox.x0, bbox.y0, bbox.x1, bbox.y1)
                cla()
                imshow(char, cmap=cm.gray)
                draw()
                print "hit RETURN to continue"
                raw_input()
            inputs.append(FI(char))

        # classify the candidates (using multithreading, where available)
        results = utils.omp_classify(self.cmodel, inputs)

        # now convert the classified outputs into a list of candidate records
        candidates = []
        for i in range(len(inputs)):
            # compute the classifier output for this character
            # print self.cmodel.info()
            raw = inputs[i]
            char = NI(raw)
            bbox = self.grouper.boundingBox(i)
            outputs = results[i]
            # turn each (class, value) pair into (class, -log(value));
            # the value is presumably a probability -- TODO confirm
            outputs = [(x[0], -log(x[1])) for x in outputs]
            candidates.append(
                Record(index=i,
                       image=char,
                       raw=raw,
                       outputs=outputs,
                       bbox=bbox))

        # keep the characters around for debugging (used by ocropus-showlrecs)
        self.chars = candidates

        # update the per-character costs based on a text line model
        if self.linemodel is not None:
            self.linemodel.linecosts(candidates, image)

        # compute a list of space costs for each candidate character
        spacecosts = self.spacemodel.spacecosts(candidates, image)

        for c in candidates:
            i = c.index
            raw = c.raw
            char = c.image
            outputs = c.outputs

            # Add a skip transition with the pixel width as cost.
            # This ensures that the lattice is at least connected.
            # Note that for typical character widths, this is going
            # to be much larger than any per-character cost.
            self.grouper.setClass(i, ocropus.L_RHO,
                                  self.rho_scale * raw.dim(0))

            # add the top classes to the lattice
            # (sorted in place by cost, lowest first; only the first
            # self.best entries are considered)
            outputs.sort(key=lambda x: x[1])
            # NOTE(review): this initial value of s is overwritten before use
            s = iulib.ustrg()
            for cls, cost in outputs[:self.best]:
                # don't add the reject class (written as "~")
                if cls == "~": continue

                # don't add anything with a cost higher than the reject cost
                if cost > self.reject_cost: continue

                # for anything else, just add the classified character to the grouper
                # (the cost is capped at self.maxcost)
                s = iulib.unicode2ustrg(cls)
                self.grouper.setClass(i, s, min(cost, self.maxcost))

                # add the computed space costs to the grouper as well
                # NOTE(review): this sits inside the per-class loop, so it is
                # repeated for every accepted class and never runs when no
                # class is accepted -- confirm that this is intended
                self.grouper.setSpaceCost(i, spacecosts[i][0],
                                          spacecosts[i][1])

        # extract the recognition lattice from the grouper
        self.grouper.getLattice(lattice)

        # return the raw segmentation as a result
        return rseg
Пример #14
0
# Script fragment: fst1, fst2, s2 and s3 are created earlier in the script,
# outside the part shown here.
# Disabled alternative transition, kept for reference:
#fst1.addTransition(s2, s3, 1002, 10.0,1002)
fst1.addTransition(s2, s3, 14, 20.0, 14)

# second transducer: seven states a0..a6
a0 = fst2.newState()
a1 = fst2.newState()
a2= fst2.newState()
a3 = fst2.newState()
a4 = fst2.newState()
a5 = fst2.newState()
a6 = fst2.newState()
# symbol codes used below: O=15, t=20, c=3, R=18
# (they look like 1-based alphabet positions -- a=1, W=23 -- TODO confirm)
fst2.setAccept(a4);
fst2.setAccept(a5);
fst2.setAccept(a6);
# each transition repeats the same symbol code as 3rd and 5th argument, with
# the cost in between; presumably (from, to, output, cost, input) -- TODO confirm
fst2.addTransition(a0, a1, 3, 23.0, 3);  # c
fst2.addTransition(a1, a3, 15, 1.0, 15);  # O
fst2.addTransition(a1, a2, 1, 20.0, 1);  # a
fst2.addTransition(a2, a4, 20, 40.0, 20);  # T
fst2.addTransition(a2, a5, 18, 18.0, 18);  # R
fst2.addTransition(a3, a6, 23, 13.0, 23);  # W

# NOTE(review): s is not used in the visible part of the script
s = iulib.ustrg()
# output arrays filled by the beam search
v1 = iulib.intarray()
v2 = iulib.intarray()
ins = iulib.intarray()
outs = iulib.intarray()
costs = iulib.floatarray()
# n is passed as the last argument of beam_search (presumably the beam width)
n=1000
ocrofstll.beam_search(v1,v2,ins,outs,costs,fst1,fst2,n)

Пример #15
0
    def recognizeLineSeg(self,lattice,rseg,image):
        """Recognize a text line and build its recognition lattice.

        lattice: result of recognition (filled in at the end through
            self.grouper.getLattice)
        rseg: intarray where the raw segmentation will be put
        image: line image to be recognized; the inversion performed below
            is applied to a copy, not to the caller's array

        Side effects: the median segment height is stored in self.mheight
        and one Record per character candidate is appended to self.chars.

        Returns rseg."""

        ## compute the raw segmentation
        self.segmenter.charseg(rseg,image)
        ocropus.make_line_segmentation_black(rseg)
        if self.debug: show_segmentation(rseg)
        iulib.renumber_labels(rseg,1)
        self.grouper.setSegmentation(rseg)

        # compute the median segment height
        heights = []
        for i in range(self.grouper.length()):
            bbox = self.grouper.boundingBox(i)
            heights.append(bbox.height())
        mheight = median(array(heights))
        self.mheight = mheight

        # invert the input image (make a copy first)
        old = image; image = iulib.bytearray(); image.copy(old)
        iulib.sub(255,image)

        # allocate working arrays
        # NOTE(review): segs is not used anywhere below in this method
        segs = iulib.intarray()
        raw = iulib.bytearray()
        mask = iulib.bytearray()

        # this holds the list of recognized characters; it is reset on
        # every call and always filled (no `keep` flag is consulted here)
        self.chars = []

        # now iterate through the characters
        for i in range(self.grouper.length()):
            # get the bounding box for the character (used later)
            bbox = self.grouper.boundingBox(i)
            # NOTE(review): aspect is computed but not used below
            aspect = bbox.height()*1.0/bbox.width()

            # extract the character image (and optionally display it)
            self.grouper.extractWithMask(raw,mask,image,i,1)
            char = NI(raw)
            # scale by the maximum value
            # NOTE(review): an all-zero character image would divide by zero here
            char = char / float(amax(char))
            if self.debug:
                imshow(char)
                raw_input()

            # Add a skip transition with the pixel width as cost.
            # This ensures that the lattice is at least connected.
            # Note that for typical character widths, this is going
            # to be much larger than any per-character cost.
            self.grouper.setClass(i,ocropus.L_RHO,self.rho_scale*raw.dim(0))

            # compute the classifier output for this character
            # print self.cmodel.info()
            outputs = self.cmodel.coutputs(FI(char))
            # turn each (class, value) pair into (class, -log(value));
            # the value is presumably a probability -- TODO confirm
            outputs = [(x[0],-log(x[1])) for x in outputs]
            self.chars.append(Record(index=i,image=char,outputs=outputs))

            # add the top classes to the lattice
            # (sorted in place by cost, lowest first; only the first
            # self.best entries are considered)
            outputs.sort(key=lambda x:x[1])
            # one ustrg is reused for every class added below
            s = iulib.ustrg()
            for cls,cost in outputs[:self.best]:
                # don't add the reject class (written as "~")
                if cls=="~": continue

                # letters are never small, so we skip small bounding boxes that
                # are categorized as letters; this is an ugly special case, but
                # it is quite common
                ucls = cls
                # byte strings are decoded as UTF-8 so that the Unicode
                # category of the first character can be looked up
                if type(cls)==str: ucls = unicode(cls,"utf-8")
                category = unicodedata.category(ucls[0])
                if bbox.height()<self.min_height*mheight and category[0]=="L":
                    # add an empty transition to allow skipping junk
                    # (commented out right now because I'm not sure whether
                    # the grouper can handle it; FIXME)
                    # self.grouper.setClass(i,"",1.0)
                    continue

                # for anything else, just add the classified character to the grouper
                # (the cost is capped at self.maxcost)
                s.assign(cls)
                self.grouper.setClass(i,s,min(cost,self.maxcost))
                # FIXME better space handling
                # NOTE(review): fixed space costs, set once per accepted class
                self.grouper.setSpaceCost(i,0.5,0.0)

        # extract the recognition lattice from the grouper
        self.grouper.getLattice(lattice)

        # return the raw segmentation as a result
        return rseg
Пример #16
0
def cc_statistics(image,dpi,min_pt,max_pt,verbose=0):
    """Compute connected-component statistics for a page image.

    image: page image; dim(0) and dim(1) are used as page width and height
    dpi: resolution used to convert font sizes in pt to pixels
    min_pt, max_pt: smallest and largest font size considered normal
    verbose: if true, intermediate numbers are reported through alert()

    Returns a Record with the fields biggish, normal, small, large
    (component counts), density, h_density, a_mean and covered."""
    page_w = image.dim(0)
    page_h = image.dim(1)

    # label the connected components of the (max - pixel) image and get
    # their bounding boxes
    labels = iulib.intarray()
    labels.copy(image)
    iulib.sub(iulib.max(labels),labels)
    iulib.label_components(labels)
    boxes = iulib.rectarray()
    iulib.bounding_boxes(boxes,labels)
    n = boxes.length()
    # index 0 is left out of the per-component measurements
    widths = array([boxes.at(k).width() for k in range(1,n)])
    heights = array([boxes.at(k).height() for k in range(1,n)])

    # size limits in pixels: "normal" components lie between 1/3 of the
    # em size of the minimum font and the full size of the maximum font
    min_px_em = min_pt*dpi/72.0
    min_px = (1.0/3.0) * min_px_em
    max_px = max_pt*dpi/72.0

    # classify the components by their larger dimension
    longest = maximum(widths,heights)
    small = (longest<min_px)        # larger dimension below the minimum size
    large = (longest>max_px)        # at least one dimension above the maximum size
    normal = (~small)&(~large)      # neither small nor large
    # both dimensions above the minimum size (excludes "." and "-");
    # used for the aspect ratio computation
    biggish = ((widths>min_px)&(heights>min_px))

    # fraction of the page covered by component bounding boxes
    # (overlapping boxes are counted more than once)
    covered = sum(widths*heights)*1.0/page_w/page_h

    # absolute density of characters per square inch
    density = n*dpi**2*1.0/page_w/page_h

    # relative density of characters per em
    h_density = n/(page_w/min_px_em)

    n_normal = sum(normal)
    n_small = sum(small)
    n_large = sum(large)

    if verbose:
        alert("# min",min_px,"max",max_px)
        alert("# normal",n_normal,"small",n_small,"large",n_large)
        alert("# density",density)
        alert("# h_density",h_density)
        alert("# covered",covered)

    # aspect ratio statistics: right-trimmed mean over the biggish
    # components, so that "-"-like components (not biggish) and large
    # components such as rules (trimmed off) do not influence the value
    aspect = (heights*1.0/widths)[biggish]
    a_mean = mean(trim1(aspect,0.1,tail='right'))

    return Record(
        biggish = sum(biggish),
        normal = n_normal,
        small = n_small,
        large = n_large,
        density = density,
        h_density = h_density,
        a_mean = a_mean,
        covered=covered,
    )