def getTransitions(self, frm):
    """Return the transitions leaving state `frm` as a tuple of numpy
    arrays (targets, symbols, costs, inputs)."""
    tos = iulib.intarray()
    symbols = iulib.intarray()
    costs = iulib.floatarray()
    inputs = iulib.intarray()
    self.comp.getTransitions(tos, symbols, costs, inputs, frm)
    return (iulib.numpy(tos, 'i'), iulib.numpy(symbols, 'i'),
            iulib.numpy(costs), iulib.numpy(inputs, 'i'))
def beam_search(lattice, lmodel, beam):
    """Perform a beam search through the lattice and language model, given
    the beam size.  Returns (v1, v2, input_symbols, output_symbols, costs)."""
    v1 = iulib.intarray()
    v2 = iulib.intarray()
    ins = iulib.intarray()
    outs = iulib.intarray()
    costs = iulib.floatarray()
    ocrofstll.beam_search(v1, v2, ins, outs, costs,
                          native_fst(lattice), native_fst(lmodel), beam)
    return (iulib.numpy(v1, 'i'), iulib.numpy(v2, 'i'), iulib.numpy(ins, 'i'),
            iulib.numpy(outs, 'i'), iulib.numpy(costs, 'f'))
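# Illustrative sketch (not part of the original module): recover the best
# transcript from a beam search result.  Output symbols along the best path
# are unicode code points; zeros mark epsilon transitions and are skipped,
# matching the skip0=1 convention of intarray_as_string used elsewhere in
# this module.  `lattice` and `lmodel` are placeholders for OcroFSTs.
def _example_best_transcript(lattice, lmodel, beam=1000):
    v1, v2, ins, outs, costs = beam_search(lattice, lmodel, beam)
    text = u"".join([unichr(int(c)) for c in outs if c > 0])
    return text, costs.sum()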
def write_line_segmentation(file, seg_):
    """Write the line segmentation to the output file, changing the black
    background to white."""
    seg = iulib.intarray()
    seg.copy(seg_)
    ocropus.make_line_segmentation_white(seg)
    iulib.write_image_packed(file, seg)
def read_page_segmentation(name, black=1):
    """Read a page segmentation, returning a numpy array (rank 3,
    type 'B' RGB image)."""
    if not os.path.exists(name): raise IOError(name)
    pseg = iulib.intarray()
    iulib.read_image_packed(pseg, name)
    if black: iulib.make_page_segmentation_black(pseg)
    return narray2pseg(pseg)
def read_line_segmentation(name, black=1):
    """Read a line segmentation, returning a numpy array."""
    if not os.path.exists(name): raise IOError(name)
    lseg = iulib.intarray()
    iulib.read_image_packed(lseg, name)
    if black: iulib.make_line_segmentation_black(lseg)
    return narray2lseg(lseg)
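# Illustrative sketch (not part of the original module): load a line
# segmentation and count its segments.  This assumes the black convention
# (background 0, segments labeled from 1), so the highest label is the
# segment count.
def _example_count_segments(name):
    lseg = read_line_segmentation(name)
    return lseg.max()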
def pseg2narray(pseg):
    """Convert a page segmentation (rank 3, RGB) to an narray."""
    checknp(pseg)
    assert pseg.dtype == 'B' and pseg.ndim == 3
    r = numpy2narray(ascontiguousarray(pseg[:, :, 0]))
    g = numpy2narray(ascontiguousarray(pseg[:, :, 1]))
    b = numpy2narray(ascontiguousarray(pseg[:, :, 2]))
    rgb = iulib.intarray()
    iulib.pack_rgb(rgb, r, g, b)
    return rgb
def recognize_and_align(image, linerec, lmodel, beam=1000, nocseg=0):
    """Perform line recognition with the given line recognizer and
    language model.  Outputs an object containing the result (as a
    Python string), the costs, the rseg, the cseg, the lattice and the
    total cost.  The recognition lattice needs to have the rseg's
    segment numbers as inputs (pairs of 16 bit numbers); SimpleGrouper
    produces such lattices.  cseg==None means that the connected
    component renumbering failed for some reason."""
    # run the recognizer
    lattice = ocropus.make_OcroFST()
    rseg = iulib.intarray()
    linerec.recognizeLineSeg(lattice, rseg, image)
    # perform the beam search through the lattice and the model
    v1 = iulib.intarray()
    v2 = iulib.intarray()
    ins = iulib.intarray()
    outs = iulib.intarray()
    costs = iulib.floatarray()
    ocropus.beam_search(v1, v2, ins, outs, costs, lattice, lmodel, beam)
    # do the conversions
    # print "OUTS", [outs.at(i) for i in range(outs.length())]
    result = intarray_as_string(outs, skip0=1)
    # print "RSLT", result
    # compute the cseg
    if not nocseg:
        rmap = rseg_map(ins)
        cseg = None
        if len(rmap) > 1:
            cseg = iulib.intarray()
            cseg.copy(rseg)
            try:
                for i in range(cseg.length()):
                    cseg.put1d(i, int(rmap[rseg.at1d(i)]))
            except IndexError:
                raise Exception("renumbering failed")
    else:
        cseg = None
    # return everything we computed
    return Record(image=image, output=result, raw=outs, costs=costs,
                  rseg=rseg, cseg=cseg, lattice=lattice,
                  cost=iulib.sum(costs))
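# Illustrative usage sketch (not part of the original module): run
# recognize_and_align on a line image loaded from disk.  `linerec` and
# `lmodel` are placeholders for a loaded line recognizer and a language
# model FST.
def _example_recognize_file(file, linerec, lmodel):
    image = iulib.bytearray()
    iulib.read_image_gray(image, file)
    r = recognize_and_align(image, linerec, lmodel, beam=1000)
    print "text:", r.output
    print "total cost:", r.cost
    return r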
def load_gt(file):
    """Check for the presence of cseg and txt files and use them to label
    characters if available."""
    cfile = re.sub(r'\.png$', '.cseg.gt.png', file)
    tfile = re.sub(r'\.png$', '.gt.txt', file)
    if not os.path.exists(cfile):
        cfile = re.sub(r'\.png$', '.cseg.png', file)
        tfile = re.sub(r'\.png$', '.txt', file)
    if os.path.exists(cfile):
        cseg = iulib.intarray()
        iulib.read_image_packed(cseg, cfile)
        ocropus.make_line_segmentation_black(cseg)
    else:
        cseg = None
    if os.path.exists(tfile):
        text = open(tfile).read()
    else:
        text = None
    return cseg, text
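# Illustrative sketch (not part of the original module): iterate over line
# images that have both cseg and transcript ground truth, skipping the rest.
def _example_iter_gt(files):
    for file in files:
        cseg, text = load_gt(file)
        if cseg is None or text is None: continue
        yield file, cseg, text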
def chars_no_gt(files, segmenter=default_segmenter, grouper=default_grouper):
    """Iterate through the characters of the given line images without
    ground truth; yields (raw, mask, cls) with cls always None."""
    for file in files:
        print "# loading", file
        image = iulib.bytearray()
        try:
            iulib.read_image_gray(image, file)
            segmentation = iulib.intarray()
            segmenter.charseg(segmentation, image)
            ocropus.make_line_segmentation_black(segmentation)
            iulib.renumber_labels(segmentation, 1)
            grouper.setSegmentation(segmentation)
            iulib.sub(255, image)
            for i in range(grouper.length()):
                cls = None
                raw = iulib.bytearray()
                mask = iulib.bytearray()
                grouper.extractWithMask(raw, mask, image, i, 1)
                yield raw, mask, cls
        except Exception:
            # skip files that fail to load or segment
            print "FAILED", sys.exc_info()[0]
            continue
def cseg_chars(files,suffix="gt",segmenter=None,grouper=None,has_gt=1,verbose=0): """Iterate through the characters contained in a cseg file. Argument should be a list of image files. Given "line.png", uses "line.cseg.gt.png" and "line.gt.txt" if suffix="gt". Returns an iterator of raw,mask,cls. Attempts to align with ground truth unless has_gt=0.""" # also accept individual files if type(files)==type(""): files = [files] # if no grouper is given, just instantiate a simple grouper if not grouper: grouper = ocropus.make_IGrouper("SimpleGrouper") grouper.pset("maxrange",1) # allow empty suffix as a special case if suffix is None: suffix = "" if suffix!="": suffix = "."+suffix # now iterate through all the image files for file in files: if verbose: print "# loading",file try: # load the text line image = iulib.bytearray() iulib.read_image_gray(image,file) base = re.sub("\.png$","",file) # load segmentation ground truth cseg_file = base+".cseg"+suffix+".png" print file,cseg_file cseg = iulib.intarray() if not os.path.exists(cseg_file): raise IOError(cseg_file) iulib.read_image_packed(cseg,cseg_file) ocropus.make_line_segmentation_black(cseg) # load text if has_gt: text_file = base+suffix+".txt" text = open(text_file).read() if text[-1]=="\n": text = text[:-1] if len(text)>iulib.max(cseg): text = re.sub(r'\s+','',text) utext = iulib.ustrg() utext.assign(text) # FIXME should do UTF8 or u"" if utext.length()!=iulib.max(cseg): raise BadTranscript("mismatch transcript %d maxseg %d"%(utext.length(),iulib.max(cseg))) if verbose: print "#",utext.length(),iulib.max(cseg) # perform the segmentation segmentation = iulib.intarray() if segmenter: segmenter.charseg(segmentation,image) ocropus.make_line_segmentation_black(segmentation) iulib.renumber_labels(segmentation,1) else: segmentation.copy(cseg) # invert the image, since that's the way we're doing # all the remaining processing iulib.sub(255,image) # set the segmentation in preparation for loading if has_gt: grouper.setSegmentationAndGt(segmentation,cseg,utext) else: grouper.setSegmentation(segmentation) # now iterate through the segments of the line for i in range(grouper.length()): cls = None if has_gt: cls = grouper.getGtClass(i) if cls==-1: cls = "" else: cls = chr(cls) raw = iulib.bytearray() mask = iulib.bytearray() grouper.extractWithMask(raw,mask,image,i,1) # print "component",i,N(segments),amax(N(raw)),raw.dim(0),raw.dim(1) # imshow(NI(raw)); gray(); show() yield Record(raw=raw,mask=mask,cls=cls,index=i, bbox=grouper.boundingBox(i)) except IOError,e: raise e except:
def recognizeLine(self, lattice, image):
    """Recognize a line, outputting a recognition lattice."""
    rseg = iulib.intarray()
    return self.recognizeLineSeg(lattice, rseg, image)
def recognizeLineSeg(self, lattice, rseg, image):
    """Recognize a line.
    lattice: result of recognition
    rseg: intarray where the raw segmentation will be put
    image: line image to be recognized"""
    if self.debug: print "starting"
    ## increase segmentation scale for large lines
    h = image.dim(1)
    s = max(2.0, h / 15.0)
    try:
        self.segmenter.pset("cost_smooth", s)
        if s > 2.0: print "segmentation scale", s
    except:
        # not all segmenters support the cost_smooth parameter
        pass
    ## compute the raw segmentation
    if self.debug: print "segmenting"
    self.segmenter.charseg(rseg, image)
    if self.debug: print "done"
    ocropus.make_line_segmentation_black(rseg)
    if self.debug:
        print "here"
        clf()
        subplot(4, 1, 1)
        show_segmentation(rseg)
        draw()
        print "there"
    iulib.renumber_labels(rseg, 1)
    self.grouper.setSegmentation(rseg)
    # compute the median segment height
    heights = []
    for i in range(self.grouper.length()):
        bbox = self.grouper.boundingBox(i)
        heights.append(bbox.height())
    mheight = median(array(heights))
    self.mheight = mheight
    # invert the input image (make a copy first)
    old = image
    image = iulib.bytearray()
    image.copy(old)
    iulib.sub(255, image)
    # allocate working arrays
    segs = iulib.intarray()
    raw = iulib.bytearray()
    mask = iulib.bytearray()
    # now iterate through the characters and collect candidates
    inputs = []
    for i in range(self.grouper.length()):
        # get the bounding box for the character (used later)
        bbox = self.grouper.boundingBox(i)
        aspect = bbox.height() * 1.0 / bbox.width()
        # extract the character image (and optionally display it)
        self.grouper.extractWithMask(raw, mask, image, i, 1)
        char = NI(raw)
        char = char / float(amax(char))
        if self.debug:
            subplot(4, 1, 2)
            print i, (bbox.x0, bbox.y0, bbox.x1, bbox.y1)
            cla()
            imshow(char, cmap=cm.gray)
            draw()
            print "hit RETURN to continue"
            raw_input()
        inputs.append(FI(char))
    # classify the candidates (using multithreading, where available)
    results = utils.omp_classify(self.cmodel, inputs)
    # now convert the classified outputs into a list of candidate records
    candidates = []
    for i in range(len(inputs)):
        # compute the classifier output for this character
        # print self.cmodel.info()
        raw = inputs[i]
        char = NI(raw)
        bbox = self.grouper.boundingBox(i)
        outputs = results[i]
        outputs = [(x[0], -log(x[1])) for x in outputs]
        candidates.append(Record(index=i, image=char, raw=raw,
                                 outputs=outputs, bbox=bbox))
    # keep the characters around for debugging (used by ocropus-showlrecs)
    self.chars = candidates
    # update the per-character costs based on a text line model
    if self.linemodel is not None:
        self.linemodel.linecosts(candidates, image)
    # compute a list of space costs for each candidate character
    spacecosts = self.spacemodel.spacecosts(candidates, image)
    for c in candidates:
        i = c.index
        raw = c.raw
        char = c.image
        outputs = c.outputs
        # Add a skip transition with the pixel width as cost.
        # This ensures that the lattice is at least connected.
        # Note that for typical character widths, this is going
        # to be much larger than any per-character cost.
        self.grouper.setClass(i, ocropus.L_RHO, self.rho_scale * raw.dim(0))
        # add the top classes to the lattice
        outputs.sort(key=lambda x: x[1])
        s = iulib.ustrg()
        for cls, cost in outputs[:self.best]:
            # don't add the reject class (written as "~")
            if cls == "~": continue
            # don't add anything with a cost higher than the reject cost
            if cost > self.reject_cost: continue
            # for anything else, just add the classified character to the grouper
            s = iulib.unicode2ustrg(cls)
            self.grouper.setClass(i, s, min(cost, self.maxcost))
        # add the computed space costs to the grouper as well
        self.grouper.setSpaceCost(i, spacecosts[i][0], spacecosts[i][1])
    # extract the recognition lattice from the grouper
    self.grouper.getLattice(lattice)
    # return the raw segmentation as a result
    return rseg
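# Illustrative driver sketch (not part of the original module): run the
# recognizer above on a line image file and return the recognition lattice;
# `linerec` stands in for an instance of the recognizer class.
def _example_line_lattice(linerec, file):
    image = iulib.bytearray()
    iulib.read_image_gray(image, file)
    lattice = ocropus.make_OcroFST()
    linerec.recognizeLine(lattice, image)
    return lattice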
# fst1.addTransition(s2, s3, 1002, 10.0, 1002)
fst1.addTransition(s2, s3, 14, 20.0, 14)

a0 = fst2.newState()
a1 = fst2.newState()
a2 = fst2.newState()
a3 = fst2.newState()
a4 = fst2.newState()
a5 = fst2.newState()
a6 = fst2.newState()
# symbol codes used below: a=1, c=3, O=15, R=18, T=20, W=23
fst2.setAccept(a4)
fst2.setAccept(a5)
fst2.setAccept(a6)
fst2.addTransition(a0, a1, 3, 23.0, 3)    # c
fst2.addTransition(a1, a3, 15, 1.0, 15)   # O
fst2.addTransition(a1, a2, 1, 20.0, 1)    # a
fst2.addTransition(a2, a4, 20, 40.0, 20)  # T
fst2.addTransition(a2, a5, 18, 18.0, 18)  # R
fst2.addTransition(a3, a6, 23, 13.0, 23)  # W

s = iulib.ustrg()
v1 = iulib.intarray()
v2 = iulib.intarray()
ins = iulib.intarray()
outs = iulib.intarray()
costs = iulib.floatarray()
n = 1000
ocrofstll.beam_search(v1, v2, ins, outs, costs, fst1, fst2, n)
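# Illustrative follow-up (not part of the original test code): print the
# input/output symbols and costs along the best path found above, plus the
# total path cost.
for i in range(outs.length()):
    print ins.at(i), outs.at(i), costs.at(i)
print "total cost:", iulib.sum(costs)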
def recognizeLineSeg(self, lattice, rseg, image):
    """Recognize a line.
    lattice: result of recognition
    rseg: intarray where the raw segmentation will be put
    image: line image to be recognized"""
    ## compute the raw segmentation
    self.segmenter.charseg(rseg, image)
    ocropus.make_line_segmentation_black(rseg)
    if self.debug: show_segmentation(rseg)
    iulib.renumber_labels(rseg, 1)
    self.grouper.setSegmentation(rseg)
    # compute the median segment height
    heights = []
    for i in range(self.grouper.length()):
        bbox = self.grouper.boundingBox(i)
        heights.append(bbox.height())
    mheight = median(array(heights))
    self.mheight = mheight
    # invert the input image (make a copy first)
    old = image; image = iulib.bytearray(); image.copy(old)
    iulib.sub(255, image)
    # allocate working arrays
    segs = iulib.intarray()
    raw = iulib.bytearray()
    mask = iulib.bytearray()
    # this holds the list of recognized characters
    self.chars = []
    # now iterate through the characters
    for i in range(self.grouper.length()):
        # get the bounding box for the character (used later)
        bbox = self.grouper.boundingBox(i)
        aspect = bbox.height() * 1.0 / bbox.width()
        # extract the character image (and optionally display it)
        self.grouper.extractWithMask(raw, mask, image, i, 1)
        char = NI(raw)
        char = char / float(amax(char))
        if self.debug:
            imshow(char)
            raw_input()
        # Add a skip transition with the pixel width as cost.
        # This ensures that the lattice is at least connected.
        # Note that for typical character widths, this is going
        # to be much larger than any per-character cost.
        self.grouper.setClass(i, ocropus.L_RHO, self.rho_scale * raw.dim(0))
        # compute the classifier output for this character
        # print self.cmodel.info()
        outputs = self.cmodel.coutputs(FI(char))
        outputs = [(x[0], -log(x[1])) for x in outputs]
        self.chars.append(Record(index=i, image=char, outputs=outputs))
        # add the top classes to the lattice
        outputs.sort(key=lambda x: x[1])
        s = iulib.ustrg()
        for cls, cost in outputs[:self.best]:
            # don't add the reject class (written as "~")
            if cls == "~": continue
            # letters are never small, so we skip small bounding boxes that
            # are categorized as letters; this is an ugly special case, but
            # it is quite common
            ucls = cls
            if type(cls) == str: ucls = unicode(cls, "utf-8")
            category = unicodedata.category(ucls[0])
            if bbox.height() < self.min_height * mheight and category[0] == "L":
                # add an empty transition to allow skipping junk
                # (commented out right now because I'm not sure whether
                # the grouper can handle it; FIXME)
                # self.grouper.setClass(i,"",1.0)
                continue
            # for anything else, just add the classified character to the grouper
            s.assign(cls)
            self.grouper.setClass(i, s, min(cost, self.maxcost))
        # FIXME better space handling
        self.grouper.setSpaceCost(i, 0.5, 0.0)
    # extract the recognition lattice from the grouper
    self.grouper.getLattice(lattice)
    # return the raw segmentation as a result
    return rseg
def cc_statistics(image, dpi, min_pt, max_pt, verbose=0):
    """Compute connected component statistics for a page image: counts of
    small/normal/large components, character densities, page coverage, and
    a trimmed mean aspect ratio."""
    w = image.dim(0)
    h = image.dim(1)
    ## compute connected component widths and heights
    components = iulib.intarray()
    components.copy(image)
    iulib.sub(iulib.max(components), components)
    iulib.label_components(components)
    boxes = iulib.rectarray()
    iulib.bounding_boxes(boxes, components)
    n = boxes.length()
    widths = array([boxes.at(i).width() for i in range(1, n)])
    heights = array([boxes.at(i).height() for i in range(1, n)])
    ## we consider "normal" components those that are between 1/3 of the
    ## size of the minimum sized font and the full size of the
    ## maximum sized font; to compute this, we need to convert from
    ## font sizes in pt to pixel sizes, using the given dpi
    maxs = maximum(widths, heights)
    min_px_em = min_pt * dpi / 72.0
    min_px = (1.0 / 3.0) * min_px_em
    max_px = max_pt * dpi / 72.0
    ## compute the total page area covered by bounding boxes of connected
    ## components (we don't bother to avoid double counting overlapping boxes)
    covered = sum(widths * heights) * 1.0 / w / h
    ## small components are those whose maximum dimension is smaller than the min size
    small = (maxs < min_px)
    ## large components have at least one dimension bigger than the max size
    large = (maxs > max_px)
    ## biggish components have both dimensions bigger than the small size (this
    ## excludes "." and "-" and is used for aspect ratio computations)
    biggish = ((widths > min_px) & (heights > min_px))
    ## normal boxes are those that are neither small nor large
    normal = ~(small | large)
    ## absolute density of characters per square inch
    density = n * dpi**2 * 1.0 / w / h
    ## relative density of characters per em
    h_density = n / (w / min_px_em)
    ## print some information
    if verbose:
        alert("# min", min_px, "max", max_px)
        alert("# normal", sum(normal), "small", sum(small), "large", sum(large))
        alert("# density", density)
        alert("# h_density", h_density)
        alert("# covered", covered)
    ## compute aspect ratio statistics; we're using a right-trimmed mean of
    ## biggish components; this means that we exclude characters like "-"
    ## from the computation (because they are not biggish), and we also exclude
    ## large connected components such as rules (since they are trimmed off);
    ## the remaining mean should represent the mean of connected components that
    ## make up the bulk of the text on the page
    aspect = heights * 1.0 / widths
    aspect = aspect[biggish]
    a_mean = mean(trim1(aspect, 0.1, tail='right'))
    return Record(
        biggish=sum(biggish),
        normal=sum(normal),
        small=sum(small),
        large=sum(large),
        density=density,
        h_density=h_density,
        a_mean=a_mean,
        covered=covered,
    )
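# Illustrative usage sketch (not part of the original module): compute page
# statistics for a scanned page image.  The dpi and point-size bounds here
# are placeholder values, not defaults from the original code.
def _example_page_stats(file, dpi=300):
    image = iulib.bytearray()
    iulib.read_image_gray(image, file)
    return cc_statistics(image, dpi, min_pt=8, max_pt=30, verbose=1)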