class Pdfsampler:
    """Pick pages out of a PDF file and wrap each one in a ``Page``.

    One ``Classifier_with_remove`` (with ``k`` set to 1) is created in the
    constructor and passed to every ``Page`` this sampler builds.
    """

    def __init__(self, filename):
        """Store *filename* and build the classifier shared by all pages."""
        self.filename = filename
        self._l = logging.getLogger(self.__class__.__name__)
        self.c = Classifier_with_remove(
            training_filename="../preomr_edited_cnn.xml")
        self.c.set_k(1)

    def _page_range(self, firstpage):
        """Return ``(page_count, [firstpage, ..., page_count])``.

        The list is empty when *firstpage* lies beyond the last page.
        """
        pages = PDFDocument(self.filename).count_pages()
        return pages, list(range(firstpage, pages + 1))

    def _load_pages(self, numbers):
        """Build one ``Page`` per page number, all sharing ``self.c``."""
        return [Page(self.filename, n, self.c) for n in numbers]

    def randompages(self, count, firstpage=1):
        """Return up to *count* randomly chosen pages, in ascending order.

        Pages are drawn without replacement from ``firstpage`` up to and
        including the last page of the document.  If fewer than *count*
        pages are available, all of them are returned.
        """
        pages, candidates = self._page_range(firstpage)
        # Clamp at zero: random.sample() raises ValueError for a negative
        # sample size, which used to happen when firstpage was more than one
        # past the last page (or count was negative).  all() already returns
        # an empty list for such a firstpage, so do the same here.
        wanted = max(0, min(len(candidates), count))
        chosen_pages = sorted(random.sample(candidates, wanted))
        self._l.info("%s - %d pages. %s chosen",
                     self.filename, pages, chosen_pages)
        return self._load_pages(chosen_pages)

    def all(self, firstpage=1):
        """Return every page from *firstpage* through the last page."""
        pages, chosen_pages = self._page_range(firstpage)
        self._l.info("%s - %d pages. All chosen", self.filename, pages)
        return self._load_pages(chosen_pages)
else: classified = [] return seg['text'],seg['inside'],seg['outside'],classified if __name__ == '__main__': from gamera.core import * from class_dynamic import Classifier_with_remove from ill_music import IllMusicImage import sys #LOG_FILENAME = '/tmp/logging_example.out' FORMAT = "%(asctime)-15s %(levelname)s [%(name)s.%(funcName)s] %(message)s" logging.basicConfig(level=logging.DEBUG,format=FORMAT) init_gamera() c = Classifier_with_remove(training_filename="preomr_edited_cnn.xml") c.set_k(1) filename = sys.argv[-1] #c.classifier.load_settings("gasettings.txt") mi = IllMusicImage(load_image(filename),classifier=c) ret = mi.without() ret.save_PNG("%s_Removed.png"%filename) logging.debug("Done with %s"%filename) ret = mi.color_segment(classified_box=True) ret.save_PNG("%s_ColorSegment.png"%filename) logging.debug("Done with %s"%filename)
def test_e_fp(filename, expected_count=10):
    """Sweep the classifier's ``e_fp`` setting over one image and print CSV.

    A ``Classifier_with_remove`` (k = 1, feature ``volume64regions``) is run
    on *filename*.  For the first of the known training files that exists in
    the current directory, ``e_fp`` is stepped through
    ``arange(0.01, 1.01, 0.01)`` and the number of classified glyphs is
    recorded for each value.  Two tables are printed to stdout:

    * ``efp,count``      -- glyph count for every ``e_fp`` value
    * ``count,spansize`` -- how many ``e_fp`` values produced each count

    Args:
        filename: image handed to ``classify_image``; also used in the name
            of the annotated PNG written by the (currently unreachable)
            final stage.
        expected_count: glyph count passed to ``find_nearest`` as the target.

    Returns:
        None.  The function returns right after printing the tables for the
        first training file found.
    """
    # Imported locally so the function does not depend on `sys` having been
    # imported under the ``__main__`` guard elsewhere in this file.
    import os.path
    import sys

    init_gamera()
    c = Classifier_with_remove()
    c.set_k(1)
    c.change_features(["volume64regions"])
    ci = c.classify_image(filename)
    #files = ["mergedyn2.xml", "mergedyn.xml","only-dynamics.xml",
    #         "newtrain-dynamic.xml", "preomr.xml"]
    files = ["preomr.xml", "preomr_edited.xml", "preomr_edited_cnn.xml"]

    # Try to match with different training sets (only those present on disk).
    for dynamic in [d for d in files if os.path.isfile(d)]:
        ci.load_new_training_data(dynamic)
        print("%s - count_of_training=%d, k=%d" % (dynamic, len(c.stats), c.k))
        # result: glyph count -> list of (e_fp, d_t) pairs that produced it.
        result = {}
        # csv: e_fp -> glyph count.
        csv = {}
        sys.stdout.flush()

        # Try with different epsilon for false positives: e_fp
        for e_fp in arange(0.01, 1.01, 0.01):
            c.e_fp = e_fp
            count = len(ci.classified_glyphs())
            result.setdefault(count, []).append((e_fp, c.d_t()))
            csv[e_fp] = count

        # Find the best match to the wanted result.
        k, res, diff = find_nearest(result, expected_count)
        confid2 = sorted((key, len(v)) for key, v in result.items())

        print("efp,count")
        # The loop variable must not be called `c`: that would replace the
        # classifier with an integer and break any later iteration.
        for e_fp, found in sorted(csv.items()):
            print("%s,%s" % (e_fp, found))
        print("")
        print("count,spansize")
        for count, spansize in confid2:
            print("%s,%s" % (count, spansize))

        # NOTE(review): this unconditional return stops after the first
        # training file and makes the rest of the loop body unreachable.
        # It looks like a debugging short-cut -- confirm before removing.
        return

        # --- unreachable while the return above is in place ---
        if expected_count not in result:
            print("Never found the desired amount with %s" % dynamic)
        print("Found in %d(%d): %s" % (k, diff, list(res)))
        rgbimg = ci.image.to_rgb()
        # NOTE(review): entries stored in `result` are (e_fp, d_t) tuples; if
        # `res` is one of those lists, `.d_t` will fail -- verify against
        # find_nearest before re-enabling this stage.
        cg = ci.classified_glyphs(res[0].d_t)
        for g in cg:
            outline(rgbimg, g, 3.0, RGBPixel(255, 0, 0))
        rgbimg.save_PNG("class_%s_%s.png" % (filename, dynamic))
        print("")
""" seg = self.ccs_overall() if classify: classified = seg['classified'] else: classified = [] return seg['text'], seg['inside'], seg['outside'], classified if __name__ == '__main__': from gamera.core import * from class_dynamic import Classifier_with_remove from ill_music import IllMusicImage import sys #LOG_FILENAME = '/tmp/logging_example.out' FORMAT = "%(asctime)-15s %(levelname)s [%(name)s.%(funcName)s] %(message)s" logging.basicConfig(level=logging.DEBUG, format=FORMAT) init_gamera() c = Classifier_with_remove(training_filename="preomr_edited_cnn.xml") c.set_k(1) filename = sys.argv[-1] #c.classifier.load_settings("gasettings.txt") mi = IllMusicImage(load_image(filename), classifier=c) ret = mi.without() ret.save_PNG("%s_Removed.png" % filename) logging.debug("Done with %s" % filename) ret = mi.color_segment(classified_box=True) ret.save_PNG("%s_ColorSegment.png" % filename) logging.debug("Done with %s" % filename)
def test_e_fp(filename, expected_count=10):
    """Sweep the classifier's ``e_fp`` setting over one image and print CSV.

    A ``Classifier_with_remove`` (k = 1, feature ``volume64regions``) is run
    on *filename*.  For the first of the known training files that exists in
    the current directory, ``e_fp`` is stepped through
    ``arange(0.01, 1.01, 0.01)`` and the number of classified glyphs is
    recorded for each value.  Two tables are printed to stdout:

    * ``efp,count``      -- glyph count for every ``e_fp`` value
    * ``count,spansize`` -- how many ``e_fp`` values produced each count

    Args:
        filename: image handed to ``classify_image``; also used in the name
            of the annotated PNG written by the (currently unreachable)
            final stage.
        expected_count: glyph count passed to ``find_nearest`` as the target.

    Returns:
        None.  The function returns right after printing the tables for the
        first training file found.
    """
    # Imported locally so the function does not depend on `sys` having been
    # imported under the ``__main__`` guard elsewhere in this file.
    import os.path
    import sys

    init_gamera()
    c = Classifier_with_remove()
    c.set_k(1)
    c.change_features(["volume64regions"])
    ci = c.classify_image(filename)
    #files = ["mergedyn2.xml", "mergedyn.xml","only-dynamics.xml",
    #         "newtrain-dynamic.xml", "preomr.xml"]
    files = ["preomr.xml", "preomr_edited.xml", "preomr_edited_cnn.xml"]

    # Try to match with different training sets (only those present on disk).
    for dynamic in [d for d in files if os.path.isfile(d)]:
        ci.load_new_training_data(dynamic)
        print("%s - count_of_training=%d, k=%d" % (dynamic, len(c.stats), c.k))
        # result: glyph count -> list of (e_fp, d_t) pairs that produced it.
        result = {}
        # csv: e_fp -> glyph count.
        csv = {}
        sys.stdout.flush()

        # Try with different epsilon for false positives: e_fp
        for e_fp in arange(0.01, 1.01, 0.01):
            c.e_fp = e_fp
            count = len(ci.classified_glyphs())
            result.setdefault(count, []).append((e_fp, c.d_t()))
            csv[e_fp] = count

        # Find the best match to the wanted result.
        k, res, diff = find_nearest(result, expected_count)
        confid2 = sorted((key, len(v)) for key, v in result.items())

        print("efp,count")
        # The loop variable must not be called `c`: that would replace the
        # classifier with an integer and break any later iteration.
        for e_fp, found in sorted(csv.items()):
            print("%s,%s" % (e_fp, found))
        print("")
        print("count,spansize")
        for count, spansize in confid2:
            print("%s,%s" % (count, spansize))

        # NOTE(review): this unconditional return stops after the first
        # training file and makes the rest of the loop body unreachable.
        # It looks like a debugging short-cut -- confirm before removing.
        return

        # --- unreachable while the return above is in place ---
        if expected_count not in result:
            print("Never found the desired amount with %s" % dynamic)
        print("Found in %d(%d): %s" % (k, diff, list(res)))
        rgbimg = ci.image.to_rgb()
        # NOTE(review): entries stored in `result` are (e_fp, d_t) tuples; if
        # `res` is one of those lists, `.d_t` will fail -- verify against
        # find_nearest before re-enabling this stage.
        cg = ci.classified_glyphs(res[0].d_t)
        for g in cg:
            outline(rgbimg, g, 3.0, RGBPixel(255, 0, 0))
        rgbimg.save_PNG("class_%s_%s.png" % (filename, dynamic))
        print("")