def __init__(self, media, target=None):
    """
    Preprocess the given media and prepare the image output directory.

    @param media   source handed to DataProcessor and processed as ImageType.PLAN
    @param target  optional value stored on the instance unchanged (default None)
    """
    self.media = media
    self.target = target

    # Run the processor once and keep its results on the instance.
    processor = DataProcessor(self.media, ImageType.PLAN)
    processor.prepare()
    self.binary = processor.batch()
    self.color = processor.color

    self.drawClipSource = True

    # Image output location; created here if it does not exist yet.
    self.imageout_dir = '../temp/trash'
    os.makedirs(self.imageout_dir, exist_ok=True)
def getResult(self, src, save_image=False):
    """
    Run OCR on an image, append the recognized names to the corpus file and
    return the recognized document.

    @param {string} src         image source, processed as ImageType.RAW
    @param {bool}   save_image  output debug image (forwarded to DataProcessor)
    @return {OCRDocument} recognized document, or None when the image could
                          not be prepared (an error is logged in that case)
    """
    pro = DataProcessor(src, ImageType.RAW, save_image=save_image)
    if pro.prepare() is None:
        logger.error('image error:{0}'.format(src))
        return None

    buffer = pro.tobinary(pro.batch())
    # NOTE(review): the meaning of the second argument (True) is defined by
    # create_TemporyFile, which is not visible here -- confirm.
    temp_file_name = self.create_TemporyFile(buffer, True)
    try:
        document = self.ocr.recognize(temp_file_name)
    finally:
        # Always delete the temporary file, even when recognition raises,
        # so failed runs do not leave files behind.
        os.remove(temp_file_name)

    # Corpus entry: '#<timestamp>' header line followed by one name per line.
    # Explicit directives are used instead of '%F %T' because those are not
    # supported by every platform's strftime; [:-3] trims the microseconds
    # down to milliseconds.
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]
    output = '#' + timestamp + '\n'
    output += '\n'.join(document.names()) + '\n'
    with Serializer.open_stream('../temp/corpus.txt', mode='a') as corpus:
        corpus.write(output)

    # ocr corpus data -> NaiveBayes classifier
    # ranking name swap
    # TODO: the prediction result is currently not applied to the document
    # (the document.changeNames(...) step is disabled).
    self.naivebayes.predict_all(document.names())

    document.dump()
    return document