def testOrc():
    """Feed the ``test.png`` sample to ``image_file_to_string`` and print the result.

    The sample is looked up in the ``libs/pytesser`` directory that sits next
    to this module.  Backslashes in the directory path are replaced with
    forward slashes before the file name is appended.
    """
    module_dir = os.path.dirname(__file__)
    pytesser_dir = os.path.join(module_dir, 'libs/pytesser').replace('\\', '/')
    sample_png = pytesser_dir + "/test.png"

    # The file is opened as an image before the text extraction call; the
    # resulting image object is not used afterwards.
    _opened = Image.open(sample_png)

    recognised = image_file_to_string(sample_png)
    print("testOrc: %s" % recognised)
def item_completed(self, results, item, info):
    """Set ``item['price']`` from the text extracted out of the stored images.

    ``results`` is walked as ``(ok, data)`` pairs and only the pairs whose
    ``ok`` flag is truthy contribute their ``data['path']``.  When nothing is
    left, a message is logged through ``info.spider`` at ``log.INFO`` level
    and the item is returned untouched (it is not dropped).

    Each remaining path is resolved below ``PRICE_IMAGES_STORE``, passed to
    ``image_file_to_string`` and cleaned with ``self._fixed_string``.  The
    first two characters of the cleaned text are discarded (presumably a
    currency prefix -- confirm against ``_fixed_string``) and the stripped
    remainder is stored as the price.  With several images, the value from
    the last one wins.

    If anything raises while an image is being processed, ``item['price']``
    is set to ``None`` and the exception is re-raised.

    Returns the ``item`` that was passed in.
    """
    stored_paths = []
    for succeeded, payload in results:
        if succeeded:
            stored_paths.append(payload['path'])

    if not stored_paths:
        info.spider.log("%s : Item contains no images" % self.__class__, log.INFO)
        return item

    for relative_path in stored_paths:
        image_file = PRICE_IMAGES_STORE + '/' + relative_path
        try:
            # Opened before the text extraction call; the image object itself
            # is not used afterwards.
            _opened = Image.open(image_file)
            raw_text = image_file_to_string(image_file)
            cleaned = self._fixed_string(raw_text)
            item['price'] = cleaned[2:].strip()
        except:
            # Catches everything so the price is cleared before the error
            # propagates to the caller.
            item['price'] = None
            raise

    return item
def item_completed(self, results, item, info):
    """Set ``item['price']`` from the text extracted out of the stored images.

    ``results`` is walked as ``(ok, data)`` pairs and only the pairs whose
    ``ok`` flag is truthy contribute their ``data['path']``.  When nothing is
    left, a message is logged through ``info.spider`` at ``log.INFO`` level
    and the item is returned untouched (it is not dropped).

    Each remaining path is resolved below ``PRICE_IMAGES_STORE``, passed to
    ``image_file_to_string`` and cleaned with ``self._fixed_string``.  The
    first two characters of the cleaned text are discarded (presumably a
    currency prefix -- confirm against ``_fixed_string``) and the stripped
    remainder is stored as the price.  With several images, the value from
    the last one wins.

    If anything raises while an image is being processed, ``item['price']``
    is set to ``None`` and the exception is re-raised.

    Returns the ``item`` that was passed in.
    """
    stored_paths = []
    for succeeded, payload in results:
        if succeeded:
            stored_paths.append(payload['path'])

    if not stored_paths:
        info.spider.log("%s : Item contains no images" % self.__class__, log.INFO)
        return item

    for relative_path in stored_paths:
        image_file = PRICE_IMAGES_STORE + '/' + relative_path
        try:
            # Opened before the text extraction call; the image object itself
            # is not used afterwards.
            _opened = Image.open(image_file)
            raw_text = image_file_to_string(image_file)
            cleaned = self._fixed_string(raw_text)
            item['price'] = cleaned[2:].strip()
        except:
            # Catches everything so the price is cleared before the error
            # propagates to the caller.
            item['price'] = None
            raise

    return item
def testOrc():
    """Feed the ``test.png`` sample to ``image_file_to_string`` and print the result.

    The sample is looked up in the ``libs/pytesser`` directory that sits next
    to this module.  Backslashes in the directory path are replaced with
    forward slashes before the file name is appended.
    """
    module_dir = os.path.dirname(__file__)
    pytesser_dir = os.path.join(module_dir, 'libs/pytesser').replace('\\', '/')
    sample_png = pytesser_dir + "/test.png"

    # The file is opened as an image before the text extraction call; the
    # resulting image object is not used afterwards.
    _opened = Image.open(sample_png)

    recognised = image_file_to_string(sample_png)
    print("testOrc: %s" % recognised)