def locate_asset(self, image, classifier, lines="") -> List:
    '''Determine where assets (labels) are in the picture.

    Scans the classifier's raw detection output line by line, skips the
    detection classes we do not want ("sign" and "photo"), and crops each
    remaining detected bounding box out of the image.

    Args:
        image: image path/handle accepted by crop_image (opened via PIL there).
        classifier: object whose extract_info() parses one detection line
            into bounding-box coordinates.
        lines: raw multi-line detection output from the classifier.

    Returns:
        List of (area, cropped_image) tuples, where area is the bounding-box
        coordinates and cropped_image is the PIL image; empty list when no
        label was detected.
    '''
    cropped_images = []
    for line in str(lines).split('\n'):
        # Skip detection classes that are not labels.
        if "sign" in line or "photo" in line:
            continue
        # Only real detection lines carry a "left_x" coordinate field.
        if "left_x" in line:
            area = classifier.extract_info(line)
            cropped_images.append((area, crop_image(image, area)))
    if not cropped_images:
        logger.bad("No label found in image.")
    else:
        logger.good("Found " + str(len(cropped_images)) + " label(s) in image.")
    return cropped_images
def initialize_rotnet() -> int:
    '''Load the RotNet rotation-correction model up front, for speed.

    Loads the model from ROTNET_LOCATION/ROTNET_MODEL_NAME via init_rotnet
    so later rotation calls do not pay the model-load cost.

    Returns:
        0 on success, -1 on any initialization failure.
    '''
    try:
        logger.good("Initializing RotNet")
        init_rotnet(ROTNET_LOCATION + "/" + ROTNET_MODEL_NAME)
        return 0
    except Exception:
        # Narrowed from a bare except: so Ctrl-C / SystemExit still propagate.
        return -1
def init_ocr(self):
    '''Initialize the OCR engine (Tesseract only in this variant).

    Sets self.OCR to a TesseractOCR instance when self.TESSERACT is set.

    Returns:
        0 on success, -1 when initialization failed or produced no engine.
    '''
    try:
        if self.TESSERACT:
            logger.good("Initializing Tesseract")
            self.OCR = TesseractOCR()
        # Treat a missing or sentinel (-1) engine as failure.
        if self.OCR is None or self.OCR == -1:
            return -1
        return 0
    except Exception:
        # Narrowed from a bare except: so Ctrl-C / SystemExit still propagate.
        return -1
def rotate(image: object) -> object:
    '''Rotate a PIL image upright using RotNet's Keras/Tensorflow model.

    RotNet operates on files on disk, so the image is first written to a
    scratch file, corrected in place, then reloaded and returned.

    Input: image opened with PIL.
    Output: the rotated image, reopened from the scratch file.
    '''
    scratch_path = ROTNET_SAVE_FILE_NAME
    image.save(scratch_path, "JPEG")
    logger.good("Rotating Image")
    rotate_image(scratch_path)
    return Image.open(scratch_path)
def init_classifier(self):
    '''Initialize the object-detection classifier (Darknet).

    Sets self.classifier to a DarknetClassifier instance when self.DARKNET
    is set.

    Returns:
        0 on success, -1 when initialization failed or produced no classifier.
    '''
    try:
        if self.DARKNET:
            # Get a child process for speed considerations
            logger.good("Initializing Darknet")
            self.classifier = DarknetClassifier()
        # Treat a missing or sentinel (-1) classifier as failure.
        if self.classifier is None or self.classifier == -1:
            return -1
        return 0
    except Exception:
        # Narrowed from a bare except: so Ctrl-C / SystemExit still propagate.
        return -1
def find_and_classify(self, filename):
    '''Find the required text fields in the given image and read them
    through tesseract. Results are returned in a dictionary mapping
    detection-class names to OCR text.

    Pipeline: classify -> crop located assets -> OCR each crop at 0 and
    180 degrees -> zip class names with OCR results.

    Returns: dict of {class_name: ocr_result}, or None when no text was
    found. NOTE(review): the key/value pairing below assumes len(inf)
    equals len(cropped_images) and that perform_ocr returns one result
    per crop in the same order — confirm against perform_ocr.
    '''
    start = time.time()
    #------------------------------Classify Image----------------------------------------#
    logger.good("Classifying Image")
    coords = self.classifier.classify_image(filename)
    #lines=str(coords).split('\n')
    # Parse detection lines into (class_name, left_x, top_y) tuples.
    # Assumes the detector output format puts left_x at token 3 and
    # top_y at token 5 — TODO confirm against classify_image's output.
    inf = []
    for line in str(coords).split('\n'):
        if 'left_x' in line:
            info = line.split()
            ##print(info)
            left_x = int(info[3])
            top_y = int(info[5])
            inf.append((info[0], left_x, top_y))
    time1 = time.time()
    print("Classify Time: " + str(time1 - start))
    # ----------------------------Crop Image-------------------------------------------#
    logger.good("Finding required text")
    cropped_images = self.locate_asset(filename, self.classifier, lines=coords)
    time2 = time.time()
    #----------------------------Perform OCR-------------------------------------------#
    if cropped_images == []:
        logger.bad("No text found!")
        return None
    else:
        logger.good("Performing OCR")
        # OCR every crop twice: once as-is and once rotated 180 degrees,
        # so the combined list is twice as long as the crop list.
        ocr_results = self.perform_ocr(cropped_images, 0)
        ocr_results = ocr_results + self.perform_ocr(cropped_images, 180)
        #print(ocr_results)
        k = []
        v = []
        #print(inf)
        fil = filename + '-ocr'
        # Values: all OCR results (0-degree results then 180-degree results).
        for i in range(len(ocr_results)):
            v.append(ocr_results[i])
        # Keys for the first half: class name with its trailing character
        # stripped (presumably a ':' from the detector output — verify).
        for i in range(int(len(ocr_results) / 2)):
            k.append(inf[i][0][:-1])
        # Keys for the second half: same names suffixed with '2' so the
        # 180-degree results do not collide with the 0-degree keys.
        for i in range(int(len(ocr_results) / 2), (len(ocr_results))):
            k.append(inf[i % int(len(ocr_results) / 2)][0][:-1] + '2')
        t = dict(zip(k, v))
        time3 = time.time()
        print("OCR Time: " + str(time3 - time2))
        end = time.time()
        logger.good("Elapsed: " + str(end - start))
        return t
def init_database(self):
    '''Construct the configured database backend and store it on self.

    Chooses the local database when self.LOCAL_DATABASE is set, otherwise
    Cosmos when self.COSMOS_DATABASE is set; with neither flag set,
    self.database is left as the -1 sentinel.

    Returns:
        0 when a backend was created, -1 otherwise.
    '''
    if self.LOCAL_DATABASE:
        logger.good("Initializing local database")
        # Imported lazily so the dependency is only loaded when selected.
        from utils.local_database import LocalDatabase
        self.database = LocalDatabase()
    elif self.COSMOS_DATABASE:
        logger.good("Initializing Cosmos Database")
        from utils.cosmos_database import CosmosDatabase
        self.database = CosmosDatabase()
    else:
        self.database = -1
    return -1 if self.database == -1 else 0
def init_ocr(self):
    '''Initialize the OCR engine (Tesseract or Azure Cognitive Services).

    Sets self.OCR to a TesseractOCR instance when self.TESSERACT is set,
    otherwise to an AzureOCR instance when self.COGNITIVE_SERVICES is set.

    Returns:
        0 on success, -1 when initialization failed or produced no engine.
    '''
    try:
        if self.TESSERACT:
            logger.good("Initializing Tesseract")
            self.OCR = TesseractOCR()
        elif self.COGNITIVE_SERVICES:
            logger.good("Initializing Cognitive Services")
            self.OCR = AzureOCR()
        # Treat a missing or sentinel (-1) engine as failure.
        if self.OCR is None or self.OCR == -1:
            return -1
        return 0
    except Exception:
        # Narrowed from a bare except: so Ctrl-C / SystemExit still propagate.
        return -1
def locate_asset(image, lines=""):
    '''Crop the given image at every supplied bounding area.

    Each element of ``lines`` is treated as one crop area and paired with
    the corresponding cropped image. Logs how many labels were produced.

    Returns:
        List of (area, cropped_image) tuples; empty when ``lines`` is empty.
    '''
    cropped_images = [(area, crop_image(image, area)) for area in lines]
    if not cropped_images:
        logger.bad("No label found in image.")
    else:
        logger.good("Found " + str(len(cropped_images)) + " label(s) in image.")
    return cropped_images
def crop_image(image, area: Tuple) -> object:
    '''Crop ``area`` out of ``image``, rotate it upright, and return it.

    Input: image - path/handle openable by PIL.
    area - coordinates in tuple (xmin, ymax, xmax, ymin) format.
    Side effects: saves each crop to a numbered debug file ("asdf<i>.jpg")
    and optionally displays it when SHOW_IMAGES is set.
    '''
    source = Image.open(image)
    # Rotation correction happens right after cropping.
    label = rotate(source.crop(area))
    # Cap the crop size while preserving aspect ratio.
    label.thumbnail((3200, 3200), Image.ANTIALIAS)
    global i
    label.save("asdf" + str(i) + ".jpg", "JPEG")
    i += 1
    if SHOW_IMAGES:
        logger.good("Showing cropped image")
        label.show()
    return label
def find_and_classify(self, filename):
    '''Run the full pipeline on one image: classify, crop/rotate, OCR,
    and — when a database backend is configured — look the results up.

    Returns the database lookup result when self.database is set,
    otherwise the raw OCR results (None when nothing was detected).
    '''
    t_start = time.time()

    # Stage 1: object detection.
    logger.good("Classifying Image")
    coords = self.classifier.classify_image(filename)
    t_classified = time.time()
    print("Classify Time: " + str(t_classified - t_start))

    # Stage 2: crop and deskew every detected region.
    logger.good("Locating Asset")
    cropped_images = self.locate_asset(filename, self.classifier, lines=coords)
    t_rotated = time.time()
    print("Rotate Time: " + str(t_rotated - t_classified))

    # Stage 3: OCR, skipped entirely when nothing was detected.
    ocr_results = None
    if not cropped_images:
        logger.bad("No assets found, so terminating execution")
    else:
        logger.good("Performing OCR")
        ocr_results = self.OCR.ocr(cropped_images)
    t_ocr = time.time()
    print("OCR Time: " + str(t_ocr - t_rotated))
    t_end = time.time()
    logger.good("Elapsed: " + str(t_end - t_start))

    # Stage 4: database lookup when a backend was initialized (-1 = none).
    if self.database != -1:
        return self.database.lookup_database(ocr_results)
    else:
        return ocr_results
def find_and_classify(self, filename):
    '''Find the required text fields in the given image and read them
    through tesseract, printing the OCR results.

    Pipeline: classify -> crop located assets -> OCR. This variant only
    prints the OCR results; it always returns None.
    '''
    #------------------------------Classify Image----------------------------------------#
    logger.good("Classifying Image")
    coords = classify(filename)
    # ----------------------------Crop Image-------------------------------------------#
    logger.good("Finding required text")
    cropped_images = locate_asset.locate_asset(filename, lines=coords)
    #----------------------------Perform OCR-------------------------------------------#
    if not cropped_images:
        logger.bad("No text found!")
        return None
    logger.good("Performing OCR")
    ocr_results = self.OCR.ocr(cropped_images)
    print(ocr_results)
    return None
def find_and_classify(self, filename):
    '''Find the required text fields in the given image and read them
    through tesseract. Results are returned in a dictionary mapping
    detection-class names to OCR text.

    Pipeline: classify -> parse detections -> crop located assets ->
    OCR -> zip class names with OCR results.

    Returns: dict of {class_name: ocr_text}, or None when no text was
    found. NOTE(review): the zip below assumes ocr results and parsed
    detections (inf) are the same length and in the same order — confirm
    against self.OCR.ocr and classify_image.
    '''
    start = time.time()
    #------------------------------Classify Image----------------------------------------#
    logger.good("Classifying Image")
    coords = self.classifier.classify_image(filename)
    #lines=str(coords).split('\n')
    # Parse detection lines into (class_name, left_x, top_y) tuples,
    # skipping "sign"/"photo" detections. Assumes left_x at token 3 and
    # top_y at token 5 of the detector line — TODO confirm format.
    inf = []
    for line in str(coords).split('\n'):
        if "sign" in line:
            continue
        if "photo" in line:
            continue
        try:
            if 'left_x' in line:
                info = line.split()
                left_x = int(info[3])
                top_y = int(info[5])
                inf.append((info[0], left_x, top_y))
        except:
            # Malformed detection line; skip it rather than abort the parse.
            continue
    print("printing the info")
    print(inf)
    time1 = time.time()
    print("Classify Time: " + str(time1 - start))
    # ----------------------------Crop Image-------------------------------------------#
    logger.good("Finding required text")
    cropped_images = self.locate_asset(filename, self.classifier, lines=coords)
    time2 = time.time()
    #----------------------------Perform OCR-------------------------------------------#
    ocr_results = None
    if cropped_images == []:
        logger.bad("No text found!")
        return None
    else:
        logger.good("Performing OCR")
        ocr_results = self.OCR.ocr(cropped_images)
        #print(ocr_results)
        k = []
        v = []
        fil = filename + '-ocr'
        #with open(fil, 'w+') as f:
        # Pair each OCR result (element [1] of each entry — presumably
        # (something, text); verify against self.OCR.ocr) with its class
        # name, trailing character stripped (presumably a ':' suffix).
        for i in range(len(ocr_results)):
            v.append(ocr_results[i][1])
            k.append(inf[i][0][:-1])
        #k.insert(0,'Filename')
        #v.insert(0,filename)
        t = dict(zip(k, v))
        time3 = time.time()
        print("OCR Time: " + str(time3 - time2))
        end = time.time()
        logger.good("Elapsed: " + str(end - start))
        print(t)
        return t