Example #1
def locate_asset(self, image, classifier, lines="") -> List:
    ''' Determines where an asset is in the picture, returning the
    bounding-box coordinates of each detected tag.
    Returns:
        [(area, image)]
        area is the coordinates of the bounding box
        image is the cropped region, opened with PIL '''
    cropped_images = []
    for line in str(lines).split('\n'):
        # Skip detection classes we are not interested in
        if "sign" in line:
            continue
        if "photo" in line:
            continue
        if "left_x" in line:
            # Extract the nameplate bounding box from the detection line
            area = classifier.extract_info(line)
            # Crop the detected region out of the image
            cropped_images.append((area, crop_image(image, area)))

    if not cropped_images:
        logger.bad("No label found in image.")
    else:
        logger.good("Found " + str(len(cropped_images)) +
                    " label(s) in image.")

    return cropped_images
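
A minimal usage sketch for the function above, assuming a classifier object with the classify_image/extract_info interface used in the other examples; the file name is illustrative.

# Hypothetical call from inside the owning class; "photo.jpg" is made up.
coords = self.classifier.classify_image("photo.jpg")
for area, cropped in self.locate_asset("photo.jpg", self.classifier, lines=coords):
    print("Bounding box:", area)   # area comes from classifier.extract_info
    cropped.show()                 # cropped is a PIL image, as the docstring notes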
Example #2
def initialize_rotnet() -> int:
    ''' For speed, load the model up front.
    Heads to the RotNet directory and uses correct_rotation to load the model '''
    try:
        logger.good("Initializing RotNet")
        init_rotnet(ROTNET_LOCATION + "/" + ROTNET_MODEL_NAME)
        return 0
    except Exception:
        return -1
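
A short call sketch, reflecting only the 0/-1 return convention above.

# Hypothetical caller; assumes logger is available as in the other examples.
if initialize_rotnet() != 0:
    logger.bad("RotNet failed to initialize")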
Example #3
    def init_ocr(self):
        ''' Initializes the OCR engine '''
        try:
            if self.TESSERACT:
                logger.good("Initializing Tesseract")
                self.OCR = TesseractOCR()

            if self.OCR is None or self.OCR == -1:
                return -1
            return 0
        except Exception:
            return -1
Example #4
def rotate(image: object) -> object:
    ''' Uses RotNet's Keras/Tensorflow algorithm to rotate an image.
		Input: image, opened with PIL
		Output: Rotated image '''

    # We need to save the file first for processing
    image.save(ROTNET_SAVE_FILE_NAME, "JPEG")

    logger.good("Rotating Image")
    rotate_image(ROTNET_SAVE_FILE_NAME)
    image = Image.open(ROTNET_SAVE_FILE_NAME)
    return image
Example #5
    def init_classifier(self):
        ''' Initializes the classifier '''
        try:
            if self.DARKNET:
                # Get a child process for speed considerations
                logger.good("Initializing Darknet")
                self.classifier = DarknetClassifier()

            if self.classifier is None or self.classifier == -1:
                return -1
            return 0
        except Exception:
            return -1
Example #6
    def find_and_classify(self, filename):
        ''' Finds the required text fields in the given image and reads them with Tesseract.
        Results are returned in a dictionary '''
        start = time.time()

        #------------------------------Classify Image----------------------------------------#

        logger.good("Classifying Image")

        coords = self.classifier.classify_image(filename)
        # Keep the label name and top-left corner of every detection
        # that reports a bounding box
        inf = []
        for line in str(coords).split('\n'):
            if 'left_x' in line:
                info = line.split()
                left_x = int(info[3])
                top_y = int(info[5])
                inf.append((info[0], left_x, top_y))

        time1 = time.time()
        print("Classify Time: " + str(time1 - start))

        # ----------------------------Crop Image-------------------------------------------#
        logger.good("Finding required text")
        cropped_images = self.locate_asset(filename,
                                           self.classifier,
                                           lines=coords)

        time2 = time.time()

        #----------------------------Perform OCR-------------------------------------------#

        if not cropped_images:
            logger.bad("No text found!")
            return None
        else:
            logger.good("Performing OCR")
            ocr_results = self.perform_ocr(cropped_images, 0)
            ocr_results = ocr_results + self.perform_ocr(cropped_images, 180)
            # Build a dictionary mapping label names to OCR text: the
            # 0-degree results keep the label name, the 180-degree results
            # reuse it with a '2' suffix.
            k = []
            v = []
            for i in range(len(ocr_results)):
                v.append(ocr_results[i])
            for i in range(int(len(ocr_results) / 2)):
                k.append(inf[i][0][:-1])
            for i in range(int(len(ocr_results) / 2), len(ocr_results)):
                k.append(inf[i % int(len(ocr_results) / 2)][0][:-1] + '2')

            t = dict(zip(k, v))
        time3 = time.time()
        print("OCR Time: " + str(time3 - time2))

        end = time.time()
        logger.good("Elapsed: " + str(end - start))
        return t
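
For reference, a small worked sketch of the key/value pairing above, using made-up detections and OCR strings; it assumes each label in inf ends with a character (such as a colon) that the [:-1] slice strips off.

# Hypothetical data: two detections, OCR run at 0 and then 180 degrees.
inf = [("serial:", 10, 20), ("model:", 30, 40)]
ocr_results = ["ABC123", "XJ-9", "321CBA", "9-JX"]

# Mirrors the loops above: the first half keeps the label name,
# the second half reuses it with a '2' suffix for the 180-degree pass.
k = [e[0][:-1] for e in inf] + [e[0][:-1] + '2' for e in inf]
t = dict(zip(k, ocr_results))
# {'serial': 'ABC123', 'model': 'XJ-9', 'serial2': '321CBA', 'model2': '9-JX'}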
Example #7
	def init_database(self):
		''' Initializes the configured database backend; returns 0 on success, -1 on failure '''
		if self.LOCAL_DATABASE:
			logger.good("Initializing local database")
			from utils.local_database import LocalDatabase
			self.database = LocalDatabase()
		elif self.COSMOS_DATABASE:
			logger.good("Initializing Cosmos Database")
			from utils.cosmos_database import CosmosDatabase
			self.database = CosmosDatabase()
		else:
			self.database = -1
		if self.database == -1:
			return -1
		return 0
Example #8
	def init_ocr(self):
		''' Initializes the OCR engine '''
		try:
			if self.TESSERACT:
				logger.good("Initializing Tesseract")
				self.OCR = TesseractOCR()
			elif self.COGNITIVE_SERVICES:
				logger.good("Initializing Cognitive Services")
				self.OCR = AzureOCR()
			if self.OCR is None or self.OCR == -1:
				return -1
			return 0
		except Exception:
			return -1
Example #9
def locate_asset(image, lines=""):
    ''' Crops every region listed in lines out of the given image '''
    cropped_images = []

    for line in lines:
        # Each entry in lines is used directly as a crop area
        cropped_images.append((line, crop_image(image, line)))

    if not cropped_images:
        logger.bad("No label found in image.")
    else:
        logger.good("Found " + str(len(cropped_images)) +
                    " label(s) in image.")

    return cropped_images
Example #10
def crop_image(image, area: Tuple) -> object:
	''' Uses PIL to crop an image, given its area.
	Input:
		image - path to the image; it is opened with PIL inside this function
		area - coordinates as a tuple in (xmin, ymax, xmax, ymin) format '''
	img = Image.open(image)
	cropped_image = img.crop(area)

	# Rotation should happen here
	rotated_image = rotate(cropped_image)

	size = (3200, 3200)
	rotated_image.thumbnail(size, Image.ANTIALIAS)
	# Save each crop to a uniquely numbered file for inspection
	global i
	rotated_image.save("asdf" + str(i) + ".jpg", "JPEG")
	i += 1

	if SHOW_IMAGES:
		logger.good("Showing cropped image")
		rotated_image.show()

	return rotated_image
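
A minimal call sketch, assuming the module-level names used above (i, SHOW_IMAGES, rotate) are already defined; the path and box values are illustrative.

# Hypothetical usage; crop_image opens the path itself via PIL.
label = crop_image("photo.jpg", (120, 80, 640, 400))
label.save("label.jpg", "JPEG")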
Example #11
	def find_and_classify(self, filename):
		''' Runs the full classify -> crop/rotate -> OCR -> database lookup pipeline on one image '''
		start = time.time()

		#### Classify Image ####
		logger.good("Classifying Image")
		coords = self.classifier.classify_image(filename)
		########################

		time1 = time.time()
		print("Classify Time: " + str(time1-start))

		#### Crop/rotate Image ####
		logger.good("Locating Asset")
		cropped_images = self.locate_asset(filename, self.classifier, lines=coords)
		###########################
		
		time2 = time.time()
		print("Rotate Time: " + str(time2-time1))


		#### Perform OCR ####
		ocr_results = None
		if not cropped_images:
			logger.bad("No assets found, so terminating execution")	 
		else:
			logger.good("Performing OCR")
			ocr_results = self.OCR.ocr(cropped_images)
		#####################
		
		time3 = time.time()
		print("OCR Time: " + str(time3-time2))

		end = time.time()
		logger.good("Elapsed: " + str(end-start))

		#### Lookup Database ####
		if self.database != -1:
			products = self.database.lookup_database(ocr_results)
			return products
		else:
			return ocr_results
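
A hedged driver sketch for the pipeline above; the Pipeline class name and the asset.jpg path are assumptions standing in for whatever object actually exposes these methods.

# Hypothetical driver; relies only on the 0/-1 init convention and the
# database fallback shown above.
pipeline = Pipeline()
if pipeline.init_classifier() == 0 and pipeline.init_ocr() == 0:
    pipeline.init_database()  # if this fails, find_and_classify returns raw OCR results
    print(pipeline.find_and_classify("asset.jpg"))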
Example #12
    def find_and_classify(self, filename):
        ''' Finds the required text fields in the given image and reads them with Tesseract.
        Results are returned in a dictionary '''
        start = time.time()

        #------------------------------Classify Image----------------------------------------#

        logger.good("Classifying Image")

        coords = classify(filename)

        # ----------------------------Crop Image-------------------------------------------#
        logger.good("Finding required text")

        cropped_images = locate_asset.locate_asset(filename, lines=coords)

        time2 = time.time()

        #----------------------------Perform OCR-------------------------------------------#

        ocr_results = None

        if not cropped_images:
            logger.bad("No text found!")
            return None
        else:
            logger.good("Performing OCR")
            ocr_results = self.OCR.ocr(cropped_images)
            print(ocr_results)
Example #13
    def find_and_classify(self, filename):
        ''' Finds the required text fields in the given image and reads them with Tesseract.
        Results are returned in a dictionary '''
        start = time.time()

        #------------------------------Classify Image----------------------------------------#

        logger.good("Classifying Image")

        coords = self.classifier.classify_image(filename)
        # Keep the label name and top-left corner of every usable detection
        inf = []
        for line in str(coords).split('\n'):
            if "sign" in line:
                continue
            if "photo" in line:
                continue
            try:
                if 'left_x' in line:
                    info = line.split()
                    left_x = int(info[3])
                    top_y = int(info[5])
                    inf.append((info[0], left_x, top_y))
            except Exception:
                # Skip detection lines that do not parse cleanly
                continue

        print("printing the info")
        print(inf)
        time1 = time.time()
        print("Classify Time: " + str(time1 - start))

        # ----------------------------Crop Image-------------------------------------------#
        logger.good("Finding required text")
        cropped_images = self.locate_asset(filename,
                                           self.classifier,
                                           lines=coords)

        time2 = time.time()

        #----------------------------Perform OCR-------------------------------------------#

        ocr_results = None

        if not cropped_images:
            logger.bad("No text found!")
            return None
        else:
            logger.good("Performing OCR")
            ocr_results = self.OCR.ocr(cropped_images)
            # Pair each OCR result with the label name of its detection
            k = []
            v = []
            for i in range(len(ocr_results)):
                v.append(ocr_results[i][1])
                k.append(inf[i][0][:-1])

            t = dict(zip(k, v))

        time3 = time.time()
        print("OCR Time: " + str(time3 - time2))

        end = time.time()
        logger.good("Elapsed: " + str(end - start))
        print(t)
        return t