def run(modules_config, image_folder, segmentation_path, output_filename):
    """Recognize every segmented field, batch by batch, and write the labels.

    The segmentation file is read as tab-separated rows (empty rows are
    skipped and the first remaining row is dropped as a header). Each batch
    produced by the data loader is run through the preprocessing,
    recognition and postprocessing objects of the matching module, and the
    results are handed to the printer together with one id per line of the
    form ``<image basename without extension>_<2-digit line number>``.

    Args:
        modules_config: Passed unchanged to ``load_modules.load``.
        image_folder: Passed unchanged to ``FullDataset``.
        segmentation_path: Path of the tab-separated segmentation file.
        output_filename: Passed unchanged to ``Printer``.
    """
    # Load all objects we will need throughout the run process
    # (segmentation data, recognition/postprocess modules, printer, and
    # dataset (image loader)).
    with open(segmentation_path) as f:
        rows = [row.split('\t') for row in f.read().split('\n') if row != '']
    segmentation = rows[1:]  # drop the header row

    modules = load_modules.load(modules_config)
    printer = Printer(output_filename,
                      [module['field_name'] for module in modules])
    # Close the printer even when a module or the loader raises part-way.
    try:
        printer.write_header()
        dataset = FullDataset(image_folder, segmentation)
        dataloader = DataLoader(dataset, batch_size=8, shuffle=False,
                                num_workers=0, collate_fn=collate)

        prev_img_name = None
        count = 0
        line_number = 1
        for batch in dataloader:
            img_names = batch['img_names']
            if prev_img_name is None:
                prev_img_name = img_names[0]
            images = batch['fields']
            line_output = {}

            # warn possible segmentation errors, but don't auto fail, just warn
            if len(images) != len(modules):
                # img_names is a sequence, so it has to be rendered explicitly;
                # concatenating it to a str directly raises TypeError.
                sys.stderr.write(
                    "number of fields != expected number of fields for images "
                    + ", ".join(str(name) for name in img_names)
                    + " line # " + str(line_number) + "\n")

            # Read each field of this batch into line_output; zip stops at
            # the shorter of the two, so surplus fields/modules are ignored.
            for module, image in zip(modules, images):
                rectified_img = module['preprocessing'].batch_preprocess(image)
                pred = module['recognition'].batch_run(rectified_img)
                corrected_pred = module['postprocessing'].batch_postprocess(pred)
                line_output[module['field_name']] = corrected_pred

            # output this batch's labels
            line_ids = []
            for img_nm in img_names:
                if img_nm != prev_img_name:
                    # A new page starts: restart line numbering.
                    prev_img_name = img_nm
                    line_number = 1
                    count += 1
                    # log number of pages read
                    print(count)
                # zfill pads 1-9 to two digits and leaves longer numbers
                # untouched (the old manual padding produced e.g. '0100').
                page_id = img_nm.split('/')[-1].split('.')[0]
                line_ids.append(page_id + '_' + str(line_number).zfill(2))
                line_number += 1
            printer.write_batch(line_ids, line_output)
    finally:
        printer.close()
def run(modules_config, image_folder, segmentation_path, output_filename):
    """Recognize every segmented field, one census line at a time.

    The segmentation file is read as tab-separated rows (empty rows are
    skipped and the first remaining row is dropped as a header). Every item
    yielded by the dataset is run field by field through the preprocessing,
    recognition and postprocessing objects of the matching module, and the
    result is written under the id
    ``<image basename without extension>_<2-digit line number>``.

    Args:
        modules_config: Passed unchanged to ``load_modules.load``.
        image_folder: Passed unchanged to ``FullDataset``.
        segmentation_path: Path of the tab-separated segmentation file.
        output_filename: Passed unchanged to ``Printer``.
    """
    # Load all objects we will need throughout the run process
    # (segmentation data, recognition/postprocess modules, printer, and
    # dataset (image loader)).
    with open(segmentation_path) as f:
        rows = [row.split('\t') for row in f.read().split('\n') if row != '']
    segmentation = rows[1:]  # drop the header row

    modules = load_modules.load(modules_config)
    printer = Printer(output_filename,
                      [module['field_name'] for module in modules])
    # Close the printer even when a module or the dataset raises part-way.
    try:
        printer.write_header()
        dataset = FullDataset(image_folder, segmentation)

        prev_img_name = None
        count = 0
        line_number = 1
        for census_line in dataset:
            img_name = census_line['image_name']
            if prev_img_name is None or img_name != prev_img_name:
                # A new page starts: restart line numbering.
                prev_img_name = img_name
                line_number = 1
                count += 1
                # print number of census pages read so far
                print(count)

            images = census_line['fields']
            line_output = {}

            # warn possible segmentation errors, but don't auto fail, just warn
            if len(images) != len(modules):
                # The separating space before "line #" was missing, which
                # glued the line number label onto the image name.
                sys.stderr.write(
                    "number of fields != expected number of fields for image "
                    + img_name + " line # " + str(line_number) + "\n")

            # Read each field on this line into line_output; zip stops at
            # the shorter of the two, so surplus fields/modules are ignored.
            for module, image in zip(modules, images):
                rectified_img = module['preprocessing'].preprocess(image)
                pred = module['recognition'].run(rectified_img)
                corrected_pred = module['postprocessing'].postprocess(pred)
                line_output[module['field_name']] = corrected_pred

            # output this line's labels
            # zfill pads 1-9 to two digits and leaves longer numbers
            # untouched (the old manual padding produced e.g. '0100').
            page_id = img_name.split('/')[-1].split('.')[0]
            line_id = page_id + '_' + str(line_number).zfill(2)
            printer.write_line(line_id, line_output)
            line_number += 1
    finally:
        printer.close()
def printReportMethod(self):
    """Emit the product report to the console and to ``TestReport.txt``.

    Does nothing when neither ``self.plotGraph`` nor ``self.printReport`` is
    set. Otherwise ``self.readDir(self.printReport)`` is always called; its
    result is only used for the textual report, which is written when
    ``self.printReport`` is set.
    NOTE(review): with only ``plotGraph`` enabled, ``readDir`` is presumably
    called for its plotting side effect -- confirm against ``readDir``.

    Returns:
        None.
    """
    # Return if none of the options are enabled
    if not self.plotGraph and not self.printReport:
        return

    globalProducts = self.readDir(self.printReport)
    if not self.printReport:
        return

    # Sinks are created lazily, one after the other, so the report file is
    # only opened once the console output is finished (same order as before).
    sinkFactories = (
        ConsolePrinter,                         # print the messages to console
        lambda: FilePrinter('TestReport.txt'),  # print the messages to file
    )
    for makeSink in sinkFactories:
        reportPrinter = Printer(makeSink())
        # Always release the sink, even if one of the report sections fails.
        try:
            reportPrinter.printTopNByAverage(globalProducts, 3)
            reportPrinter.printTopNByReviews(globalProducts, 3)
            reportPrinter.printRatingFrequency(globalProducts, 3)
        finally:
            reportPrinter.close()