def get_data(self, paths, procs):
    """Produce (or locate) the train/validation/test ``.npy`` binaries on disk.

    Depending on ``procs["splitData"]`` this either:
      * splits the raw image folders into train/valid/test sets, builds the
        data/label matrices, resizes the images, converts them to NumPy
        arrays and writes six ``.npy`` binaries to ``paths["binary_path"]``; or
      * reuses the ``.npy`` binaries already present in ``paths["binary_path"]``.

    Args:
        paths: dict with at least ``data_path`` (one sub-folder per class),
            ``binary_path`` (where the ``.npy`` files live) and ``run_path``
            (forwarded to the image resizer).
        procs: processing options; reads ``imageSize`` (width, height),
            ``splitData`` (bool), ``splitRatio`` (train/valid/test fractions)
            and ``colorMap``.

    Returns:
        dict mapping the six binary names (``train_data``, ``train_labels``,
        ``valid_data``, ``valid_labels``, ``test_data``, ``test_labels``) to
        their file paths.

    Side effects:
        Writes ``.npy`` files when splitting; exits the process with status 1
        if data can neither be produced nor found.
    """

    def _binary_paths(binary_dir):
        # Single canonical source for the six .npy file paths (replaces 12
        # copy-pasted os.path.sep.join statements, duplicated in both branches).
        names = ("train_data", "train_labels",
                 "valid_data", "valid_labels",
                 "test_data", "test_labels")
        return {name: os.path.sep.join([binary_dir, name + ".npy"])
                for name in names}

    # Class names are the sub-folders of the data dir; skip hidden entries
    # such as .DS_Store.
    categories = [folder for folder in os.listdir(paths["data_path"])
                  if not folder.startswith('.')]
    if self.debug:
        print("Categories :", categories)

    image_size = procs["imageSize"]
    split_data = procs["splitData"]
    try:
        if split_data:
            # Split the folder of images into train/test/valid sets and get
            # back the per-split file lists and labels.
            di = DataImport()
            data = di.create_train_test_valid(
                categories, paths, tuple(procs["splitRatio"]))
            self.logger.info(
                "Data split into training, validation and testing successfully!")

            # Build the train/valid/test data and label matrices.
            colormap = procs["colorMap"]
            train_data, train_labels = di.create_data_matrices(
                data["train_labels"], data["train_data"], colormap)
            valid_data, valid_labels = di.create_data_matrices(
                data["valid_labels"], data["valid_data"], colormap)
            test_data, test_labels = di.create_data_matrices(
                data["test_labels"], data["test_data"], colormap)
            self.logger.info(
                "Train-Validation-Test data and label matrices generated successfully!")

            # Resize every image to the configured size.
            proc = Wrangler()
            train_matrix = proc.resize_images(
                image_size[0], image_size[1], train_data, colormap,
                paths["run_path"], procs)
            valid_matrix = proc.resize_images(
                image_size[0], image_size[1], valid_data, colormap,
                paths["run_path"], procs)
            test_matrix = proc.resize_images(
                image_size[0], image_size[1], test_data, colormap,
                paths["run_path"], procs)
            self.logger.info(
                "Images resized maintaining aspect ratio successfully!")

            # Convert the resized matrices to NumPy arrays.
            train_data = proc.create_numpy_data(train_matrix)
            valid_data = proc.create_numpy_data(valid_matrix)
            test_data = proc.create_numpy_data(test_matrix)
            self.logger.info("Data converted into Numpy format successfully!")

            # Persist all six arrays next to each other under binary_path.
            print("Writing data binaries to disk...")
            binaries = _binary_paths(paths["binary_path"])
            arrays = {"train_data": train_data, "train_labels": train_labels,
                      "valid_data": valid_data, "valid_labels": valid_labels,
                      "test_data": test_data, "test_labels": test_labels}
            for name, array in arrays.items():
                np.save(binaries[name], array)
            self.logger.info("Binaries writing successfully to disk!")
        elif os.path.exists(paths["binary_path"]) and os.listdir(paths["binary_path"]):
            # Reuse the binaries produced by a previous run.
            binaries = _binary_paths(paths["binary_path"])
            self.logger.info("Existing binary data files loaded successfully!")
        else:
            # BUG FIX: the original had no else branch here and silently
            # returned an empty dict; route the failure through the existing
            # error path instead.
            raise FileNotFoundError(
                "no binary files found in " + paths["binary_path"])
    except Exception as e:
        print(e)
        print("No binaries present!")
        sys.exit(1)
    return binaries