def cropped_obj_images_to_gcs(self):
    """Upload the cropped object images of this class to Google Cloud Storage.

    The array returned by ``self.get_cropped_obj_images()`` is saved with
    ``mf.save_np_array_to_gsc`` into ``self.bucket_name`` under the name
    ``<processed_bucket_subfolder><class_name>/<processed_array_save_name>``.
    The size of the array's first axis is reported as the image count.
    Progress is logged through ``mf.print_timestamp_message``.
    """
    # Read, crop, and resize images
    mf.print_timestamp_message(
        f'Reading, cropping, and resizing {self.class_name} images')
    cropped_images = self.get_cropped_obj_images()
    image_count = cropped_images.shape[0]

    # Write images to the Google Cloud Storage bucket
    destination = f'{self.processed_bucket_subfolder}{self.class_name}/{self.processed_array_save_name}'
    mf.print_timestamp_message(
        f'Writing {image_count} cropped images to GCS bucket/folder {self.bucket_name}/{destination}'
    )
    mf.save_np_array_to_gsc(
        np_array=cropped_images,
        bucket_name=self.bucket_name,
        file_name=destination,
    )
def resize_and_save_images(self):
    """Download and resize this class's images, then upload all outputs to GCS.

    Three objects are written to ``self.bucket_name``, each under the prefix
    ``<processed_bucket_subfolder><class_name>/``:

    * the resized images as a numpy array (``processed_array_save_name``),
    * the bounding-box dataframe as csv (``processed_bbox_save_name``),
    * the class image info dataframe as csv (``processed_class_save_name``).
    """
    # Generate class information
    mf.print_timestamp_message(
        f'Getting urls, bounding boxes, and image IDs for {self.class_name} images'
    )
    # The image IDs are returned by the helper but not needed here.
    urls, bbox_df, _image_ids, class_image_df = self.get_image_class_info()

    # Read and resize images
    mf.print_timestamp_message(
        f'Reading images from {len(urls)} URLs and resizing to {self.resize_height} X {self.resize_width}'
    )
    resized_images = np.array(
        load_resize_images_from_urls(
            url_list=urls,
            resize_height=self.resize_height,
            resize_width=self.resize_width,
        )
    )

    # Every output shares the same per-class folder prefix
    class_prefix = f'{self.processed_bucket_subfolder}{self.class_name}/'

    # Write images to the Google Cloud Storage bucket
    array_target = f'{class_prefix}{self.processed_array_save_name}'
    mf.print_timestamp_message(
        f'Writing images to GCS bucket/folder {self.bucket_name}/{array_target}'
    )
    mf.save_np_array_to_gsc(
        np_array=resized_images,
        bucket_name=self.bucket_name,
        file_name=array_target,
    )

    # Write bounding box csv to the Google Cloud Storage bucket
    bbox_target = f'{class_prefix}{self.processed_bbox_save_name}'
    mf.print_timestamp_message(
        f'Writing bounding box csv file to GCS bucket/folder {self.bucket_name}/{bbox_target}'
    )
    mf.write_csv_to_gcs(
        dframe=bbox_df,
        bucket_name=self.bucket_name,
        file_name=bbox_target,
    )

    # Write class info csv to the Google Cloud Storage bucket
    class_target = f'{class_prefix}{self.processed_class_save_name}'
    mf.print_timestamp_message(
        f'Writing class image info csv file to GCS bucket/folder {self.bucket_name}/{class_target}'
    )
    mf.write_csv_to_gcs(
        dframe=class_image_df,
        bucket_name=self.bucket_name,
        file_name=class_target,
    )
def get_processed_data(self):
    """Collect images, bounding boxes, and class labels for every class.

    For each name in ``self.class_list`` an ``OpenCVImageClassRetriever`` is
    created and its ``get_training_data()`` result is gathered.

    Returns:
        A tuple ``(images, bboxes, labels)`` where ``images`` and ``bboxes``
        are the per-class arrays stacked with ``np.vstack`` and ``labels`` is
        a flat list holding the class name once per image of that class.
    """
    images_per_class = []
    bboxes_per_class = []
    labels_per_class = []

    # Loop over image classes to retrieve data
    class_count = len(self.class_list)
    for position, class_name in enumerate(self.class_list, start=1):
        mf.print_timestamp_message(
            f"Pulling data for class '{class_name}' ({position} of {class_count}) from Google Cloud Storage"
        )
        retriever = OpenCVImageClassRetriever(class_name=class_name)
        class_images, class_bboxes = retriever.get_training_data()
        images_per_class.append(class_images)
        bboxes_per_class.append(class_bboxes)
        # One label per image, taken from the first axis of the image array
        labels_per_class.append([class_name] * class_images.shape[0])

    # Concatenate / unnest the outer lists
    images = np.vstack(images_per_class)
    bboxes = np.vstack(bboxes_per_class)
    labels = mf.unnest_list_of_lists(labels_per_class)
    return images, bboxes, labels
def run_grid_search(self):
    """Train every model in ``self.model_list`` on each of ``self.k_folds`` folds.

    For each (model, fold) pair the model is compiled and fitted on the
    training folds with validation-based checkpointing and early stopping.
    The checkpoint saved at ``self.model_save_name`` is then reloaded and
    scored on the test fold.

    Returns:
        A ``pandas.DataFrame`` with one row per (model, fold) pair and the
        columns ``model``, ``model_number``, ``fold``,
        ``categorical_accuracy`` and ``execution_time`` (seconds spent
        between the start of training and the end of ``fit``).

    NOTE(review): ``x`` and ``y`` are not defined inside this method; they
    are presumably module-level arrays of inputs and one-hot targets.
    Confirm against the rest of the file.
    """
    # Output columns, filled with one entry per (model, fold) pair
    results = {
        'model': [],
        'model_number': [],
        'fold': [],
        'categorical_accuracy': [],
        'execution_time': [],
    }

    # Train, test, validation fold assignments
    train_folds, test_folds, valid_folds = self.train_test_val_folds()

    for model_idx, model in enumerate(self.model_list):
        for fold in range(self.k_folds):
            # Resolve the sample indices that belong to each split
            fold_indices = self.get_fold_indices()
            train_idx = mf.unnest_list_of_lists([
                members for pos, members in enumerate(fold_indices)
                if pos in train_folds[fold]
            ])
            test_idx = mf.unnest_list_of_lists([
                members for pos, members in enumerate(fold_indices)
                if pos == test_folds[fold]
            ])
            valid_idx = mf.unnest_list_of_lists([
                members for pos, members in enumerate(fold_indices)
                if pos == valid_folds[fold]
            ])

            # Batch generators for the training and validation splits
            train_batches = self.batch_generator(
                x[train_idx], y[train_idx], batch_size=self.batch_size)
            valid_batches = self.batch_generator(
                x[valid_idx], y[valid_idx], batch_size=self.batch_size)

            # Class weights from the arg-max label of each training target
            class_weights = imm.make_class_weight_dict(
                [np.argmax(target) for target in y[train_idx]],
                return_dict=True)

            # Callbacks: keep the best checkpoint and stop when val_loss stalls
            checkpoint_cb = keras.callbacks.ModelCheckpoint(
                self.model_save_name,
                monitor='val_loss',
                verbose=1,
                save_best_only=True,
                mode='min')
            early_stop_cb = keras.callbacks.EarlyStopping(
                monitor='val_loss',
                mode='min',
                patience=self.patience)

            # Train the model; the timer starts before the session reset
            started_at = time.time()
            keras.backend.clear_session()
            model.compile(
                loss=self.loss,
                optimizer=self.optimizer,
                metrics=self.metrics)
            model.fit(
                train_batches,
                epochs=self.epochs,
                validation_data=valid_batches,
                steps_per_epoch=len(train_idx) // self.batch_size,
                validation_steps=len(valid_idx) // self.batch_size,
                callbacks=[checkpoint_cb, early_stop_cb, self.lr_schedule],
                class_weight=class_weights,
                verbose=2)
            elapsed = time.time() - started_at

            # Accuracy on the test set, using the checkpointed model
            best_model = keras.models.load_model(self.model_save_name)
            predictions = best_model.predict(x[test_idx])
            true_labels = np.argmax(y[test_idx], axis=-1)
            predicted_labels = np.argmax(predictions, axis=-1)
            accuracy = np.mean(np.equal(true_labels, predicted_labels))

            results['categorical_accuracy'].append(accuracy)
            results['execution_time'].append(elapsed)
            results['fold'].append(fold)
            results['model'].append(best_model.name)
            results['model_number'].append(model_idx)
            mf.print_timestamp_message(
                f'Completed fold {fold+1} of {self.k_folds} for model {model_idx+1} of {len(self.model_list)}'
            )

            # Release per-fold objects before the next iteration
            del (train_batches, valid_batches, checkpoint_cb, early_stop_cb,
                 best_model, predictions, true_labels, predicted_labels)
            keras.backend.clear_session()

    # Collate fold results into a DataFrame
    return pd.DataFrame(results)
def save_whole_images_and_bbox(self):
    """Download, resize, and pickle whole images with their bounding boxes.

    Image IDs are taken from the ``self.image_id_col`` column of the class's
    bounding-box dataframe, optionally truncated to ``self.max_images``. Each
    image is read from its ``OriginalURL``, resized, and kept only when it is
    not blank and has the expected ``(resize_width, resize_height, 3)`` shape.
    A kept image contributes one entry per bounding box, so the three output
    lists stay aligned.

    Three pickle files (protocol 4) are written to
    ``<save_loc><class_name>/``: ``img_id_list.pkl``, ``coord_list.pkl`` and
    ``img_arr.pkl``.

    Reading is best-effort: an image that cannot be read or resized is
    skipped, and the number of skipped images is reported once at the end.
    Only ``Exception`` subclasses are treated this way, so
    ``KeyboardInterrupt`` and ``SystemExit`` still stop the run.

    NOTE(review): ``resize`` receives ``(resize_width, resize_height)``. If
    ``resize`` is ``skimage.transform.resize`` the expected order is
    ``(rows, cols)``; this is harmless for square targets but should be
    confirmed for non-square ones.
    """
    # Retrieve class metadata
    image_retriever = OpenCVImageClassRetriever(class_name=self.class_name)
    bbox_df = image_retriever.get_bounding_box_df()
    desc_df = image_retriever.get_class_desc_df()

    # Image IDs
    unique_img_ids = list(
        np.unique(bbox_df[self.image_id_col].values.tolist()))
    if self.max_images is not None:
        unique_img_ids = unique_img_ids[:self.max_images]

    # Read and resize images, pairing each with its bounding boxes
    img_id_list = []
    img_list = []
    coord_list = []
    target_size = (self.resize_width, self.resize_height)
    correct_shape = (self.resize_width, self.resize_height, 3)
    n_skipped = 0
    for img_id in tqdm.tqdm(unique_img_ids):
        try:
            # Subset info dataframes for this image ID; the bounding-box
            # filter uses the same column the IDs were drawn from.
            bbox_df_i = bbox_df[bbox_df[self.image_id_col] == img_id]
            desc_df_i = desc_df[desc_df.ImageID == img_id]

            # Read image
            img_i = read_url_image(desc_df_i['OriginalURL'].values[0])

            bbox_coords = bbox_df_i[['XMin', 'XMax', 'YMin',
                                     'YMax']].values.tolist()
            if not bbox_coords:
                continue

            # The resized image is identical for every box of this image,
            # so resize and validate it once instead of once per box.
            img_resized = resize(img_i, target_size)
            usable = (not is_blank_img(img_resized)
                      and img_resized.shape == correct_shape)
        except Exception:
            # Best-effort: a bad URL or unreadable image must not abort
            # the whole class, but it is counted and reported below.
            n_skipped += 1
            continue

        if not usable:
            continue
        for bbc in bbox_coords:
            img_list.append(img_resized)
            coord_list.append(bbc)
            img_id_list.append(img_id)

    if n_skipped:
        mf.print_timestamp_message(
            f'Skipped {n_skipped} of {len(unique_img_ids)} images that could not be read or resized'
        )

    # Save items
    class_folder_loc = f'{self.save_loc}{self.class_name}/'
    mf.create_folder_if_not_existing(class_folder_loc)

    mf.print_timestamp_message(
        f'Writing file with image IDs: {class_folder_loc}img_id_list.pkl')
    with open(f'{class_folder_loc}img_id_list.pkl', 'wb') as f:
        pickle.dump(img_id_list, f, protocol=4)

    mf.print_timestamp_message(
        f'Writing file with coordinates: {class_folder_loc}coord_list.pkl')
    with open(f'{class_folder_loc}coord_list.pkl', 'wb') as f:
        pickle.dump(coord_list, f, protocol=4)

    mf.print_timestamp_message(
        f'Writing file with numpy array: {class_folder_loc}img_arr.pkl')
    with open(f'{class_folder_loc}img_arr.pkl', 'wb') as f:
        # The array is built only at dump time to limit peak memory.
        pickle.dump(np.array(img_list), f, protocol=4)
rect = patches.Rectangle((xmin_p, ymin_p), box_width, box_height, linewidth = linewidth, edgecolor = box_color, facecolor = 'none') ax.text(xmin_p, ymin_p + y_offset, labels[i], color = text_color, fontsize = fontsize) ax.add_patch(rect) plt.imshow(img_arr) plt.show() ### Data Processing: Read & REsize Images, Get Bounding Box Coordinates ############################################################################### image_id_list_dict = {} coord_list_dict = {} od_classes = cdp.config_obj_detection_classes for i, odc in enumerate(od_classes): mf.print_timestamp_message(f'Starting {odc} class {(i+1)} of {len(od_classes)}') image_retriever = DetectionImageRetriever(class_name = odc, max_images = 5000, resize_height = 416, resize_width = 416) img_coord_dict, img_array_dict = image_retriever.get_whole_images_and_bbox() img_id_list = list(img_coord_dict.keys()) image_id_list_dict[odc] = img_id_list for i, x in tqdm.tqdm(enumerate(img_id_list)): img_save_name = f'{intmd_save_loc}{x}.jpeg' im = Image.fromarray((img_array_dict.get(x) * 255).astype(np.uint8)) im.save(img_save_name) coord_list_dict[odc] = img_coord_dict del img_id_list, img_coord_dict, img_array_dict;