def preprocess_image(self, inputs):
    """
    Takes an image as input and prepares it to be passed through the network.
    """
    return preprocess_image(inputs, mode='caffe')
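# A minimal sketch of what 'caffe'-mode preprocessing typically does in
# keras-retinanet style pipelines: cast to float and subtract the ImageNet
# channel means in BGR order, with no scaling to [0, 1]. Illustrative only;
# the actual behavior is defined by the imported preprocess_image().
import numpy as np

def caffe_mode_preprocess_sketch(image_bgr):
    # image_bgr: HxWx3 uint8 array in BGR channel order (as returned by cv2.imread)
    x = image_bgr.astype(np.float32)
    x -= np.array([103.939, 116.779, 123.68], dtype=np.float32)  # per-channel ImageNet means (B, G, R)
    return x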
def detectObjectsFromImage_Me(self, input_image="", output_image_path="", input_type="file",
                              output_type="file", extract_detected_objects=False,
                              minimum_percentage_probability=50, display_percentage_probability=True,
                              display_object_name=True, display_box=True, thread_safe=False,
                              custom_objects=None):
    """
    'detectObjectsFromImage()' is used to detect objects observable in the given image:

        * input_image: a file path, image numpy array or image file stream
        * output_image_path (only if output_type = "file"): file path to the output image that
          will contain the detection boxes and labels
        * input_type (optional): type of the input. Acceptable values are "file", "array"
          and "stream"
        * output_type (optional): type of the output. Acceptable values are "file" and "array"
        * extract_detected_objects (optional): save each detected object individually as an image
          and return an array of the objects' image paths
        * minimum_percentage_probability (optional, 50 by default): minimum percentage probability
          for a detected object to be included in the output
        * display_percentage_probability (optional, True by default): show or hide the percentage
          probability of each object in the saved/returned detected image
        * display_object_name (optional, True by default): show or hide the name of each object
          in the saved/returned detected image
        * display_box (optional, True by default): show or hide the bounding box of each object
          in the saved/returned detected image
        * thread_safe (optional, False by default): enforce that the loaded detection model works
          across all threads if set to True, made possible by forcing all Tensorflow inference
          to run on the default graph
        * custom_objects (optional): dictionary marking which object classes are "valid"; all
          other detections are filtered out

    The values returned by this function depend on the parameters passed. The possible return
    values are stated below:

    - If extract_detected_objects = False (default) and output_type = "file" (default), you must
      pass in 'output_image_path' as a string to the path where you want the detected image
      saved. The function will then return:
        1. an array of dictionaries, each corresponding to an object detected in the image, with
           the properties:
            * name (string)
            * percentage_probability (float)
            * box_points (list of x1, y1, x2 and y2 coordinates)

    - If extract_detected_objects = False (default) and output_type = "array", the function will
      return:
        1. a numpy array of the detected image
        2. an array of dictionaries as described above

    - If extract_detected_objects = True and output_type = "file" (default), you must pass in
      'output_image_path' as above. The function will then return:
        1. an array of dictionaries as described above
        2. an array of string paths to the image of each object extracted from the image

    - If extract_detected_objects = True and output_type = "array", the function will return:
        1. a numpy array of the detected image
        2. an array of dictionaries as described above
        3. an array of numpy arrays of each object detected in the image

    :param input_image:
    :param output_image_path:
    :param input_type:
    :param output_type:
    :param extract_detected_objects:
    :param minimum_percentage_probability:
    :param display_percentage_probability:
    :param display_object_name:
    :param display_box:
    :param thread_safe:
    :param custom_objects:
    :return image_frame:
    :return output_objects_array:
    :return detected_objects_image_array:
    """
    if not self.__modelLoaded:
        raise ValueError("You must call the loadModel() function before making object detection.")

    try:
        model_detections = list()
        detections = list()
        detected_objects_image_array = []
        min_probability = minimum_percentage_probability / 100

        if input_type == "file":
            input_image = cv2.imread(input_image)
        elif input_type == "array":
            input_image = np.array(input_image)

        detected_copy = input_image
        image_copy = input_image

        if self.__modelType == "yolov3" or self.__modelType == "tinyyolov3":
            image_h, image_w, _ = detected_copy.shape
            detected_copy = preprocess_input(detected_copy, self.__yolo_model_image_size)

            model = self.__model_collection[0]
            yolo_result = model.predict(detected_copy)
            model_detections = retrieve_yolo_detections(yolo_result,
                                                        self.__yolo_anchors,
                                                        min_probability,
                                                        self.__nms_thresh,
                                                        self.__yolo_model_image_size,
                                                        (image_w, image_h),
                                                        self.numbers_to_names)
        elif self.__modelType == "retinanet":
            detected_copy = preprocess_image(detected_copy)
            detected_copy, scale = resize_image(detected_copy)

            model = self.__model_collection[0]
            boxes, scores, labels = model.predict_on_batch(np.expand_dims(detected_copy, axis=0))
            boxes /= scale

            for box, score, label in zip(boxes[0], scores[0], labels[0]):
                # scores are sorted, so we can stop at the first one below the threshold
                if score < min_probability:
                    break
                detection_dict = dict()
                detection_dict["name"] = self.numbers_to_names[label]
                detection_dict["percentage_probability"] = score * 100
                detection_dict["box_points"] = box.astype(int).tolist()
                model_detections.append(detection_dict)

        counting = 0
        objects_dir = output_image_path + "-objects"

        for detection in model_detections:
            counting += 1
            label = detection["name"]
            percentage_probability = detection["percentage_probability"]
            box_points = detection["box_points"]

            # skip detections not marked as "valid" in custom_objects, if given
            if custom_objects is not None:
                if custom_objects[label] != "valid":
                    continue

            detections.append(detection)

            if not display_object_name:
                label = None
            if not display_percentage_probability:
                percentage_probability = None

            image_copy = draw_boxes(image_copy, box_points, display_box, label,
                                    percentage_probability, self.__box_color)

            if extract_detected_objects:
                # crop the detected object out of the (annotated) image
                splitted_copy = image_copy.copy()[box_points[1]:box_points[3],
                                                  box_points[0]:box_points[2]]
                if output_type == "file":
                    if not os.path.exists(objects_dir):
                        os.mkdir(objects_dir)
                    splitted_image_path = os.path.join(
                        objects_dir, detection["name"] + "-" + str(counting) + ".jpg")
                    cv2.imwrite(splitted_image_path, splitted_copy)
                    detected_objects_image_array.append(splitted_image_path)
                elif output_type == "array":
                    detected_objects_image_array.append(splitted_copy)

        image_copy = padded_fragment(image_copy)

        if output_type == "file":
            cv2.imwrite(output_image_path, image_copy)

        if extract_detected_objects:
            if output_type == "file":
                return detections, detected_objects_image_array
            elif output_type == "array":
                return image_copy, detections, detected_objects_image_array
        else:
            if output_type == "file":
                return detections
            elif output_type == "array":
                return image_copy, detections
    except Exception:
        raise ValueError("Ensure you specified a correct input image, input type, "
                         "output type and/or output image path")
def main(args):
    import itertools  # used below to flatten (category, score) pairs for logging

    categories = cfg.CATEGORIES
    num_classes = len(categories)

    device = torch.device(args.device)
    model = vgg19(pretrained=True).train().to(device)

    # replace the last classifier layer for fine-tuning
    classifier = model.classifier
    num_ftrs = classifier[-1].in_features
    new_classifier = torch.nn.Sequential(
        *(list(model.classifier.children())[:-1]),
        nn.Linear(num_ftrs, num_classes).to(device))
    model.classifier = new_classifier
    model.train()

    batch_size = args.batch_size
    epoch_size = args.epoch_size

    # values used to normalize the input, as in the torchvision ImageNet guide
    mean = cfg.MEAN
    std = cfg.STD

    batch_size_dict = {'train': batch_size, 'test': batch_size}
    rds = data.RawDataset(root_dir=args.dataset_path,
                          num_workers=args.workers_num,
                          output_dims=cfg.INPUT_DIMS,
                          batch_size_dict=batch_size_dict)

    test_first = bool(args.test_first)

    cl_factor = args.cl_loss_factor
    am_factor = args.am_loss_factor

    epochs = args.nepoch
    loss_fn = torch.nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    # a GAIN model which wraps the chosen classification model, computes the
    # gradients w.r.t. grad_layer and performs the GAIN algorithm
    gain = batch_GAIN_VOC(model=model, grad_layer='features',
                          num_classes=num_classes,
                          pretraining_epochs=args.npretrain,
                          test_first=test_first,
                          grads_off=bool(args.grads_off),
                          grads_magnitude=args.grads_magnitude,
                          device=device)

    i = 0
    num_train_samples = 0
    chkpnt_epoch = 0

    if len(args.checkpoint_file_path_load) > 0:
        checkpoint = torch.load(args.checkpoint_file_path_load)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        chkpnt_epoch = checkpoint['epoch'] + 1
        i = checkpoint['iteration'] + 1
        num_train_samples = checkpoint['num_train_samples']

    writer = SummaryWriter(
        args.logging_path + args.logging_name + '_' +
        datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    writer.add_text('Start', 'start')
    print('Started')

    for epoch in range(chkpnt_epoch, epochs):
        total_train_single_accuracy = 0
        total_test_single_accuracy = 0

        model.train(True)

        if not test_first or epoch != 0:
            epoch_train_am_loss = 0
            epoch_train_cl_loss = 0
            epoch_train_total_loss = 0

            for sample in rds.datasets['rnd_train']:
                augmented_batch = []
                batch, augmented = preprocess_image(
                    sample[0][0].squeeze().cpu().detach().numpy(),
                    train=True, mean=mean, std=std)
                augmented_batch.append(augmented)
                for img in sample[0][1:]:
                    input_tensor, augmented_image = preprocess_image(
                        img.squeeze().cpu().detach().numpy(),
                        train=True, mean=mean, std=std)
                    batch = torch.cat((batch, input_tensor), dim=0)
                    augmented_batch.append(augmented_image)
                batch = batch.to(device)

                optimizer.zero_grad()
                labels = sample[2]

                logits_cl, logits_am, heatmap, masked_image, mask = gain(
                    batch, sample[1])

                class_onehot = torch.stack(sample[1]).float()
                cl_loss = loss_fn(logits_cl, class_onehot)

                am_scores = nn.Softmax(dim=1)(logits_am)
                batch_am_labels = []
                batch_am_labels_scores = []
                for k in range(len(batch)):
                    num_of_labels = len(sample[2][k])
                    _, am_labels = am_scores[k].topk(num_of_labels)
                    batch_am_labels.append(am_labels)
                    am_labels_scores = am_scores[k].view(-1)[
                        labels[k]].sum() / num_of_labels
                    batch_am_labels_scores.append(am_labels_scores)
                am_loss = sum(batch_am_labels_scores) / batch_size

                # g = make_dot(am_loss, dict(gain.named_parameters()), show_attrs=True, show_saved=True)
                # g.save('grad_viz', train_path)

                total_loss = cl_loss * cl_factor + am_loss * am_factor
                epoch_train_am_loss += (am_loss * am_factor).detach().cpu().item()
                epoch_train_cl_loss += (cl_loss * cl_factor).detach().cpu().item()
                epoch_train_total_loss += total_loss.detach().cpu().item()

                writer.add_scalar('Per_Step/train/cl_loss',
                                  (cl_loss * cl_factor).detach().cpu().item(), i)
                writer.add_scalar('Per_Step/train/am_loss',
                                  (am_loss * am_factor).detach().cpu().item(), i)
                writer.add_scalar('Per_Step/train/total_loss',
                                  total_loss.detach().cpu().item(), i)

                loss = cl_loss * cl_factor
                if gain.AM_enabled():
                    loss += am_loss * am_factor
                loss.backward()
                optimizer.step()

                # Single-label evaluation
                for k in range(len(batch)):
                    num_of_labels = len(sample[2][k])
                    _, y_pred = logits_cl[k].detach().topk(k=num_of_labels)
                    y_pred = y_pred.view(-1)
                    gt = torch.tensor(sorted(sample[2][k]), device=device)
                    acc = (y_pred == gt).sum()
                    total_train_single_accuracy += acc.detach().cpu()

                # Multi-label evaluation
                # _, y_pred_multi = logits_cl.detach().topk(num_of_labels)
                # y_pred_multi = y_pred_multi.view(-1)
                # acc_multi = (y_pred_multi == gt).sum() / num_of_labels
                # total_train_multi_accuracy += acc_multi.detach().cpu()

                if i % args.record_itr_train == 0:
                    for t in range(args.nrecord):
                        num_of_labels = len(sample[2][t])
                        one_heatmap = heatmap[t].squeeze().cpu().detach().numpy()
                        one_augmented_im = torch.tensor(
                            np.array(augmented_batch[t])).to(device).unsqueeze(0)
                        one_masked_image = masked_image[t].detach().squeeze()
                        htm = deprocess_image(one_heatmap)
                        visualization, red_htm = show_cam_on_image(
                            one_augmented_im.cpu().detach().numpy(), htm, True)

                        viz = torch.from_numpy(visualization).to(device)
                        masked_im = denorm(one_masked_image, mean, std)
                        masked_im = (masked_im.squeeze().permute([1, 2, 0])
                                     .cpu().detach().numpy() * 255).round().astype(np.uint8)
                        orig = sample[0][t].unsqueeze(0)
                        masked_im = torch.from_numpy(masked_im).unsqueeze(0).to(device)
                        orig_viz = torch.cat(
                            (orig, one_augmented_im, viz, masked_im), 0)
                        grid = torchvision.utils.make_grid(
                            orig_viz.permute([0, 3, 1, 2]))
                        gt = [categories[x] for x in labels[t]]
                        writer.add_image(tag='Train_Heatmaps/image_' + str(i) + '_' +
                                             str(t) + '_' + '_'.join(gt),
                                         img_tensor=grid, global_step=epoch,
                                         dataformats='CHW')

                        y_scores = nn.Softmax(dim=0)(logits_cl[t].detach())
                        _, predicted_categories = y_scores.topk(num_of_labels)
                        predicted_cl = [(categories[x], format(y_scores.view(-1)[x], '.4f'))
                                        for x in predicted_categories.view(-1)]
                        labels_cl = [(categories[x], format(y_scores.view(-1)[x], '.4f'))
                                     for x in labels[t]]
                        predicted_cl = list(itertools.chain(*predicted_cl))
                        labels_cl = list(itertools.chain(*labels_cl))
                        cl_text = 'cl_gt_' + '_'.join(labels_cl) + \
                                  '_pred_' + '_'.join(predicted_cl)

                        predicted_am = [(categories[x], format(am_scores[t].view(-1)[x], '.4f'))
                                        for x in batch_am_labels[t].view(-1)]
                        labels_am = [(categories[x], format(am_scores[t].view(-1)[x], '.4f'))
                                     for x in labels[t]]
                        predicted_am = list(itertools.chain(*predicted_am))
                        labels_am = list(itertools.chain(*labels_am))
                        am_text = '_am_gt_' + '_'.join(labels_am) + \
                                  '_pred_' + '_'.join(predicted_am)

                        writer.add_text('Train_Heatmaps_Description/image_' + str(i) +
                                        '_' + str(t) + '_' + '_'.join(gt),
                                        cl_text + am_text, global_step=epoch)

                i += 1
                if epoch == 0 and not test_first:
                    num_train_samples += 1
                if epoch == 1 and test_first:
                    num_train_samples += 1
                if i % epoch_size == 0:
                    break

        model.train(False)
        j = 0
        for sample in rds.datasets['seq_test']:
            batch, _ = preprocess_image(
                sample[0][0].squeeze().cpu().detach().numpy(),
                train=False, mean=mean, std=std)
            for img in sample[0][1:]:
                input_tensor, _ = preprocess_image(
                    img.squeeze().cpu().detach().numpy(),
                    train=False, mean=mean, std=std)
                batch = torch.cat((batch, input_tensor), dim=0)
            batch = batch.to(device)

            labels = sample[2]

            logits_cl, logits_am, heatmap, masked_image, mask = gain(
                batch, sample[1])

            am_scores = nn.Softmax(dim=1)(logits_am)
            batch_am_labels = []
            for k in range(len(batch)):
                num_of_labels = len(sample[2][k])
                _, am_labels = am_scores[k].topk(num_of_labels)
                batch_am_labels.append(am_labels)

            # Single-label evaluation
            for k in range(len(batch)):
                num_of_labels = len(sample[2][k])
                _, y_pred = logits_cl[k].detach().topk(k=num_of_labels)
                y_pred = y_pred.view(-1)
                gt = torch.tensor(sorted(sample[2][k]), device=device)
                acc = (y_pred == gt).sum()
                total_test_single_accuracy += acc.detach().cpu()

            if j % args.record_itr_test == 0:
                for t in range(args.nrecord):
                    num_of_labels = len(sample[2][t])
                    one_heatmap = heatmap[t].squeeze().cpu().detach().numpy()
                    one_input_image = sample[0][t].cpu().detach().numpy()
                    one_masked_image = masked_image[t].detach().squeeze()
                    htm = deprocess_image(one_heatmap)
                    # use fresh names so the heatmap/masked_image tensors from gain()
                    # are not clobbered for the next iteration over t
                    visualization, red_htm = show_cam_on_image(
                        one_input_image, htm, True)
                    viz = torch.from_numpy(visualization).unsqueeze(0).to(device)
                    augmented = torch.tensor(one_input_image).unsqueeze(0).to(device)
                    masked_im = denorm(one_masked_image, mean, std)
                    masked_im = (masked_im.squeeze().permute([1, 2, 0])
                                 .cpu().detach().numpy() * 255).round().astype(np.uint8)
                    orig = sample[0][t].unsqueeze(0)
                    masked_im = torch.from_numpy(masked_im).unsqueeze(0).to(device)
                    orig_viz = torch.cat((orig, augmented, viz, masked_im), 0)
                    grid = torchvision.utils.make_grid(
                        orig_viz.permute([0, 3, 1, 2]))
                    gt = [categories[x] for x in labels[t]]
                    writer.add_image(tag='Test_Heatmaps/image_' + str(j) +
                                         '_' + '_'.join(gt),
                                     img_tensor=grid, global_step=epoch,
                                     dataformats='CHW')

                    y_scores = nn.Softmax(dim=0)(logits_cl[t].detach())
                    _, predicted_categories = y_scores.topk(num_of_labels)
                    predicted_cl = [(categories[x], format(y_scores.view(-1)[x], '.4f'))
                                    for x in predicted_categories.view(-1)]
                    labels_cl = [(categories[x], format(y_scores.view(-1)[x], '.4f'))
                                 for x in labels[t]]
                    predicted_cl = list(itertools.chain(*predicted_cl))
                    labels_cl = list(itertools.chain(*labels_cl))
                    cl_text = 'cl_gt_' + '_'.join(labels_cl) + \
                              '_pred_' + '_'.join(predicted_cl)

                    predicted_am = [(categories[x], format(am_scores[t].view(-1)[x], '.4f'))
                                    for x in batch_am_labels[t].view(-1)]
                    labels_am = [(categories[x], format(am_scores[t].view(-1)[x], '.4f'))
                                 for x in labels[t]]
                    predicted_am = list(itertools.chain(*predicted_am))
                    labels_am = list(itertools.chain(*labels_am))
                    am_text = '_am_gt_' + '_'.join(labels_am) + \
                              '_pred_' + '_'.join(predicted_am)

                    writer.add_text('Test_Heatmaps_Description/image_' + str(j) +
                                    '_' + '_'.join(gt),
                                    cl_text + am_text, global_step=epoch)

            j += 1

        num_test_samples = len(rds.datasets['seq_test']) * batch_size
        print('finished epoch number:', epoch)

        if (test_first and epoch > 0) or not test_first:
            writer.add_scalar('Loss/train/cl_total_loss',
                              epoch_train_cl_loss / (num_train_samples * batch_size),
                              epoch)
            writer.add_scalar('Loss/train/am_total_loss',
                              epoch_train_am_loss / num_train_samples, epoch)
            writer.add_scalar('Loss/train/combined_total_loss',
                              epoch_train_total_loss / num_train_samples, epoch)
            writer.add_scalar('Accuracy/train/cl_accuracy',
                              total_train_single_accuracy / (num_train_samples * batch_size),
                              epoch)
            writer.add_scalar('Accuracy/test/cl_accuracy',
                              total_test_single_accuracy / num_test_samples, epoch)
        gain.increase_epoch_count()

        if len(args.checkpoint_file_path_save) > 0 and \
                epoch % args.checkpoint_nepoch == 0:
            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'iteration': i,
                    'num_train_samples': num_train_samples
                },
                args.checkpoint_file_path_save +
                datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
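# The visualization code above relies on a denorm() helper that undoes the
# ImageNet mean/std normalization before tensors are converted back to uint8
# images. A minimal sketch of such a helper, under the assumption that mean
# and std are length-3 sequences (the actual implementation lives elsewhere
# in this repo and may differ):
def denorm_sketch(tensor, mean, std):
    # tensor: (C, H, W) or (1, C, H, W) normalized image
    mean = torch.tensor(mean, device=tensor.device).view(-1, 1, 1)
    std = torch.tensor(std, device=tensor.device).view(-1, 1, 1)
    return tensor * std + mean  # back to the [0, 1] range expected by plotting code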
voc_classes = {
    # leading entries reconstructed from the standard PASCAL VOC class ordering;
    # only the last two appeared in the source snippet
    'aeroplane': 0, 'bicycle': 1, 'bird': 2, 'boat': 3, 'bottle': 4,
    'bus': 5, 'car': 6, 'cat': 7, 'chair': 8, 'cow': 9,
    'diningtable': 10, 'dog': 11, 'horse': 12, 'motorbike': 13, 'person': 14,
    'pottedplant': 15, 'sheep': 16, 'sofa': 17,
    'train': 18,
    'tvmonitor': 19
}

labels_to_names = {}
for key, value in voc_classes.items():
    labels_to_names[value] = key

# load images
image_paths = glob.glob('datasets/voc_test/VOC2007/JPEGImages/*.jpg')

for image_path in image_paths:
    image = read_image_bgr(image_path)

    # copy to draw on
    draw = image.copy()

    # preprocess image for network
    image = preprocess_image(image)
    image, scale = resize_image(image)

    # process image
    start = time.time()
    # locations, feature_shapes = model.predict_on_batch(np.expand_dims(image, axis=0))
    boxes, scores, labels = model.predict_on_batch(
        np.expand_dims(image, axis=0))
    print("processing time: ", time.time() - start)

    # correct for image scale
    boxes /= scale

    labels_to_locations = {}

    # visualize detections
    for box, score, label in zip(boxes[0], scores[0], labels[0]):
        # scores are sorted, so we can break at the first low-confidence detection
        if score < 0.5:
            break
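        # The remainder of this loop body is a hedged reconstruction in the
        # style of the upstream fizyr/keras-retinanet examples; it assumes
        # draw_box, draw_caption and label_color are imported from
        # keras_retinanet.utils, and the labels_to_locations bookkeeping is
        # an illustrative guess.
        color = label_color(label)
        b = box.astype(int)
        draw_box(draw, b, color=color)
        caption = "{} {:.3f}".format(labels_to_names[label], score)
        draw_caption(draw, b, caption)
        labels_to_locations[labels_to_names[label]] = b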
def preprocess_image(self, image):
    return preprocess_image(image)
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
    """
    Produce image detection predictions.

    Parameters
    ----------
    inputs : numpy ndarray of size (n_images, dimension) containing the d3m Index, image name,
        and bounding box for each image.

    Returns
    -------
    outputs : A d3m dataframe container with the d3m index, image name, bounding boxes as a
        string (8-coordinate format), and confidence scores.
    """
    iou_threshold = 0.5     # bounding-box overlap threshold for false positive or true positive
    score_threshold = 0.05  # score confidence threshold to use for detections
    max_detections = 100    # maximum number of detections to use per image

    # Convert training model to inference model
    inference_model = models.convert_model(self.training_model)

    # Generate image paths
    image_cols = inputs.metadata.get_columns_with_semantic_type(
        'https://metadata.datadrivendiscovery.org/types/FileName')
    self.base_dir = [
        inputs.metadata.query((metadata_base.ALL_ELEMENTS, t))['location_base_uris'][0]
        .replace('file:///', '/') for t in image_cols]
    self.image_paths = np.array(
        [[os.path.join(base_dir, filename) for filename in inputs.iloc[:, col]]
         for base_dir, col in zip(self.base_dir, image_cols)]).flatten()
    self.image_paths = pd.Series(self.image_paths)

    # Initialize output objects
    box_list = []
    score_list = []
    image_name_list = []

    # Predict bounding boxes and confidence scores for each unique image
    # (order-preserving de-duplication of the image paths)
    image_list = list(dict.fromkeys(self.image_paths.tolist()))

    start_time = time.time()
    print('Starting testing...', file=sys.__stdout__)

    for image_path in image_list:
        image = read_image_bgr(image_path)

        # preprocess image for network
        image = preprocess_image(image)
        image, scale = resize_image(image)

        boxes, scores, labels = inference_model.predict_on_batch(np.expand_dims(image, axis=0))

        # correct for image scale
        boxes /= scale

        for box, score in zip(boxes[0], scores[0]):
            # scores are sorted, so stop at the first detection below the 0.5 cutoff
            if score < 0.5:
                break
            b = box.astype(int)
            box_list.append(b)
            score_list.append(score)
            image_name_list.append(image_path)  # one image name per detection

    print(f'Testing complete. Testing took {time.time() - start_time} seconds.',
          file=sys.__stdout__)

    # Convert predicted boxes from a list of arrays to a list of strings
    boxes = np.array(box_list).tolist()
    # Convert to the 8-coordinate polygon format used by D3M:
    # (x1, y1, x1, y2, x2, y2, x2, y1)
    boxes = list(map(lambda x: [x[0], x[1], x[0], x[3], x[2], x[3], x[2], x[1]], boxes))
    boxes = list(map(lambda x: ",".join(map(str, x)), boxes))

    # Create mapping between image names and D3M index
    input_df = pd.DataFrame({
        'd3mIndex': inputs.d3mIndex,
        'image': [os.path.basename(path) for path in self.image_paths]
    })
    d3mIdx_image_mapping = input_df.set_index('image').T.to_dict('list')

    # Extract values for image name keys and get missing image predictions (if they exist)
    image_name_list = [os.path.basename(path) for path in image_name_list]
    d3mIdx = [d3mIdx_image_mapping.get(key) for key in image_name_list]
    empty_predictions_image_names = [
        k for k, v in d3mIdx_image_mapping.items() if v not in d3mIdx]
    d3mIdx = [item for sublist in d3mIdx for item in sublist]  # flatten list of lists

    # Assemble in a Pandas DataFrame
    results = pd.DataFrame({
        'd3mIndex': d3mIdx,
        'bounding_box': boxes,
        'confidence': score_list
    })

    # The D3M metrics evaluator needs at least one prediction per image. If RetinaNet
    # does not return predictions for an image, create a dummy empty prediction row
    # to add to results_df for that missing image.
    if len(empty_predictions_image_names) != 0:
        # Create a data frame of empty predictions for each missing image and
        # concatenate it with the results, then sort by d3mIndex.
        empty_predictions_df = self._fill_empty_predictions(
            empty_predictions_image_names, d3mIdx_image_mapping)
        results_df = pd.concat([results, empty_predictions_df]).sort_values('d3mIndex')
    else:
        results_df = results

    # Convert to a DataFrame container
    results_df = d3m_DataFrame(results_df)

    # Assemble first output column ('d3mIndex')
    col_dict = dict(results_df.metadata.query((metadata_base.ALL_ELEMENTS, 0)))
    col_dict['structural_type'] = type("1")
    col_dict['name'] = 'd3mIndex'
    col_dict['semantic_types'] = (
        'http://schema.org/Integer',
        'https://metadata.datadrivendiscovery.org/types/PrimaryKey')
    results_df.metadata = results_df.metadata.update((metadata_base.ALL_ELEMENTS, 0), col_dict)

    # Assemble second output column ('bounding_box')
    col_dict = dict(results_df.metadata.query((metadata_base.ALL_ELEMENTS, 1)))
    col_dict['structural_type'] = type("1")
    col_dict['name'] = 'bounding_box'
    col_dict['semantic_types'] = (
        'http://schema.org/Text',
        'https://metadata.datadrivendiscovery.org/types/PredictedTarget',
        'https://metadata.datadrivendiscovery.org/types/BoundingPolygon')
    results_df.metadata = results_df.metadata.update((metadata_base.ALL_ELEMENTS, 1), col_dict)

    # Assemble third output column ('confidence')
    col_dict = dict(results_df.metadata.query((metadata_base.ALL_ELEMENTS, 2)))
    col_dict['structural_type'] = type("1")
    col_dict['name'] = 'confidence'
    col_dict['semantic_types'] = (
        'http://schema.org/Integer',
        'https://metadata.datadrivendiscovery.org/types/Score')
    results_df.metadata = results_df.metadata.update((metadata_base.ALL_ELEMENTS, 2), col_dict)

    return CallResult(results_df)
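# _fill_empty_predictions() is referenced above but not shown in this snippet.
# A minimal sketch of what such a helper could look like, under the assumption
# that an "empty" prediction is an all-zero polygon with zero confidence
# (hypothetical; the real implementation may differ):
def _fill_empty_predictions_sketch(self, empty_image_names, d3mIdx_image_mapping):
    empty_box = ",".join(["0"] * 8)  # degenerate 8-coordinate polygon
    rows = []
    for name in empty_image_names:
        for d3m_index in d3mIdx_image_mapping[name]:
            rows.append({'d3mIndex': d3m_index,
                         'bounding_box': empty_box,
                         'confidence': 0.0})
    return pd.DataFrame(rows)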