def generate_vec():
    """Gather the feature vectors needed for dorsal/palmar classification.

    Returns:
        tuple: (dorsal_vectors, palmar_vectors, test_data, test_data_paths)
        where the first two come from the labelled DB and the last two from
        the unlabelled DB.
    """
    def vectors_for(aspect):
        # Restrict the labelled DB to images of the requested aspect and
        # fetch their feature vectors under the module-level `model`.
        paths = filter_images(aspect)
        _, vectors = get_all_vectors(model, f={'path': {'$in': paths}})
        return vectors

    dorsal_vectors = vectors_for('dorsal')
    palmar_vectors = vectors_for('palmar')

    # Test vectors come from the unlabelled DB with no path filtering.
    test_data_paths, test_data = get_all_vectors(model, f={}, unlabelled_db=True)

    return dorsal_vectors, palmar_vectors, test_data, test_data_paths
def get_data_matrix(cls, feature, label=None, unlabelled=False, ignore_metadata=False):
    """Build a min-max-scaled data matrix for the given feature model.

    Args:
        feature: name of the feature model to fetch vectors for.
        label: optional image label; when given, only images carrying that
            label are included.
        unlabelled: when True, read from the unlabelled DB instead.
        ignore_metadata: when True, return only the scaled feature space
            (plus a path -> metadata mapping); when False, column-stack the
            scaled metadata space onto the feature space.

    Returns:
        tuple: (images, meta, matrix).
    """
    scaler = MinMaxScaler()

    query = {}
    if label:
        # Narrow the query down to images carrying the requested label.
        query = {'path': {'$in': utils.filter_images(label)}}

    # Feature vectors, min-max scaled column-wise.
    images, features = utils.get_all_vectors(feature, f=query, unlabelled_db=unlabelled)
    features = scaler.fit_transform(features)

    if ignore_metadata:
        # Not including metadata boosts accuracy of Set 2; a mapping from
        # image file path to its metadata is still returned for bookkeeping.
        path_to_meta = {m['path']: m for m in utils.get_metadata(unlabelled_db=unlabelled)}
        return images, path_to_meta, features

    # Including metadata boosts accuracy of Set 1: scale the metadata space
    # and append its columns to the feature columns.
    meta, metadata_space = cls.get_metadata_space(images, unlabelled_db=unlabelled)
    metadata_space = scaler.fit_transform(metadata_space)

    return images, meta, np.c_[features, metadata_space]
def build_unlabelled(model):
    """Assemble the test matrix from the unlabelled DB.

    Dorsal rows are labelled -1 and palmar rows +1.

    Returns:
        tuple: (test_data, test_labels, test_paths) with dorsal rows stacked
        before palmar rows.
    """
    per_class = []
    for side, side_label in (('dorsal', -1), ('palmar', 1)):
        side_paths = filter_images(side, unlabelled_db=True)
        _, side_vectors = get_all_vectors(
            model, f={'path': {'$in': side_paths}}, unlabelled_db=True)
        per_class.append(
            (side_vectors, np.array([side_label] * len(side_vectors)), side_paths))

    (d_vecs, d_labels, d_paths), (p_vecs, p_labels, p_paths) = per_class

    # Stack all test data, labels and paths — dorsal first, then palmar.
    return (np.vstack((d_vecs, p_vecs)),
            np.concatenate((d_labels, p_labels)),
            np.concatenate((d_paths, p_paths)))
def build_labelled(model):
    """Assemble the training matrix from the labelled DB.

    Dorsal images get class -1; every image whose path is NOT in the dorsal
    set is treated as palmar and gets class +1.

    Returns:
        tuple: (train_data, train_class) with dorsal rows stacked first.
    """
    dorsal_paths = filter_images('dorsal')

    # Dorsal vectors match the dorsal paths; palmar vectors are everything
    # else in the labelled DB ($nin on the same path set).
    _, dorsal_vectors = get_all_vectors(model, f={'path': {'$in': dorsal_paths}})
    _, palmar_vectors = get_all_vectors(model, f={'path': {'$nin': dorsal_paths}})

    labels = np.concatenate((
        np.array([-1] * len(dorsal_vectors)),
        np.array([1] * len(palmar_vectors)),
    ))
    return np.vstack((dorsal_vectors, palmar_vectors)), labels
# NOTE(review): this chunk begins mid-call — the line below is the tail of a
# distance computation whose opening (and the enclosing function's `def`)
# lies outside this view; dorsal_dist/palmar_dist are presumably bound there.
palmar_kmeans.centroids, return_min=True)
    # Classify by nearest centroid set: smaller (or equal) dorsal distance
    # wins, so ties go to 'dorsal'.
    return 'dorsal' if dorsal_dist <= palmar_dist else 'palmar'


if __name__ == "__main__":
    parser = prepare_parser()
    args = parser.parse_args()
    n_clusters = args.n_clusters

    # Get the absolute data path and the model whose features to use.
    data_path = Path(settings.path_for(settings.DATA_PATH))
    model = settings.TASK2_CONFIG.MODEL

    # Fetch training data for dorsal and palmar images from the LABELLED DB.
    dorsal_paths = filter_images('dorsal')
    dorsal_paths, dorsal_vectors = get_all_vectors(
        model, f={'path': {
            '$in': dorsal_paths
        }})
    # Anything in the labelled DB that is not dorsal is treated as palmar.
    palmar_paths, palmar_vectors = get_all_vectors(
        model, f={'path': {
            '$nin': dorsal_paths
        }})

    # Fetch test data from the UNLABELLED DB.
    test_data_paths, test_data = get_all_vectors(model, unlabelled_db=True)

    # Get centroids and centroid labels for dorsal and palmar vectors.
    print("Clustering dorsal vectors")
    dorsal_kmeans = Kmeans(dorsal_vectors, n_clusters)
    # NOTE(review): chunk ends here mid-script; palmar clustering and the
    # actual classification loop presumably follow outside this view.
# NOTE(review): top-level script tail — `args`, `oi`, `base_dir`, `subset`,
# `original_*` and the output dirs are built earlier in the script,
# outside this view.
print("converting annotations ...")

# Convert annotations into COCO form for the task selected on the CLI.
if args.task == "bbox":
    oi["annotations"] = utils.convert_instance_annotations(
        original_annotations,
        oi["images"],
        oi["categories"],
        start_index=0,
        classes=args.classes,
    )
elif args.task == "panoptic":
    oi["annotations"] = utils.convert_segmentation_annotations(
        original_segmentations,
        oi["images"],
        oi["categories"],
        original_mask_dir,
        segmentation_out_dir,
        start_index=0,
    )

# Keep only images that still have annotations after conversion.
oi["images"] = utils.filter_images(oi["images"], oi["annotations"])

# Write annotations into .json file
filename = os.path.join(
    base_dir,
    "annotations/",
    "openimages_{}_{}_{}.json".format(args.version, subset, args.task),
)
print("writing output to {}".format(filename))
# Use a context manager so the handle is flushed and closed even on error;
# the previous `json.dump(oi, open(filename, "w"))` leaked the file object.
with open(filename, "w") as out_file:
    json.dump(oi, out_file)
print("Done")
{'id': 2, 'name': 'Attribution-NonCommercial License', 'url': 'http://creativecommons.org/licenses/by-nc/2.0/'}, {'id': 3, 'name': 'Attribution-NonCommercial-NoDerivs License', 'url': 'http://creativecommons.org/licenses/by-nc-nd/2.0/'}, {'id': 4, 'name': 'Attribution License', 'url': 'http://creativecommons.org/licenses/by/2.0/'}, {'id': 5, 'name': 'Attribution-ShareAlike License', 'url': 'http://creativecommons.org/licenses/by-sa/2.0/'}, {'id': 6, 'name': 'Attribution-NoDerivs License', 'url': 'http://creativecommons.org/licenses/by-nd/2.0/'}, {'id': 7, 'name': 'No known copyright restrictions', 'url': 'http://flickr.com/commons/usage/'}, {'id': 8, 'name': 'United States Government Work', 'url': 'http://www.usa.gov/copyright.shtml'}] # Convert category information print('converting category info') oi['categories'] = utils.convert_category_annotations(original_category_info) # Convert image mnetadata print('converting image info ...') image_dir = os.path.join(base_dir, subset) oi['images'] = utils.convert_image_annotations(original_image_metadata, original_image_annotations, original_image_sizes, image_dir, oi['categories'], oi['licenses']) # Convert instance annotations print('converting annotations ...') # Convert annotations if args.task == 'bbox': oi['annotations'] = utils.convert_instance_annotations(original_annotations, oi['images'], oi['categories'], start_index=0) elif args.task == 'panoptic': oi['annotations'] = utils.convert_segmentation_annotations(original_segmentations, oi['images'], oi['categories'], original_mask_dir, segmentation_out_dir, start_index=0) oi['images'] = utils.filter_images(oi['images'], oi['annotations']) # Write annotations into .json file filename = os.path.join(base_dir, 'annotations/', 'openimages_{}_{}_{}.json'.format(args.version, subset, args.task)) print('writing output to {}'.format(filename)) json.dump(oi, open(filename, "w")) print('Done')