def main():
    experiment_save_dir = r"C:\Users\janul\Desktop\thesis_tmp_files\responses"
    requests = get_queries()
    exps = [experiments(i) for i in [58, 59, 60]]
    for exp in exps:
        try:
            print(repr(exp))
            if not exp:
                continue
            filename_hash = sha256(repr(exp).encode('utf-8')).hexdigest()
            responses_save_path = Path(experiment_save_dir, filename_hash).with_suffix(".npz")
            if responses_save_path.exists():
                print("Results already present.", responses_save_path)
                continue
            print("Output path:", responses_save_path)
            responses = exp.run(requests)
            FileStorage.save_data(responses_save_path,
                                  responses=responses,
                                  experiment=exp.__dict__,
                                  exp_repr=repr(exp),
                                  model=repr(exp.get_env().model),
                                  num_images=exp.num_images())
        except Exception:
            continue
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--original', type=str)
    parser.add_argument('--to_fix', type=str)
    parser.add_argument('--output', type=str)
    args = parser.parse_args()

    data_original = FileStorage.load_multiple_files_multiple_keys(
        args.original, retrieve_merged=['features'], num_files_limit=2)['features']
    features_mean = np.mean(np.stack(data_original), axis=0)

    data_preprocessed = FileStorage.load_multiple_files_multiple_keys(
        args.to_fix, retrieve_merged=['features'], num_files_limit=5)
    preprocessed_features = np.stack(data_preprocessed['features'])
    preprocessed_fixed = preprocessed_features + features_mean

    new_data = {}
    for key in data_preprocessed.keys():
        if key == 'features':
            new_data['features'] = preprocessed_fixed
        else:
            new_data[key] = data_preprocessed[key]
    FileStorage.save_data(Path(args.output, 'fixed_data'), **new_data)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--y_input', default=None, type=str)
    parser.add_argument('--x_input', default=None, type=str)
    parser.add_argument('--sample_size', default=500, type=int)
    args = parser.parse_args()

    y_dataset = FileStorage.load_multiple_files_multiple_keys(
        args.y_input, retrieve_merged=['features', 'paths'], num_files_limit=75)
    x_dataset = FileStorage.load_multiple_files_multiple_keys(
        args.x_input, retrieve_merged=['features', 'paths'], num_files_limit=75)

    y_features = np.array(y_dataset['features'])
    x_features = np.array(x_dataset['features'])
    y_paths = y_dataset['paths']
    x_paths = x_dataset['paths']
    # assert y_paths == x_paths

    sampled_idxs = np.random.choice(np.arange(len(y_features)), args.sample_size, replace=False)
    y_sampled = y_features[sampled_idxs]
    x_sampled = x_features[sampled_idxs]

    y_similarities = cosine_similarity(y_sampled).reshape(-1)
    x_similarities = cosine_similarity(x_sampled).reshape(-1)

    arg_sorted = np.argsort(x_similarities)
    fig, ax = plt.subplots()
    ax.plot(x_similarities[arg_sorted], y_similarities[arg_sorted], 'x',
            markersize=0.02, label='cosine similarities')
    ax.plot((0, 1), label='Diagonal')
    ax.set_xlim((-1, 1))
    ax.set_ylim((-1, 1))
    ax.set_xlabel(Path(args.x_input).name)
    ax.set_ylabel(Path(args.y_input).name)

    mse = ((x_similarities - y_similarities) ** 2).mean()
    ax.set_title("mse: {:.5f}".format(mse))
    lgnd = plt.legend(loc='upper left')
    lgnd.legendHandles[0]._legmarker.set_markersize(2)
    plt.show()
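For reference, a minimal illustrative sketch (not part of the script above) of what scikit-learn's cosine_similarity returns: an n-by-n matrix of pairwise cosine similarities, which the script flattens with reshape(-1) before comparing the two feature spaces.

# Illustrative example, not part of the thesis scripts.
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

vectors = np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])
similarities = cosine_similarity(vectors)  # shape (3, 3), values in [-1, 1]
print(similarities.reshape(-1))            # flattened, as in the script above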
def sample_image_paths(path: str, samples: int) -> List[str]:
    """Reads preprocessed features and extracts only paths to randomly selected images."""
    source_images = FileStorage.load_multiple_files_multiple_keys(
        path, retrieve_merged=['paths'])['paths']
    unique_source_images = set(source_images)
    # random.sample requires a sequence (sampling directly from a set is no longer
    # supported in Python 3.11+), so convert the set to a list first.
    sampled_paths = random.sample(list(unique_source_images), samples)
    return sampled_paths
def test_load_multiple_files_multiple_keys(self):
    paths = sample_image_paths(self.regions_dataset, 100)
    result = FileStorage.load_multiple_files_multiple_keys(
        self.antepenultimate_small,
        retrieve_merged=['paths', 'features'],
        key_filter=('paths', paths))
    self.assertEqual(len(paths), len(set(result['paths'])))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', type=str)
    parser.add_argument('--output', type=str)
    args = parser.parse_args()

    keys_merged = {'crops', 'paths', 'features'}
    first_file_name = str(next(Path(args.input).rglob("*.npz")))
    first_file = FileStorage.load_data_from_file(first_file_name)
    keys_available = set(first_file.keys())
    keys_once = keys_available - keys_merged

    data = FileStorage.load_multiple_files_multiple_keys(
        args.input,
        retrieve_merged=list(keys_available - keys_once),
        retrieve_once=list(keys_once))

    filename = Path(first_file_name).name.split(',')[0]
    FileStorage.save_data(Path(args.output, filename), **data)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', type=str)
    parser.add_argument('--output', type=str)
    args = parser.parse_args()

    for file in Path(args.input).rglob("*.npz"):
        print(file.name)
        data = np.load(str(file), allow_pickle=True)
        new_data = {}
        for key in data.keys():
            if key == 'features':
                new_data['features'] = GlobalAveragePooling2D()(data['features']).numpy()
            else:
                new_data[key] = data[key]
        FileStorage.save_data(Path(args.output, file.name), **new_data)
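As an aside, a minimal sketch (illustrative, not taken from the scripts) of what the GlobalAveragePooling2D call above does: it averages a (batch, height, width, channels) feature map over its spatial axes, producing (batch, channels), the same reduction as np.mean(..., axis=(1, 2)).

# Illustrative example, not part of the thesis scripts.
import numpy as np
from tensorflow.keras.layers import GlobalAveragePooling2D

feature_maps = np.random.rand(2, 7, 7, 8).astype(np.float32)   # (batch, h, w, channels)
pooled = GlobalAveragePooling2D()(feature_maps).numpy()        # (batch, channels)
assert pooled.shape == (2, 8)
assert np.allclose(pooled, feature_maps.mean(axis=(1, 2)), atol=1e-5)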
def test_load_features_datafiles(self):
    result = FileStorage.load_multiple_files_multiple_keys(
        self.regions_dataset,
        retrieve_merged=['crops', 'paths', 'features'],
        retrieve_once=['pipeline', 'model'])
    self.assertGreater(len(result['paths']), 0)
    self.assertEqual(len(result['paths']), len(result['crops']))
    self.assertEqual(len(result['features']), len(result['crops']))
    self.assertTrue('pipeline' in result)
    self.assertTrue('model' in result)
def init(self):
    if self.initialized:
        return
    self.initialized = True
    print("Initializing environment, this may take a while.")
    self.data = FileStorage.load_multiple_files_multiple_keys(
        path=self.data_path,
        retrieve_merged=['features', 'paths'],
        retrieve_once=['pipeline', 'model'])
    self.preprocessing = pickle.loads(self.data['pipeline'])
    self.model = model_factory(str(self.data['model']))
    self.data['features'] = np.array(self.data['features'])
    self.features = self.data['features']
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', type=str)
    parser.add_argument('--output', type=str)
    parser.add_argument('--sample_size', type=int, default=100)
    args = parser.parse_args()

    random.seed(42)
    requests = get_queries()
    queries_paths = [r.query_image for r in requests]
    selected_paths = sample_image_paths(args.input, args.sample_size)
    selected_paths += queries_paths
    sample_args = ['paths', 'features', 'crops']

    for file in Path(args.input).rglob("*.npz"):
        if Path(args.output, file.name).exists():
            print("skipping", file.name, "already exists")
            continue
        data = np.load(str(file), allow_pickle=True)
        idxs = np.array([
            i_path for i_path, path in enumerate(data['paths'])
            if path in selected_paths
        ])
        if len(idxs) == 0:
            continue
        new_data = {}
        for key in data.keys():
            if key in sample_args:
                new_data[key] = data[key][idxs]
            else:
                new_data[key] = data[key]
        FileStorage.save_data(Path(args.output, file.name), **new_data)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', default=None, type=str)
    parser.add_argument('--output', default=None, type=str)
    parser.add_argument('--crop_size', default=0.1, type=float)
    args = parser.parse_args()

    database = Database(FileStorage.load_datafiles(args.input))
    paths, crops, features = convert_individual_records_to_groups(
        database, crop_min_size=args.crop_size)

    output_path = Path(args.output, "faces.npz")
    storage.FileStorage.save_data(path=output_path, features=features, crops=crops, paths=paths)
def main():
    data_path = r"C:\Users\janul\Desktop\thesis_tmp_files\face_features_only_bigger_10percent_316videos"
    data = FileStorage.load_multiple_files_multiple_keys(
        path=data_path, retrieve_merged=['features', 'crops', 'paths'])
    features, paths, crops = data['features'], data['paths'], data['crops']

    som = SOM((50, 50), 128)
    som.som = load_from_file(
        r"C:\Users\janul\Desktop\thesis_tmp_files\cosine_som\euclidean\200k-original\som-euclidean,200000-200000.pickle"
    )
    som.set_representatives(features)

    present_frames = np.unique(som.representatives.flatten())
    print("Unique images included", len(present_frames))

    present_videos_set = {paths[i_present][:6] for i_present in present_frames}
    all_videos_set = {path[:6] for path in paths}
    print(all_videos_set - present_videos_set)  # No missing video

    np.random.seed(42)
    selected_images_for_experiment = np.random.choice(paths, 10, replace=False)
    print(selected_images_for_experiment)
    for selected in selected_images_for_experiment:
        # show_image(selected)
        pass

    missing_ids = set(range(0, len(paths))) - set(som.representatives.flatten())
    print(len(missing_ids))

    min_distance = []
    for missing_id in missing_ids:
        distances = []
        for face_id in set(som.representatives.flatten()):
            distances.append(np.linalg.norm(features[face_id] - features[missing_id]))
        min_distance.append(np.min(distances))

    filt = [i for i in min_distance if i > 0.45]
    print(len(filt))
    print(max(min_distance))
def init(self):
    if self.initialized:
        return
    self.initialized = True
    print("Initializing environment, this may take a while.")
    self.data = FileStorage.load_multiple_files_multiple_keys(
        path=self.data_path,
        retrieve_merged=['features', 'crops', 'paths'],
        retrieve_once=['pipeline', 'model'])
    if not self.data:
        print("Data for Regions does not contain the correct information. Environment not initialized.")
        self.initialized = False
        return
    self.preprocessing = pickle.loads(self.data['pipeline'])
    self.model = model_factory(str(self.data['model']))
    self.data['features'] = np.array(self.data['features'])
    self.regions_data = RegionsData(self.data)
def __init__(self, data_path, som_path):
    data = FileStorage.load_multiple_files_multiple_keys(
        path=data_path, retrieve_merged=['features', 'crops', 'paths'])
    if not data:
        print("Data for faces could not be obtained.")
        return

    Environment.features = data['features']
    Environment.paths = data['paths']
    Environment.crops = data['crops']
    Environment.features_info = []
    for i_crop, (path, crop) in enumerate(zip(Environment.paths, Environment.crops)):
        Environment.features_info.append(FaceCrop(src=path, crop=crop, idx=i_crop))

    self.som = SOM((50, 50), 128)
    if not Path(som_path).exists():
        print("Underlying SOM data not found.")
        return
    som_path = next(Path(som_path).rglob("*.pickle"))
    self.som.som = load_from_file(som_path)
    self.som.set_representatives(Environment.features)

    if self.use_random_grid:
        max_display_width = 20
        random_grid = np.arange(len(self.features))
        if len(random_grid) % max_display_width:
            suffix = np.ones(
                max_display_width - len(random_grid) % max_display_width,
                dtype=np.int32) * random_grid[-1]
            random_grid = np.concatenate([random_grid, suffix])
        self.som.representatives = random_grid.reshape(-1, max_display_width)

    self.initialized = True
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', default=None, type=str)
    parser.add_argument('--som', default=None, type=str)
    args = parser.parse_args()

    data = FileStorage.load_multiple_files_multiple_keys(
        path=args.input, retrieve_merged=['features', 'crops', 'paths'])
    features = data['features']
    data = np.vstack(features)

    q_error = []
    t_error = []
    files = list(Path(args.som).rglob("*.pickle"))
    for file in sorted(files, key=lambda f: f.stat().st_mtime):
        som = load_from_file(file)
        q_error.append(som.quantization_error(data))
        t_error.append(som.topographic_error(data))

    step = 1000
    plt.plot(np.arange(len(files) * step, step=step) / 1000, q_error, label='Quantization error')
    plt.plot(np.arange(len(files) * step, step=step) / 1000, t_error, label='Topographic error')
    plt.ylabel('Error')
    plt.xlabel('Iteration ($\\times 10^3$)')
    plt.legend()
    plt.savefig("som_errors.pdf", bbox_inches='tight')
    plt.show()
    print(q_error)
    print(t_error)
data = np.load(data_path, allow_pickle=True)
features = data['features']
print("Videos with faces", len(set([prefix.split("/")[0] for prefix in data['paths']])))

y_pred = fclusterdata(features, t=0.6, criterion='distance', method='complete')
print(len(y_pred))
print(len(set(y_pred)))

representatives = []
for cluster_id in set(y_pred):
    features_ids = np.argwhere(y_pred == cluster_id)
    cluster_items = features[features_ids]
    centroid = np.mean(cluster_items, axis=0)
    closest_to_centroid_idx = np.argmin([np.linalg.norm(x - centroid) for x in cluster_items])
    closest = features_ids[closest_to_centroid_idx]
    assert y_pred[closest] == cluster_id
    representatives.append(closest)

new_data_path = r'C:\Users\janul\Desktop\thesis_tmp_files\face_features_only_bigger_10percent_316videos_only_representatives\faces.npz'
new_data = {}
new_data['crops'] = data['crops'][representatives]
new_data['paths'] = data['paths'][representatives]
new_data['features'] = data['features'][representatives]
FileStorage.save_data(Path(new_data_path), **new_data)
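A small illustrative sketch (not from the script above) of how scipy's fclusterdata behaves with criterion='distance' and complete linkage: the dendrogram is cut so that no cluster contains members farther apart than t, and each sample receives an integer cluster id starting at 1.

# Illustrative example, not part of the thesis scripts.
import numpy as np
from scipy.cluster.hierarchy import fclusterdata

points = np.array([[0.0, 0.0], [0.1, 0.0], [5.0, 5.0]])
labels = fclusterdata(points, t=1.0, criterion='distance', method='complete')
print(labels)  # e.g. [1 1 2]: the two nearby points end up in the same cluster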
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--images_dir", default=None, type=str, help="Path to image directory.")
    parser.add_argument("--save_location", default="", type=str,
                        help="Path to directory where precomputed models are saved.")
    parser.add_argument("--input_size", default=96, type=int, help="Input shape for model (square width)")
    parser.add_argument("--batch_size", default=128, type=int, help="Batch size for processing")
    parser.add_argument("--num_regions", default=None, type=str,
                        help="Number of regions \"vertically,horizontally\".")
    parser.add_argument('--feature_model', default='resnet50v2', type=str,
                        help='Feature vector model to compute (default: %(default)s)')
    args = parser.parse_args()

    input_shape = (args.input_size, args.input_size, 3)
    num_regions = tuple(map(int, args.num_regions.split(","))) if args.num_regions else None

    if args.feature_model == 'resnet50v2' and num_regions:
        features_model = Resnet50V2(input_shape=input_shape)
        evaluation_mechanism = EvaluatingRegions(model=features_model, num_regions=num_regions)
    elif args.feature_model == 'resnet50v2':
        features_model = Resnet50V2(input_shape=input_shape)
        evaluation_mechanism = EvaluatingWholeImage(model=features_model)
    elif args.feature_model == 'resnet50v2antepenultimate':
        features_model = Resnet50V2Antepenultimate(input_shape=input_shape)
        evaluation_mechanism = EvaluatingSpatially(model=features_model)
    elif args.feature_model == 'mobilenetv2' and num_regions:
        features_model = MobileNetV2(input_shape=input_shape)
        evaluation_mechanism = EvaluatingRegions(model=features_model, num_regions=num_regions)
    elif args.feature_model == 'mobilenetv2':
        features_model = MobileNetV2(input_shape=input_shape)
        evaluation_mechanism = EvaluatingWholeImage(model=features_model)
    elif args.feature_model == 'mobilenetv2antepenultimate':
        features_model = MobileNetV2Antepenultimate(input_shape=input_shape)
        evaluation_mechanism = EvaluatingSpatially(model=features_model)
    elif args.feature_model == 'Resnet50_11k_classes' and num_regions:
        features_model = Resnet50_11k_classes()
        if args.input_size:
            regions_size = (args.input_size, args.input_size, 3)
        else:
            regions_size = None
        evaluation_mechanism = EvaluatingRegions(model=features_model,
                                                 num_regions=num_regions,
                                                 regions_size=regions_size)
    elif args.feature_model == 'Resnet50_11k_classes':
        features_model = Resnet50_11k_classes()
        evaluation_mechanism = EvaluatingWholeImage(model=features_model)
    elif args.feature_model == 'faces':
        evaluation_mechanism = EvaluatingFaces()
    else:
        raise ValueError('Unknown `feature_model`.')

    directories = FileStorage.directories(args.images_dir) or [args.images_dir]
    print("Found %d directories." % len(directories))

    images_features = []
    for directory in directories:
        save_location = Path(args.save_location,
                             filename(args.feature_model, Path(directory).name, extension='.npz'))
        if save_location.exists():
            print("Skipping directory {}".format(directory))
            continue
        print("Processing directory {}".format(directory))
        for images_data in batches(FileStorage.load_images_continuously(directory),
                                   batch_size=args.batch_size):
            features = evaluation_mechanism.features([sample.image for sample in images_data])
            for image_features, image_data in zip(features, images_data):
                images_features.append(
                    DatabaseRecord(
                        filename=str(Path(image_data.filename).relative_to(args.images_dir).as_posix()),
                        features=image_features))
        FileStorage.save_data(Path(args.save_location, filename(args.feature_model, Path(directory).name)),
                              data=images_features,
                              src_dir=args.images_dir,
                              model=repr(evaluation_mechanism.model))
        images_features = []
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', default=None, type=str)
    parser.add_argument('--output', default=None, type=str)
    parser.add_argument('--pretrained', default=None, type=str)
    parser.add_argument('--iterations', default=100, type=int)
    parser.add_argument('--learning_rate', default=0.5, type=float)
    parser.add_argument('--sigma', default=1, type=float)
    parser.add_argument('--distance', default='euclidean', type=str)
    args = parser.parse_args()

    data = FileStorage.load_multiple_files_multiple_keys(
        path=args.input, retrieve_merged=['features', 'crops', 'paths'])
    features = data['features']
    data = np.vstack(features)

    seed_sets = [(10, 100), (42, 4242), (24, 2424), (4242, 24), (1, 1),
                 (4242, 42), (71, 37), (678, 123), (321, 87), (3, 980)]
    np_seed, som_seed = seed_sets[8]
    np.random.seed(np_seed)

    som = MiniSom(50, 50, data.shape[1],
                  random_seed=som_seed,
                  activation_distance=args.distance,
                  learning_rate=args.learning_rate,
                  sigma=args.sigma)
    if args.pretrained:
        som = load_from_file(args.pretrained)
        som._learning_rate = args.learning_rate
        som._sigma = args.sigma

    max_iter = args.iterations
    q_error = []
    t_error = []
    errors_step = 1000
    for i in range(max_iter + 1):
        if (i + 1) % errors_step == 0:
            print("Iteration", i + 1, "/", max_iter)
        if i % errors_step == 0:
            q_error.append(som.quantization_error(data))
            t_error.append(som.topographic_error(data))
            print("Quantization error:", q_error[-1])
            print("Topographic error:", t_error[-1])
            if args.output:
                som_log_file = Path(
                    args.output,
                    "som-{},{}-{}.pickle".format(args.distance, i, args.iterations))
                dump_to_file(som_log_file, som)
        rand_i = np.random.randint(len(data))
        som.update(data[rand_i], som.winner(data[rand_i]), i, max_iter)

    experiment_description = ";".join(
        [args.distance, str(args.iterations), str(np_seed), str(som_seed)])
    plt.plot(np.arange(max_iter + 1, step=errors_step), q_error, label='quantization error')
    plt.plot(np.arange(max_iter + 1, step=errors_step), t_error, label='topographic error')
    plt.ylabel('quantization error')
    plt.xlabel('iteration index')
    plt.title(experiment_description)
    plt.legend()
    plt.show()
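For comparison, a minimal illustrative sketch (not part of the script above) that uses MiniSom's built-in train_random instead of the manual update loop; it trains on random data and reports the same quantization and topographic errors tracked above.

# Illustrative example, not part of the thesis scripts.
import numpy as np
from minisom import MiniSom

example_data = np.random.RandomState(0).rand(200, 128)
example_som = MiniSom(10, 10, example_data.shape[1],
                      learning_rate=0.5, sigma=1.0,
                      activation_distance='euclidean', random_seed=42)
example_som.train_random(example_data, 1000)         # built-in random-sample training
print(example_som.winner(example_data[0]))           # grid coordinates of the best-matching unit
print(example_som.quantization_error(example_data))
print(example_som.topographic_error(example_data))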
parser = argparse.ArgumentParser()
parser.add_argument('--input', default=None, type=str)
parser.add_argument('--output', default=None, type=str)
args = parser.parse_args()

for file in Path(args.input).rglob("*.npz"):
    save_location = Path(args.output, file.name)
    if save_location.exists():
        print("Skipping {}. Already present.".format(save_location))
        continue
    data = np.load(str(file), allow_pickle=True)
    new_db_records = []
    for filepath, features in data['data']:
        image_features = []
        for regions_features in features:
            # Average over the spatial dimensions; there is no batch dimension here.
            avg_pool_features = np.mean(regions_features.features, axis=(0, 1))
            image_features.append(
                RegionFeatures(crop=regions_features.crop, features=avg_pool_features))
        new_db_records.append(DatabaseRecord(filename=filepath, features=image_features))
    FileStorage.save_data(Path(args.output, file.name),
                          data=new_db_records,
                          src_dir=data['src_dir'],
                          model=data['model'])