def test_api_get_image_data():
    """Call icio.get_image_data() twice on one image dir and check the keys.

    The first call uses only the image directory; the second call passes every
    ``*_kwds`` option explicitly to exercise the keyword API. The fingerprints
    returned by the second call must be keyed by exactly the image files of
    the test context.
    """
    with ImagedirCtx() as ctx:
        # Run 1: defaults only (creates the fingerprints database).
        images, fingerprints, timestamps = icio.get_image_data(ctx.imagedir)

        # Run 2: same directory, this time with all keyword options spelled
        # out. Should be much faster than run 1.
        api_kwds = {
            'pca_kwds': {'n_components': 0.95},
            'model_kwds': {'layer': 'fc2'},
            'img_kwds': {'size': (224, 224)},
            'timestamps_kwds': {'source': 'auto'},
        }
        images, fingerprints, timestamps = icio.get_image_data(
            ctx.imagedir, **api_kwds)

        fingerprint_names = fingerprints.keys()
        assert len(fingerprint_names) == len(ctx.image_fns)
        assert set(fingerprint_names) == set(ctx.image_fns)
def main():
    """Fingerprint, cluster and then copy/move images into cluster folders.

    Reads the module-level settings ``IMAGE_PATH`` (directory with the
    images), ``SIMILARITY`` (passed as ``sim`` to ``calc.cluster``) and
    ``ACTION`` (``'copy'`` or ``'move'``).

    The output directory is ``<IMAGE_PATH>/<icio.ic_base_dir>/clusters``. An
    existing output directory is removed first. Each cluster gets its own
    zero-padded, numbered sub-directory; images that ended up in no cluster
    are placed directly in the output directory. For any other ``ACTION``
    value the directories are still (re)created but no files are transferred.
    """
    clusters_path = os.path.join(IMAGE_PATH, icio.ic_base_dir, 'clusters')

    # The bottleneck is calc.fingerprints() called in this function, all other
    # operations are very fast. get_image_data() writes fingerprints to disk
    # and loads them again instead of re-calculating them.
    print('\nFingerprinting images...\n')
    images, fingerprints, timestamps = icio.get_image_data(IMAGE_PATH)
    print('\nImage fingerprinting done.\n')

    # Run clustering on the fingerprints using the configured similarity.
    print('\nClustering images...\n')
    clusters = calc.cluster(fingerprints, sim=SIMILARITY)
    print('\nClustering done.\n')

    # Re-format clusters into a simple 2D list (one inner list per cluster).
    simple_clusters = [
        cluster
        for cluster_list in clusters.values()
        for cluster in cluster_list
    ]

    # Find unclustered images: build the set of clustered images once, then
    # filter all known images against it.
    clustered_images = {
        image for cluster in simple_clusters for image in cluster
    }
    unclustered_images = [
        image for image in images.keys() if image not in clustered_images
    ]

    if ACTION == 'copy':
        transfer = shutil.copy
        print('\nCopying images to clusters...\n')
    elif ACTION == 'move':
        transfer = shutil.move
        print('\nMoving images to clusters...\n')
    else:
        # Unknown action: keep the previous behaviour of transferring nothing.
        transfer = None

    # Remove existing clusters (if present)
    if os.path.exists(clusters_path):
        shutil.rmtree(clusters_path)

    # Always create the output directory itself. Without this, a run that
    # produces no clusters would hand shutil.copy()/shutil.move() a
    # non-existent destination, which they interpret as a target *file name*:
    # every unclustered image would then be written over the same path.
    os.makedirs(clusters_path, exist_ok=True)

    # Move images into cluster folders
    cluster_dir_length = len(str(len(simple_clusters)))
    for i, cluster in enumerate(simple_clusters):
        cluster_name = str(i).zfill(cluster_dir_length)
        cluster_dir = os.path.join(clusters_path, cluster_name)
        os.makedirs(cluster_dir)
        if transfer is None:
            continue
        for image in cluster:
            transfer(os.path.abspath(image), cluster_dir)

    # Move unclustered images too
    if transfer is not None:
        for image in unclustered_images:
            transfer(os.path.abspath(image), clusters_path)

    print('\nAll done!')
    print('Clustered images can be found in ' + clusters_path + '\n')
#!/usr/bin/python3

# Minimal example: call the convenience function io.get_image_data() with the
# image directory only, no extra arguments.

from imagecluster import calc
from imagecluster import io as icio
from imagecluster import postproc

# calc.fingerprints(), called inside get_image_data(), is the bottleneck; all
# other operations are very fast. get_image_data() writes the fingerprints to
# disk and loads them again instead of re-calculating them.
images, fingerprints, timestamps = icio.get_image_data(
    'downloads/cart icon/'
)

# Cluster the fingerprints, selecting clusters with similarity index sim=0.5.
clusters = calc.cluster(fingerprints, sim=0.5)

# Create dirs with links to images. Each dir represents the cluster its
# images belong to.
postproc.make_links(
    clusters,
    'downloads/cart icon/imagecluster/clusters',
)

# Plot the images arranged in clusters.
postproc.visualize(clusters, images)
#!/usr/bin/python3

# Minimal example: call the convenience function io.get_image_data() with the
# image directory only, no extra arguments.

from imagecluster import calc
from imagecluster import io as icio
from imagecluster import postproc

# calc.fingerprints(), called inside get_image_data(), is the bottleneck; all
# other operations are very fast. get_image_data() writes the fingerprints to
# disk and loads them again instead of re-calculating them.
images, fingerprints, timestamps = icio.get_image_data(
    'pics/'
)

# Cluster the fingerprints, selecting clusters with similarity index sim=0.5.
clusters = calc.cluster(fingerprints, sim=0.5)

# Create dirs with links to images. Each dir represents the cluster its
# images belong to.
postproc.make_links(
    clusters,
    'pics/imagecluster/clusters',
)

# Plot the images arranged in clusters.
postproc.visualize(clusters, images)