def plot_mosaics(config, clustering_type, clustering, output_image_width, output_image_height, mosaic_nrow, mosaic_ncol_max):
    """Render one mosaic image per cluster and save each as a PNG.

    For every distinct cluster label, the pictures assigned to that
    cluster are read at (output_image_height, output_image_width) and
    arranged on a grid of mosaic_nrow rows, at most mosaic_ncol_max
    columns, then written to clustering.save_directory.

    Returns the array of distinct cluster labels.
    """
    collections_dir = config.get('directory')['collections']
    results = clustering.get_results()
    clusters_id = np.unique(clustering.final_labels)

    for cluster_id in clusters_id:
        # pictures belonging to the current cluster
        member_files = [
            os.path.join(collections_dir, "%s.jpg" % res[0])
            for res in results if res[1] == cluster_id
        ]
        thumbnails = [
            Tools.read_np_picture(
                filename,
                target_size=(output_image_height, output_image_width))
            for filename in member_files
        ]
        mosaic = Tools.display_mosaic(
            thumbnails, nrow=mosaic_nrow, ncol_max=mosaic_ncol_max)
        target = os.path.join(
            clustering.save_directory,
            "cluster_%s.png" % str(cluster_id).zfill(2))
        mosaic.save(target, "PNG")

    return clusters_id
def main():
    """End-to-end clustering run: embed pictures, cluster, then report.

    Loads the autoencoder configured for the chosen clustering type,
    computes latent representations for the training pictures, fits the
    clustering, saves it, and optionally produces plots, silhouette
    graphs and per-cluster mosaics.
    """
    _CLUSTERING_TYPE = 'n2d'
    _BATCH_SIZE = 496
    _N_BATCH = 5
    _PLOTS = True
    _MOSAICS = True
    _SILHOUETTE = True
    _OUTPUT_IMAGE_WIDTH = 96
    _OUTPUT_IMAGE_HEIGHT = 54
    _MOSAIC_NROW = 10
    _MOSAIC_NCOL_MAX = 10

    model_cfg = CONFIG.get('clustering')[_CLUSTERING_TYPE]['model']
    model, model_config = Tools.load_model(
        CONFIG, model_cfg['type'], model_cfg['name'])

    filenames = Tools.list_directory_filenames(
        CONFIG.get('sampling')['autoencoder']['directory']['train'])
    pictures_id, intermediate_output = Tools.load_latent_representation(
        CONFIG, model, model_config, filenames, _BATCH_SIZE, _N_BATCH, False)

    clustering = run_clustering(CONFIG, _CLUSTERING_TYPE, pictures_id,
                                intermediate_output)
    clustering.save()

    if _PLOTS:
        run_plots(CONFIG, _CLUSTERING_TYPE, clustering)
    if _SILHOUETTE:
        plot_silhouette(CONFIG, _CLUSTERING_TYPE, clustering)
    if _MOSAICS:
        plot_mosaics(CONFIG, _CLUSTERING_TYPE, clustering,
                     _OUTPUT_IMAGE_WIDTH, _OUTPUT_IMAGE_HEIGHT,
                     _MOSAIC_NROW, _MOSAIC_NCOL_MAX)
def create_sprite(config, df):
    """Assemble the pictures listed in df into one sprite (atlas) image.

    Thumbnails are read at (TARGET_SIZE_HEIGHT, TARGET_SIZE_WIDTH) and
    pasted row-major onto a SPRITE_NB_LIGNE x SPRITE_NB_COLONNE grid.
    Extra pictures beyond the grid capacity are ignored; unused cells
    stay black.

    Returns the sprite as a displayable image object.
    """
    collections_dir = config.get('directory')['collections']
    images_array = [
        Tools.read_np_picture(
            os.path.join(collections_dir, "%s.jpg" % picture_id),
            target_size=(TARGET_SIZE_HEIGHT, TARGET_SIZE_WIDTH))
        for picture_id in df['pictures_id']
    ]

    sprite = np.zeros((TARGET_SIZE_HEIGHT * SPRITE_NB_LIGNE,
                       TARGET_SIZE_WIDTH * SPRITE_NB_COLONNE, 3))

    # FIX: the original indexed images_array before checking bounds, which
    # raised IndexError when df was empty; enumerate makes the fill safe.
    capacity = SPRITE_NB_LIGNE * SPRITE_NB_COLONNE
    for index, thumbnail in enumerate(images_array):
        if index >= capacity:
            break
        i, j = divmod(index, SPRITE_NB_COLONNE)
        sprite[(i * TARGET_SIZE_HEIGHT):(i + 1) * TARGET_SIZE_HEIGHT,
               (j * TARGET_SIZE_WIDTH):(j + 1) * TARGET_SIZE_WIDTH, :] = thumbnail

    return Tools.display_one_picture(sprite)
def save(self):
    """Persist the fitted UMAP and DBSCAN estimators under save_directory."""
    Tools.create_dir_if_not_exists(self.save_directory)
    # dump each fitted estimator next to the other clustering artefacts
    for estimator, filename in (
            (self.umap_fit, self.umap_save_name),
            (self.dbscan_fit, self.dbscan_save_name)):
        joblib.dump(estimator, os.path.join(self.save_directory, filename))
def save(self):
    """Persist the fitted PCA, KMeans and CAH estimators under save_directory."""
    Tools.create_dir_if_not_exists(self.save_directory)
    # dump each fitted estimator next to the other clustering artefacts
    for estimator, filename in (
            (self.pca_fit, self.pca_save_name),
            (self.kmeans_fit, self.kmeans_save_name),
            (self.cah_fit, self.cah_save_name)):
        joblib.dump(estimator, os.path.join(self.save_directory, filename))
def save(self):
    """Persist the fitted PCA, KMeans and DBSCAN estimators.

    Artefacts are written under self.config['save_directory'], which is
    created on demand.
    """
    target_dir = self.config['save_directory']
    Tools.create_dir_if_not_exists(target_dir)
    for estimator, filename in (
            (self.pca_fit, self.pca_save_name),
            (self.kmeans_fit, self.kmeans_save_name),
            (self.dbscan_fit, self.dbscan_save_name)):
        joblib.dump(estimator, os.path.join(target_dir, filename))
def __init__(self, config, pictures_id, pictures_np):
    """Store the inputs and prepare a versioned results directory.

    The directory is <save_directory>/<model_type>_<model_name>_<version>
    and is created immediately if missing.
    """
    self.config = config
    version_tag = '%s_%s_%s' % (config['model']['type'],
                                config['model']['name'],
                                config['version'])
    self.save_directory = os.path.join(config['save_directory'], version_tag)
    self.pictures_id = pictures_id
    self.pictures_np = pictures_np
    # filled later by the clustering pipeline
    self.final_labels = None
    self.colors = None
    Tools.create_dir_if_not_exists(self.save_directory)
def compute_colors(self):
    """Assign one display color per entry of final_labels.

    The palette size is the number of distinct labels. Returns self so
    the call can be chained.
    """
    # set() alone suffices to count distinct labels; the intermediate
    # list() in the original was a redundant materialization
    n_classes = len(set(self.final_labels))
    self.colors = [
        Tools.get_color_from_label(label, n_classes)
        for label in self.final_labels
    ]
    return self
def main():
    """Build the sprite atlas and the facets / facets-dive HTML reports."""
    db_manager = Tools.create_db_manager(CONFIG)
    df = request_data(CONFIG, db_manager)

    reports_dir = CONFIG.get('directory')['reports']

    # sprite atlas referenced by the facets-dive page below.
    # NOTE(review): 'sprite_altas' looks like a typo for 'sprite_atlas';
    # kept as-is because both usages must agree on the filename.
    sprite = create_sprite(CONFIG, df)
    sprite.save(
        os.path.join(reports_dir, 'figures', 'sprite_altas.png'), "PNG")

    # facets overview page
    html_facets = generate_facets(CONFIG, df)
    with open(os.path.join(reports_dir, 'facets.html'), 'w') as f:
        f.write(html_facets)

    # facets-dive page, pointing at the sprite written above
    html_facets_dive = generate_facets_dive(CONFIG, df,
                                            './figures/sprite_altas.png')
    with open(os.path.join(reports_dir, 'facets-dive.html'), 'w') as f:
        f.write(html_facets_dive)
def create_poster_picture(config, poster_config, poster, pictures_df):
    """Fill every part of the poster with a picture and record placements.

    For each part in poster_config['DECOUPAGE'], a picture is chosen
    either by sampling one row from the listed clusters (the row is then
    removed from pictures_df so it is not reused) or at random from the
    part's explicit picture_ids. Parts smaller than the native
    (O_WIDTH, O_HEIGHT) size are center-cropped from the full-size read;
    larger parts are read resized to the part dimensions.

    Returns the updated poster array and the list of
    {'pictures_id', 'position'} placement records.
    """
    pictures_id_positions = []
    for part in poster_config['DECOUPAGE']:
        picture_id = None
        directory = config.get('directory')['collections']
        if 'directory' in part.keys() and part['directory']:
            # part-specific source directory, relative to the project root
            directory = os.path.join(
                config.get('directory')['project_dir'], part['directory'])
        if part['cluster']:
            # sample one picture among the requested clusters, without reuse
            selection = pictures_df.loc[pictures_df.label.isin(
                part['cluster'])].sample(1)
            pictures_df = pictures_df.drop(selection.index)
            picture_id = selection.pictures_id.values[0]
        else:
            picture_id = random.choice(part['picture_ids'])
        pictures_id_positions.append({
            'pictures_id': picture_id,
            'position': part['position']
        })
        if part['size'][0] < O_WIDTH or part['size'][1] < O_HEIGHT:
            # read at native size, then center-crop to the part's size
            part_array = Tools.read_np_picture(
                os.path.join(directory, "%s.jpg" % picture_id),
                target_size=(O_HEIGHT, O_WIDTH))
            index_col = np.floor((O_WIDTH - part['size'][0]) / 2).astype(int)
            index_row = np.floor((O_HEIGHT - part['size'][1]) / 2).astype(int)
            # FIX: axis 0 (rows) must use the row offset with the part
            # height and axis 1 (columns) the column offset with the part
            # width; the two offsets were previously swapped, producing an
            # off-center (or wrongly shaped) crop.
            part_array = part_array[index_row:(index_row + part['size'][1]),
                                    index_col:(index_col + part['size'][0]), :].copy()
        else:
            part_array = Tools.read_np_picture(
                os.path.join(directory, "%s.jpg" % picture_id),
                target_size=(part['size'][1], part['size'][0]))
        # paste the part at its origin; poster is indexed [row, col]
        poster[part['origin'][1]:(part['origin'][1] + part['size'][1]),
               part['origin'][0]:(part['origin'][0] + part['size'][0]), :] = part_array
    return poster, pictures_id_positions
def populate_embedding(config, db_manager, clustering_type,
                       clustering_version, clustering_model_type,
                       clustering_model_name, drop=False):
    """Predict an embedding and label for every collection picture and
    insert them into the pictures_embedding table, batch by batch.

    Parameters
    ----------
    config : project configuration object
    db_manager : database helper used for the inserts
    clustering_type : 'n2d', 'classical' or 'dbscan'
    clustering_version, clustering_model_type, clustering_model_name :
        identify the clustering artefacts and autoencoder to load
    drop : when True, drop the matching embedding partition first

    Raises
    ------
    ValueError
        If clustering_type is not one of the known types.
    """
    # load_clustering is called for its configuration only; the clustering
    # object itself is re-instantiated below according to its type.
    # FIX: use the `config` parameter instead of the module-level CONFIG,
    # which silently ignored the caller's configuration.
    _, clustering_config = Tools.load_clustering(
        config, clustering_type, clustering_version,
        clustering_model_type, clustering_model_name)

    if drop:
        db_manager.drop_embedding_partition(
            clustering_type, clustering_version,
            clustering_model_type, clustering_model_name)

    if clustering_type == 'n2d':
        clustering = N2DClustering(clustering_config)
    elif clustering_type == 'classical':
        clustering = ClassicalClustering(clustering_config)
    elif clustering_type == 'dbscan':
        clustering = DBScanClustering(clustering_config)
    else:
        # FIX: bare `raise Exception` gave no hint about the cause
        raise ValueError("Unknown clustering type: %s" % clustering_type)
    clustering.load()

    model, model_config = Tools.load_model(config, clustering_model_type,
                                           clustering_model_name)
    filenames = Tools.list_directory_filenames(
        config.get('directory')['collections'])
    generator = Tools.load_latent_representation(
        config, model, model_config, filenames, 496, None, True)

    count = 0
    for ids, latents in generator:
        pictures_embedding = clustering.predict_embedding(latents)
        pictures_label = clustering.predict_label(pictures_embedding)
        rows = []
        # `picture_id` instead of `id`, which shadowed the builtin
        for i, picture_id in enumerate(ids):
            rows.append((
                picture_id,
                float(np.round(pictures_embedding[i][0], 4)),
                float(np.round(pictures_embedding[i][1], 4)),
                int(pictures_label[i]),
                clustering_type,
                clustering_version,
                clustering_model_type,
                clustering_model_name
            ))
        count += db_manager.insert_row_pictures_embedding(rows)
        # progress: inserted so far vs total pictures
        print("Nombre d'insertion: %s / %s" % (count, len(filenames)))
    return
def write_poster_picture(config, poster_config, poster, poster_id=None):
    """Save the poster array as a JPEG and return its id.

    A fresh uuid1 is generated when poster_id is falsy. The file is
    written under <data_dir>/posters/version_<version>_<poster_id>.jpg.
    """
    poster_id = poster_id if poster_id else uuid.uuid1()
    img = Tools.display_one_picture(poster)
    # FIX: use the `config` parameter instead of the module-level CONFIG,
    # which silently ignored the caller's configuration
    img.save(
        os.path.join(
            config.get('directory')['data_dir'], 'posters',
            "version_%s_%s.jpg" % (poster_config['version'], poster_id)),
        "JPEG")
    return poster_id
def main(action = 'populate_embedding'):
    """Entry point: dispatch on `action` to populate database tables."""
    db_manager = Tools.create_db_manager(CONFIG)
    if action == 'population_locations':
        populate_locations(CONFIG, db_manager)
    elif action == 'populate_embedding':
        db_manager.create_pictures_embedding_table(False)
        # clustering versions to load; all share the same conv model
        to_load = [
            {'clustering_type': 'n2d',
             'clustering_version': version,
             'clustering_model_type': 'simple_conv',
             'clustering_model_name': 'model_colab',
             'drop': False}
            for version in (1, 2, 3)
        ]
        for kwargs in to_load:
            try:
                populate_embedding(CONFIG, db_manager, **kwargs)
            except Exception as err:
                # best-effort: report the failure, continue with the next one
                print(err)
def main(config_id, generate, poster_id):
    """Generate `generate` posters for the configuration `config_id`.

    A template poster is written first, then each generated poster is
    filled with pictures, recorded in the database and written to disk.
    """
    db_manager = Tools.create_db_manager(CONFIG)
    # make sure the posters table exists before inserting
    db_manager.create_posters_table()

    poster_config = POSTERS_CONFIG[config_id]
    pictures_df = get_pictures_df(CONFIG, db_manager, poster_config)

    poster = create_empty_poster(CONFIG, poster_config)
    poster = create_poster_template_picture(CONFIG, poster_config, poster)
    write_poster_picture(CONFIG, poster_config, poster, poster_id="template")

    for i in range(generate):
        print("%s / %s" % (i + 1, generate))
        poster, p_pos = create_poster_picture(CONFIG, poster_config, poster,
                                              pictures_df)
        # save_poster_picture receives the caller-supplied id (may be None)
        # and returns the id actually used for this poster
        new_id = save_poster_picture(CONFIG, poster_config, p_pos,
                                     db_manager, poster_id)
        write_poster_picture(CONFIG, poster_config, poster, new_id)
cfg = Config(project_dir = os.getenv("PROJECT_DIR"), mode = os.getenv("MODE")) #%% [markdown] # ## Chargement du modèle #%% ## charger le modèle model_type = 'simple_conv' cfg.get('models')[model_type]['model_name'] = 'model_colab' model = SimpleConvAutoEncoder(cfg.get('models')[model_type]) #%% [markdown] ## Chargement des images #%% filenames = Tools.list_directory_filenames('data/processed/models/autoencoder/train/k/') generator_imgs = Tools.generator_np_picture_from_filenames(filenames, target_size = (27, 48), batch = 496, nb_batch = 10) #%% pictures_id, pictures_preds = Tools.encoded_pictures_from_generator(generator_imgs, model) #%% intermediate_output = pictures_preds.reshape((pictures_preds.shape[0], 3*6*16)) #%% [markdown] # ## ACP # Réduction de la dimension #%% clustering = ClassicalClustering(cfg.get('clustering')['classical'], pictures_id, intermediate_output)
def run_plots(config, clustering_type, clustering):
    """Produce the diagnostic graphs for one fitted clustering.

    Depending on clustering_type ('classical', 'n2d' or 'dbscan') this
    saves static PCA/t-SNE/UMAP scatter plots, an interactive bokeh
    scatter with picture-thumbnail tooltips, and (classical only) the
    CAH dendrogram, all under clustering.save_directory. Returns True.
    """
    if clustering_type in ['classical']:
        ## Scatter of the PCA reduction, colored by final cluster
        fig, ax = plt.subplots(figsize=(24, 14))
        scatter = ax.scatter(clustering.pca_reduction[:, 0],
                             clustering.pca_reduction[:, 1],
                             c=clustering.colors)
        legend1 = ax.legend(*scatter.legend_elements(),
                            loc="lower left",
                            title="Classes")
        ax.add_artist(legend1)
        plt.savefig(os.path.join(clustering.save_directory, 'pca_clusters.png'))
    if clustering_type in ['classical']:
        ## Scatter of the t-SNE embedding, colored by final cluster
        fig, ax = plt.subplots(figsize=(24, 14))
        # NOTE(review): `classes` is assigned but never used
        classes = clustering.final_labels
        scatter = ax.scatter(clustering.tsne_embedding[:, 0],
                             clustering.tsne_embedding[:, 1],
                             c=clustering.colors)
        legend1 = ax.legend(*scatter.legend_elements(),
                            loc="lower left",
                            title="Classes")
        ax.add_artist(legend1)
        plt.savefig(
            os.path.join(clustering.save_directory, 'tsne_clusters.png'))
    if clustering_type in ['n2d', 'dbscan']:
        ## Scatter of the UMAP embedding, colored by final cluster
        fig, ax = plt.subplots(figsize=(24, 14))
        # NOTE(review): `classes` is assigned but never used
        classes = clustering.final_labels
        scatter = ax.scatter(clustering.umap_embedding[:, 0],
                             clustering.umap_embedding[:, 1],
                             c=clustering.colors)
        legend1 = ax.legend(*scatter.legend_elements(),
                            loc="lower left",
                            title="Classes")
        ax.add_artist(legend1)
        plt.savefig(
            os.path.join(clustering.save_directory, 'umap_clusters.png'))
    if clustering_type in ['n2d', 'classical', 'dbscan']:
        ## Interactive bokeh scatter: hovering shows the picture thumbnail
        filenames = [
            os.path.join(
                config.get('directory')['collections'],
                "%s.jpg" % one_res[0]) for one_res in clustering.get_results()
        ]
        images_array = [
            Tools.read_np_picture(img_filename, target_size=(54, 96))
            for img_filename in filenames
        ]
        base64_images = [Tools.base64_image(img) for img in images_array]
        # choose the 2-D coordinates matching the clustering type
        if clustering_type in ['n2d', 'dbscan']:
            x = clustering.umap_embedding[:, 0]
            y = clustering.umap_embedding[:, 1]
            html_file = 'umap_bokeh.html'
            title = 'UMAP projection of iss clusters'
        elif clustering_type == 'classical':
            x = clustering.tsne_embedding[:, 0]
            y = clustering.tsne_embedding[:, 1]
            html_file = 'tsne_bokeh.html'
            title = 't-SNE projection of iss clusters'
        df = pd.DataFrame({'x': x, 'y': y})
        df['image'] = base64_images
        df['label'] = clustering.final_labels.astype(str)
        df['color'] = df['label'].apply(Tools.get_color_from_label)
        datasource = ColumnDataSource(df)
        output_file(os.path.join(clustering.save_directory, html_file))
        plot_figure = figure(
            title=title,
            # plot_width=1200,
            # plot_height=1200,
            tools=('pan, wheel_zoom, reset'))
        # tooltip: embedded thumbnail plus the cluster label
        plot_figure.add_tools(
            HoverTool(tooltips="""
<div>
    <div>
        <img src='@image' style='float: left; margin: 5px 5px 5px 5px'/>
    </div>
    <div>
        <span style='font-size: 16px'>Cluster:</span>
        <span style='font-size: 18px'>@label</span>
    </div>
</div>
"""))
        plot_figure.circle('x',
                           'y',
                           source=datasource,
                           color=dict(field='color'),
                           line_alpha=0.6,
                           fill_alpha=0.6,
                           size=4)
        show(plot_figure)
    if clustering_type in ['classical']:
        ## Dendrogram of the hierarchical (CAH) clustering
        fig, ax = plt.subplots(figsize=(24, 14))
        plt.title('Hierarchical Clustering Dendrogram')
        Tools.plot_dendrogram(clustering.cah_fit, labels=clustering.cah_labels)
        plt.savefig(os.path.join(clustering.save_directory, 'dendograms.png'))
    return True
# -*- coding: utf-8 -*-
"""Encode pictures with the convolutional autoencoder (script header)."""

# FIX: `os` was used below (os.getenv) without being imported
import os

from iss.tools import Config
from iss.tools import Tools
from iss.models import SimpleConvAutoEncoder
from iss.clustering import ClassicalClustering
from dotenv import find_dotenv, load_dotenv

## Config: environment variables drive project_dir and mode
load_dotenv(find_dotenv())
cfg = Config(project_dir = os.getenv("PROJECT_DIR"), mode = os.getenv("MODE"))

## load the convolutional autoencoder trained in colab
model_type = 'simple_conv'
cfg.get('models')[model_type]['model_name'] = 'model_colab'
model = SimpleConvAutoEncoder(cfg.get('models')[model_type])

## batched image generator over the autoencoder training pictures
filenames = Tools.list_directory_filenames('data/processed/models/autoencoder/train/k/')
generator_imgs = Tools.generator_np_picture_from_filenames(filenames, target_size = (27, 48), batch = 496, nb_batch = 2)

## Generate images