def from_directories(cls, path_to_integration_dir, _prefix='cluster_from_dir', n_images=None, **kwargs):
    """Constructor to get a cluster from pickle files found under directories.

    Every directory is walked recursively with ``os.walk`` and each file is
    handed to ``SingleFrame``; frames that end up with a ``miller_array``
    attribute are kept, the others are logged and skipped.

    usage: Cluster.from_directories(..)

    :param path_to_integration_dir: list of directories containing pickle
        files. Will be searched recursively.
    :param _prefix: passed to ``cls`` as its second argument.
    :param n_images: find at most this number of images. ``None`` (the
        default) means no limit; zero or a negative value loads nothing.
    :param kwargs: forwarded unchanged to ``SingleFrame`` (e.g. ``use_b``).
    :return: ``cls(frames, _prefix, info_string)``.
    """
    data = []

    def done():
        return n_images is not None and len(data) >= n_images

    def collect():
        for top in path_to_integration_dir:
            for dirpath, _dirnames, filenames in os.walk(top):
                for filename in filenames:
                    # Check the limit *before* touching the next file so that
                    # a limit that is already met never loads another frame.
                    if done():
                        return
                    path = os.path.join(dirpath, filename)
                    this_frame = SingleFrame(path, filename, **kwargs)
                    if hasattr(this_frame, 'miller_array'):
                        data.append(this_frame)
                    else:
                        logging.info('skipping file {}'.format(filename))

    collect()
    return cls(data, _prefix,
               'Made from files in {}'.format(path_to_integration_dir[:]))
def __init__(self, *args, **kwargs):
    """
    Takes the same arguments as the SingleFrame constructor, plus two
    optional keyword arguments read from ``kwargs``:

    :param kwargs['scale']: default True. Specifies if the images should be
        scaled upon creation. Mainly switched off for testing.
    :param kwargs['use_b']: default True. If false, only initialise the scale
        factor, not the B factor.
    """
    SingleFrame.__init__(self, *args, **kwargs)

    if not hasattr(self, 'miller_array'):
        # The base constructor did not set a miller array, so none of the
        # extra attributes below are created.
        return

    self.use_scales = kwargs.get('scale', True)
    self.use_b = kwargs.get('use_b', True)
    # this is populated when the Graph is made.
    self.edges = []
    self.partialities = self.calc_partiality(
        self.get_x0(), update_wilson=self.use_scales)
    self.scales = self.calc_scales(self.get_x0())
    # to be used for classification after instantiation
    self.label = None
    # for testing, when we know the 'source' of the image
    self.source = None
    self.params = self.get_x0()
def __init__(self, *args, **kwargs):
    """
    Initialise through SingleFrame, then attach the scaling and
    bookkeeping attributes when the base constructor produced a
    ``miller_array``.

    All arguments are forwarded to ``SingleFrame.__init__``. Two keyword
    arguments are additionally read here:

    :param kwargs['scale']: default True. Specifies if the images should be
        scaled upon creation. Mainly switched off for testing.
    :param kwargs['use_b']: default True. If false, only initialise the scale
        factor, not the B factor.
    """
    SingleFrame.__init__(self, *args, **kwargs)

    base_init_worked = hasattr(self, 'miller_array')
    if base_init_worked:
        self.use_scales = kwargs.get('scale', True)
        self.use_b = kwargs.get('use_b', True)

        # Filled in later, when the Graph is made.
        self.edges = []

        # Each quantity starts from its own get_x0() call.
        partiality_start = self.get_x0()
        self.partialities = self.calc_partiality(
            partiality_start, update_wilson=self.use_scales)
        scale_start = self.get_x0()
        self.scales = self.calc_scales(scale_start)

        # label: for classification after instantiation.
        # source: for testing, when the 'source' of the image is known.
        self.label = None
        self.source = None
        self.params = self.get_x0()
def all_frames_intensity_stats(self, ax=None, smoothing_width=2000):
    """
    Goes through all frames in the cluster, and plots all the partial
    intensities. Then does a linear fit and rolling average on these.

    Only points with ``log_i >= 0`` are plotted and fitted. All limits,
    labels and the title are applied to ``ax`` itself, so a caller-supplied
    axes is fully decorated even when it is not pyplot's current axes.

    :param ax: Optional matplotlib axes object to plot to. Otherwise, a new
        figure is created, saved to ``<cname>_dendogram.pdf`` and shown.
    :param smoothing_width: the width of the smoothing window.
    :return: the axis, with the data plotted onto it.
    """
    from scipy.stats import linregress
    from xfel.clustering.singleframe import SingleFrame as Sf
    import matplotlib.pyplot as plt

    direct_visualisation = ax is None
    if direct_visualisation:
        fig = plt.figure("All images intensity statistics")
        ax = fig.gca()

    all_logi = np.concatenate([frame.log_i for frame in self.members])
    all_one_over_d_squared = np.concatenate(
        [frame.sinsqtheta_over_lambda_sq for frame in self.members])

    # Sort by resolution term, then drop points with negative ln(I).
    plotting_data = sorted(zip(all_logi, all_one_over_d_squared),
                           key=lambda x: x[1])
    log_i, one_over_d_square = zip(*[i for i in plotting_data if i[0] >= 0])

    # Slope and intercept of ln(I) against (sin(theta)/lambda)^2.
    minus_2B, G, r_val, _, std_err = linregress(one_over_d_square, log_i)
    fit_info = "G: {:.2f}, -2B: {:.2f}, r: {:.2f}, std_err: {:.2f}".format(
        G, minus_2B, r_val, std_err)
    smooth = Sf._moving_average(log_i, n=smoothing_width)

    ax.plot(one_over_d_square, log_i, 'bo', ms=1)
    ax.plot(one_over_d_square[smoothing_width - 1:], smooth, '--r', lw=2)
    ax.set_xlim([0, max(one_over_d_square)])
    # Fitted line drawn between its y-intercept and its x-intercept.
    ax.plot([0, -1 * G / minus_2B], [G, 0], 'y-', lw=2)
    ax.set_xlabel(r"$(sin(\theta)/\lambda)^2 [\AA^{-2}]$")
    ax.set_ylabel("ln(I)")
    ax.set_title("Simple Wilson fit\n{}".format(fit_info))
    ax.figure.tight_layout()

    if direct_visualisation:
        # NOTE(review): the file name says "dendogram" although this is the
        # intensity plot; kept unchanged in case something expects this name.
        fig.savefig("{}_dendogram.pdf".format(self.cname))
        plt.show()

    return ax
def all_frames_intensity_stats(self, ax=None, smoothing_width=2000):
    """
    Goes through all frames in the cluster, and plots all the partial
    intensities. Then does a linear fit and rolling average on these.

    Points with a negative ``log_i`` are left out of both the plot and the
    fit. Axis limits, labels and the title are set through ``ax`` rather
    than through pyplot's current axes, so they always land on the axes
    that received the data.

    :param ax: Optional matplotlib axes object to plot to. Otherwise, a new
        figure is created, saved to ``<cname>_dendogram.pdf`` and shown.
    :param smoothing_width: the width of the smoothing window.
    :return: the axis, with the data plotted onto it.
    """
    from scipy.stats import linregress
    from xfel.clustering.singleframe import SingleFrame as Sf

    if ax is None:
        fig = plt.figure("All images intensity statistics")
        ax = fig.gca()
        direct_visualisation = True
    else:
        direct_visualisation = False

    all_logi = np.concatenate([frame.log_i for frame in self.members])
    all_one_over_d_squared = np.concatenate(
        [frame.sinsqtheta_over_lambda_sq for frame in self.members])

    plotting_data = sorted(zip(all_logi, all_one_over_d_squared),
                           key=lambda x: x[1])
    log_i, one_over_d_square = zip(*[i for i in plotting_data if i[0] >= 0])

    # linregress returns (slope, intercept, r, p, stderr); p is not used.
    minus_2B, G, r_val, _, std_err = linregress(one_over_d_square, log_i)
    fit_info = "G: {:.2f}, -2B: {:.2f}, r: {:.2f}, std_err: {:.2f}".format(
        G, minus_2B, r_val, std_err)
    smooth = Sf._moving_average(log_i, n=smoothing_width)

    ax.plot(one_over_d_square, log_i, 'bo', ms=1)
    ax.plot(one_over_d_square[smoothing_width - 1:], smooth, '--r', lw=2)
    ax.set_xlim([0, max(one_over_d_square)])
    # Straight line from the fit's y-intercept to its x-intercept.
    ax.plot([0, -1 * G / minus_2B], [G, 0], 'y-', lw=2)
    ax.set_xlabel(r"$(sin(\theta)/\lambda)^2 [\AA^{-2}]$")
    ax.set_ylabel("ln(I)")
    ax.set_title("Simple Wilson fit\n{}".format(fit_info))
    ax.figure.tight_layout()

    if direct_visualisation:
        # NOTE(review): "dendogram" in the file name looks copied from the
        # clustering plot; left as is because the name is observable output.
        fig.savefig("{}_dendogram.pdf".format(self.cname))
        plt.show()

    return ax
def run_one(path):
    """Check NCDist against NCDist2017 on the unit cells read from ``path``.

    The cells are converted to G6 vectors and every pair of consecutive
    vectors is compared: both implementations must give the same distance,
    and NCDist2017 must give the same value with its arguments swapped.

    :param path: passed to ``generate_unit_cells_from_text``.
    :raises AssertionError: if any of the comparisons fails.
    """
    cells = list(generate_unit_cells_from_text(path))
    g6 = [SingleFrame.make_g6(u) for u in cells]
    # for the purpose of this test, cycle through pairs of g6 vectors
    # (zip over the list and its tail works on both Python 2 and 3,
    # unlike xrange)
    for a, b in zip(g6, g6[1:]):
        old = NCDist(a, b)
        new = NCDist2017(a, b)
        com = NCDist2017(b, a)
        assert old == new
        assert new == com
def from_files(cls, pickle_list, _prefix='cluster_from_file', use_b=True):
    """Build a cluster from an explicit list of pickle files.

    Each entry is loaded as a ``SingleFrame`` named after the part of the
    path that follows the last ``/``. Frames that come back without a
    ``name`` attribute are logged and left out.

    :param pickle_list: list of pickle files
    :param _prefix: passed to ``cls`` as its second argument.
    :param use_b: Boolean, forwarded to ``SingleFrame``. If True, initialise
        Scale and B. If false, use only mean intensity scaling.
    :return: ``cls(frames, _prefix, 'Made by Cluster.from_files')``.
    """
    frames = []
    for pickle_path in pickle_list:
        short_name = pickle_path.rpartition('/')[2]
        candidate = SingleFrame(pickle_path, short_name, use_b=use_b)
        if not hasattr(candidate, 'name'):
            logging.info('skipping file {}'.format(pickle_path))
            continue
        frames.append(candidate)
    return cls(frames, _prefix, 'Made by Cluster.from_files')
def run_one(path):
    """Check NCDist against NCDist2017 on the unit cells read from ``path``.

    Consecutive pairs of G6 vectors are compared. The NCDist2017 calls run
    with the OpenMP thread count forced to 1; the previous setting (taken
    from ``OMP_NUM_THREADS``, default 1) is restored afterwards, even if one
    of the calls raises.

    :param path: passed to ``generate_unit_cells_from_text``.
    :raises AssertionError: if the two implementations disagree or
        NCDist2017 is not symmetric for some pair.
    """
    import os
    import omptbx

    cells = list(generate_unit_cells_from_text(path))
    g6 = [SingleFrame.make_g6(u) for u in cells]

    # workaround allows use of non-thread-safe NCDist, even if openMP is
    # enabled elsewhere in the Python program
    workaround_nt = int(os.environ.get("OMP_NUM_THREADS", 1))

    # for the purpose of this test, cycle through pairs of g6 vectors
    for ix, (a, b) in enumerate(zip(g6, g6[1:])):
        old = NCDist(a, b)
        omptbx.omp_set_num_threads(1)
        try:
            new = NCDist2017(a, b)
            com = NCDist2017(b, a)
        finally:
            # Always put the thread count back, otherwise a failure here
            # would leave the rest of the program single-threaded.
            omptbx.omp_set_num_threads(workaround_nt)
        assert old == new, "Zeldin, AB2017"
        assert new == com, "Pair %d NCDist(a,b) %f != NCDist(b,a) %f" % (
            ix, new, com)
def ab_cluster(self, threshold=10000, method='distance', linkage_method='single', log=False, ax=None, write_file_lists=True, schnell=False, doplot=True, labels='default'):
    """
    Hierarchical clustering using the unit cell dimensions.

    :param threshold: the threshold to use for pruning the tree into
        clusters.
    :param method: passed to ``scipy.cluster.hierarchy.fcluster`` as its
        ``criterion``.
    :param linkage_method: which linkage method from scipy to use when
        creating the linkages (see scipy.cluster.hierarchy).
    :param log: if True, use a symmetric log scale on the y axis.
    :param ax: if a matplotlib axes object is provided, plot to this.
        Otherwise, create a new axes object, save it to
        ``<cname>_dendogram.pdf`` and display it on screen.
    :param write_file_lists: if True, write out the files that make up each
        cluster with more than one member.
    :param schnell: if True, use simple euclidean distance, otherwise use
        the Andrews-Bernstein distance from Andrews & Bernstein J Appl Cryst
        47:346 (2014) on the Niggli cells.
    :param doplot: Boolean flag for if the plotting should be done at all.
        Runs faster if switched off.
    :param labels: 'default' will not display any labels for more than 100
        images, but will display file names for fewer. True / False force
        file names / no labels. Any other value is used as the name of an
        attribute to read from each member.
    :return: a tuple ``(sub_clusters, ax)``. ``sub_clusters`` is sorted by
        number of members, smallest first, and the clusters are renamed
        ``cluster_1``, ``cluster_2``, ... in that order. ``ax`` is the axes
        drawn on, or the ``ax`` argument unchanged when ``doplot`` is False.

    .. note::
      Use 'schnell' option with caution, since it can cause strange
      behaviour around symmetry boundaries.
    """
    logging.info("Hierarchical clustering of unit cells")
    import scipy.spatial.distance as dist
    import scipy.cluster.hierarchy as hcluster

    # 1. Create a numpy array of G6 cells
    g6_cells = np.array(
        [SingleFrame.make_g6(image.uc) for image in self.members])

    # 2. Do hierarchical clustering with the chosen distance.
    if schnell:
        logging.info("Using Euclidean distance")
        metric = 'euclidean'
    else:
        logging.info(
            "Using Andrews-Bernstein distance from Andrews & Bernstein "
            "J Appl Cryst 47:346 (2014)")

        def metric(a, b):
            return NCDist(a, b)

    pair_distances = dist.pdist(g6_cells, metric=metric)
    logging.info("Distances have been calculated")
    this_linkage = hcluster.linkage(pair_distances, method=linkage_method,
                                    metric=metric)
    cluster_ids = hcluster.fcluster(this_linkage, threshold,
                                    criterion=method)
    logging.debug("Clusters have been calculated")

    # 3. Create an array of sub-cluster objects from the clustering
    info_string = ('Made using ab_cluster with t={},'
                   ' {} method, and {} linkage').format(
                       threshold, method, linkage_method)
    sub_clusters = []
    for cluster_id in range(1, max(cluster_ids) + 1):
        members = [image for image, assigned in zip(self.members, cluster_ids)
                   if assigned == cluster_id]
        sub_clusters.append(self.make_sub_cluster(
            members, 'cluster_{}'.format(cluster_id), info_string))

    sub_clusters = sorted(sub_clusters, key=lambda x: len(x.members))
    # Rename to order by size
    for num, cluster in enumerate(sub_clusters):
        cluster.cname = 'cluster_{}'.format(num + 1)

    # 3.5 optionally write out the clusters to files.
    if write_file_lists:
        for cluster in sub_clusters:
            if len(cluster.members) > 1:
                cluster.dump_file_list(
                    out_file_name="{}.lst".format(cluster.cname))

    if doplot:
        if labels is True:
            labels = [image.name for image in self.members]
        elif labels is False:
            labels = ['' for _ in self.members]
        elif labels == 'default':
            if len(self.members) > 100:
                labels = ['' for _ in self.members]
            else:
                labels = [image.name for image in self.members]
        else:
            labels = [getattr(v, labels, '') for v in self.members]

        # 4. Plot a dendogram to the axes if no axis is passed, otherwise
        # just return the axes object
        if ax is None:
            fig = plt.figure("Distance Dendogram")
            ax = fig.gca()
            direct_visualisation = True
        else:
            direct_visualisation = False

        hcluster.dendrogram(this_linkage,
                            labels=labels,
                            leaf_font_size=8,
                            leaf_rotation=90.0,
                            color_threshold=threshold,
                            ax=ax)

        if log:
            # 'linthreshx' is an x-axis option and must not be given to the
            # y scale (recent matplotlib raises on it); use the scale's
            # default linear threshold instead.
            ax.set_yscale("symlog")
        else:
            ax.set_ylim(-ax.get_ylim()[1] / 100, ax.get_ylim()[1])

        if direct_visualisation:
            fig.savefig("{}_dendogram.pdf".format(self.cname))
            plt.show()

    return sub_clusters, ax
def ab_cluster(
    self,
    threshold=10000,
    method="distance",
    linkage_method="single",
    log=False,
    ax=None,
    write_file_lists=True,
    schnell=False,
    doplot=True,
    labels="default",
):
    """
    Hierarchical clustering using the unit cell dimensions.

    :param threshold: the threshold to use for pruning the tree into
        clusters.
    :param method: passed to ``scipy.cluster.hierarchy.fcluster`` as its
        ``criterion``.
    :param linkage_method: which linkage method from scipy to use when
        creating the linkages (see scipy.cluster.hierarchy).
    :param log: if True, use a symmetric log scale on the y axis.
    :param ax: if a matplotlib axes object is provided, plot to this.
        Otherwise, create a new axes object, save it to
        ``<cname>_dendogram.pdf`` and display it on screen.
    :param write_file_lists: if True, write out the files that make up each
        cluster with more than one member.
    :param schnell: if True, use simple euclidean distance, otherwise use
        the Andrews-Bernstein distance from Andrews & Bernstein J Appl Cryst
        47:346 (2014) on the Niggli cells.
    :param doplot: Boolean flag for if the plotting should be done at all.
        Runs faster if switched off. The dendrogram data is still computed.
    :param labels: 'default' will not display any labels for more than 100
        images, but will display file names for fewer. True / False force
        file names / no labels. Any other value is used as the name of an
        attribute to read from each member.
    :return: a tuple ``(sub_clusters, dendrogram, ax)``. ``sub_clusters`` is
        sorted by number of members, smallest first, and renamed
        ``cluster_1``, ``cluster_2``, ... in that order. ``dendrogram`` is
        the value returned by ``scipy.cluster.hierarchy.dendrogram``. When
        no pair distances could be computed the result is
        ``([], None, None)``.

    .. note::
      Use 'schnell' option with caution, since it can cause strange
      behaviour around symmetry boundaries.
    """
    import numpy as np

    from cctbx.uctbx.determine_unit_cell import NCDist
    from xfel.clustering.singleframe import SingleFrame

    logger.info("Hierarchical clustering of unit cells")
    import scipy.cluster.hierarchy as hcluster
    import scipy.spatial.distance as dist

    # 1. Create a numpy array of G6 cells
    g6_cells = np.array([SingleFrame.make_g6(image.uc) for image in self.members])

    # 2. Do hierarchical clustering with the chosen distance.
    if schnell:
        logger.info("Using Euclidean distance")
        metric = "euclidean"
    else:
        logger.info(
            "Using Andrews-Bernstein distance from Andrews & Bernstein "
            "J Appl Cryst 47:346 (2014)"
        )
        metric = NCDist
    pair_distances = dist.pdist(g6_cells, metric=metric)

    if len(pair_distances) == 0:
        logger.debug("No distances were calculated. Aborting clustering.")
        # Same arity as the normal return so callers can always unpack
        # three values.
        return [], None, None

    logger.info("Distances have been calculated")
    this_linkage = hcluster.linkage(
        pair_distances, method=linkage_method, metric=metric
    )
    cluster_ids = hcluster.fcluster(this_linkage, threshold, criterion=method)
    logger.debug("Clusters have been calculated")

    # 3. Create an array of sub-cluster objects from the clustering
    info_string = f"Made using ab_cluster with t={threshold}, {method} method, and {linkage_method} linkage"
    sub_clusters = []
    for cluster_id in range(1, max(cluster_ids) + 1):
        members = [
            image
            for image, assigned in zip(self.members, cluster_ids)
            if assigned == cluster_id
        ]
        sub_clusters.append(
            self.make_sub_cluster(members, f"cluster_{cluster_id}", info_string)
        )

    sub_clusters = sorted(sub_clusters, key=lambda x: len(x.members))
    # Rename to order by size
    for num, cluster in enumerate(sub_clusters):
        cluster.cname = f"cluster_{num + 1}"

    # 3.5 optionally write out the clusters to files.
    if write_file_lists:
        for cluster in sub_clusters:
            if len(cluster.members) > 1:
                cluster.dump_file_list(out_file_name=f"{cluster.cname}.lst")

    if labels is True:
        labels = [image.name for image in self.members]
    elif labels is False:
        labels = ["" for _ in self.members]
    elif labels == "default":
        if len(self.members) > 100:
            labels = ["" for _ in self.members]
        else:
            labels = [image.name for image in self.members]
    else:
        labels = [getattr(v, labels, "") for v in self.members]

    if doplot:
        import matplotlib.pyplot as plt

        # 4. Plot a dendogram to the axes if no axis is passed, otherwise
        # just return the axes object
        if ax is None:
            fig = plt.figure("Distance Dendogram")
            ax = fig.gca()
            direct_visualisation = True
        else:
            direct_visualisation = False

    dendrogram = hcluster.dendrogram(
        this_linkage,
        labels=labels,
        p=200,
        truncate_mode="lastp",  # show only the last p merged clusters
        leaf_font_size=8,
        leaf_rotation=90.0,
        color_threshold=threshold,
        ax=ax,
        no_plot=not doplot,
    )

    if doplot:
        if log:
            # 'linthreshx' is an x-axis option and must not be given to the
            # y scale (recent matplotlib raises on it); use the scale's
            # default linear threshold instead.
            ax.set_yscale("symlog")
        else:
            ax.set_ylim(-ax.get_ylim()[1] / 100, ax.get_ylim()[1])

        if direct_visualisation:
            fig.savefig(f"{self.cname}_dendogram.pdf")
            plt.show()

    return sub_clusters, dendrogram, ax
def from_files(cls, raw_input=None, pickle_list=[], dials_refls=[], dials_expts=[], _prefix='cluster_from_file', _message='Made from list of individual files', n_images=None, dials=False, json=False, **kwargs):
    """Constructor to get a cluster from a list of individual files.

    Exactly one loading mode is used: ``dials`` (reflection/experiment
    pairs, paired by position), else ``json`` (experiment files only), else
    pickle streams. The list arguments are copied before use, so neither the
    caller's lists nor the shared default lists are modified.

    :param raw_input: optional list of extra paths. In ``dials``/``json``
        mode, entries ending in ``.pickle`` are treated as reflections and
        entries ending in ``.json`` as experiments; otherwise all entries
        are added to ``pickle_list``.
    :param pickle_list: list of pickle files
    :param dials_refls: list of DIALS integrated reflections
    :param dials_expts: list of DIALS experiment jsons
    :param _prefix: passed to ``cls`` as its second argument.
    :param _message: passed to ``cls`` as its third argument.
    :param n_images: find at most this number of images
    :param dials: use the dials_refls and dials_expts arguments to
        construct the clusters (default: False)
    :param json: build frames from the experiment jsons only
        (default: False; ignored when ``dials`` is True).
    :param kwargs: forwarded to the frame constructors (e.g. ``use_b``).
    :return: ``cls(frames, _prefix, _message)``.
    """
    # Work on copies: the defaults are shared list objects and extending
    # them in place would leak paths from one call into the next.
    pickle_list = list(pickle_list)
    dials_refls = list(dials_refls)
    dials_expts = list(dials_expts)

    data = []

    def sort_dials_raw_input(raw):
        expts = []
        refls = []
        for path in raw:
            if path.endswith(".pickle"):
                refls.append(path)
            elif path.endswith(".json"):
                expts.append(path)
        return (refls, expts)

    def done():
        if n_images is None:
            return False
        return len(data) >= n_images

    if dials:
        if raw_input is not None:
            r, e = sort_dials_raw_input(raw_input)
            dials_refls.extend(r)
            dials_expts.extend(e)
        for r, e in zip(dials_refls, dials_expts):
            this_frame = SingleDialsFrameFromFiles(refls_path=r, expts_path=e, **kwargs)
            if hasattr(this_frame, 'miller_array'):
                data.append(this_frame)
                if done():
                    break
            else:
                logger.info('skipping reflections {} and experiments {}'.format(r, e))
    elif json:
        if raw_input is not None:
            _, e = sort_dials_raw_input(raw_input)
            dials_expts.extend(e)
        for e in dials_expts:
            # Frame name: directory plus the file name up to the first "_".
            name = os.path.join(os.path.dirname(e), os.path.basename(e).split("_")[0])
            this_frame = SingleDialsFrameFromJson(expts_path=e, **kwargs)
            this_frame.name = name
            data.append(this_frame)
            if done():
                break
    else:
        if raw_input is not None:
            pickle_list.extend(raw_input)
        print("There are %d input files" % (len(pickle_list)))
        from xfel.command_line.print_pickle import generate_data_from_streams
        for data_dict in generate_data_from_streams(pickle_list):
            this_frame = SingleFrame(dicti=data_dict, **kwargs)
            if hasattr(this_frame, 'miller_array'):
                data.append(this_frame)
                if done():
                    break
            else:
                # No file path is in scope for a stream entry; fall back to
                # the frame's own name if it has one.
                logger.info('skipping file {}'.format(
                    getattr(this_frame, 'name', 'unknown')))
        print("%d lattices will be analyzed" % (len(data)))

    return cls(data, _prefix, _message)
def from_files(cls, raw_input=None, pickle_list=[], dials_refls=[], dials_expts=[], _prefix='cluster_from_file', _message='Made from list of individual files', n_images=None, dials=False, **kwargs):
    """Constructor to get a cluster from a list of individual files.

    In ``dials`` mode every reflection file is paired with the first
    experiment file that has the same ID, where the ID is the directory
    joined with the file name up to its first ``_``. Reflection files
    without a matching experiment are ignored. Otherwise each entry of
    ``pickle_list`` is loaded as a ``SingleFrame``. The list arguments are
    copied before use, so neither the caller's lists nor the shared default
    lists are modified.

    :param raw_input: optional list of extra paths. In ``dials`` mode,
        entries ending in ``.pickle`` are treated as reflections and
        entries ending in ``.json`` as experiments; otherwise all entries
        are added to ``pickle_list``.
    :param pickle_list: list of pickle files
    :param dials_refls: list of DIALS integrated reflections
    :param dials_expts: list of DIALS experiment jsons
    :param _prefix: passed to ``cls`` as its second argument.
    :param _message: passed to ``cls`` as its third argument.
    :param n_images: find at most this number of images
    :param dials: use the dials_refls and dials_expts arguments to
        construct the clusters (default: False)
    :param kwargs: forwarded to the frame constructors (e.g. ``use_b``).
    :return: ``cls(frames, _prefix, _message)``.
    """
    # Work on copies: the defaults are shared list objects and extending
    # them in place would leak paths from one call into the next.
    pickle_list = list(pickle_list)
    dials_refls = list(dials_refls)
    dials_expts = list(dials_expts)

    data = []

    def sort_dials_raw_input(raw):
        expts = []
        refls = []
        for path in raw:
            if path.endswith(".pickle"):
                refls.append(path)
            elif path.endswith(".json"):
                expts.append(path)
        return (refls, expts)

    def done():
        if n_images is None:
            return False
        return len(data) >= n_images

    def file_id(path):
        return os.path.join(os.path.dirname(path),
                            os.path.basename(path).split("_")[0])

    if dials:
        if raw_input is not None:
            r, e = sort_dials_raw_input(raw_input)
            dials_refls.extend(r)
            dials_expts.extend(e)

        # First experiment file seen for each ID wins, as list.index did.
        expt_by_id = {}
        for e in dials_expts:
            expt_by_id.setdefault(file_id(e), e)
        matches = [(r, expt_by_id[file_id(r)])
                   for r in dials_refls if file_id(r) in expt_by_id]

        for (r, e) in matches:
            this_frame = SingleDialsFrameFromFiles(refls_path=r, expts_path=e, **kwargs)
            if hasattr(this_frame, 'miller_array'):
                data.append(this_frame)
                if done():
                    break
            else:
                logging.info(
                    'skipping reflections {} and experiments {}'.format(r, e))
    else:
        if raw_input is not None:
            pickle_list.extend(raw_input)
        for path in pickle_list:
            this_frame = SingleFrame(path, os.path.basename(path), **kwargs)
            if hasattr(this_frame, 'miller_array'):
                data.append(this_frame)
                if done():
                    break
            else:
                logging.info('skipping file {}'.format(os.path.basename(path)))

    return cls(data, _prefix, _message)
def ab_cluster(self, threshold=10000, method='distance', linkage_method='single', log=False, ax=None, write_file_lists=True, schnell=False, doplot=True, labels='default'):
    """
    Hierarchical clustering using the unit cell dimensions.

    :param threshold: the threshold to use for pruning the tree into
        clusters.
    :param method: passed to ``scipy.cluster.hierarchy.fcluster`` as its
        ``criterion``.
    :param linkage_method: which linkage method from scipy to use when
        creating the linkages (see scipy.cluster.hierarchy).
    :param log: if True, use a symmetric log scale on the y axis.
    :param ax: if a matplotlib axes object is provided, plot to this.
        Otherwise, create a new axes object, save it to
        ``<cname>_dendogram.pdf`` and display it on screen.
    :param write_file_lists: if True, write out the files that make up each
        cluster with more than one member.
    :param schnell: if True, use simple euclidean distance, otherwise use
        the Andrews-Bernstein distance from Andrews & Bernstein J Appl Cryst
        47:346 (2014) on the Niggli cells.
    :param doplot: Boolean flag for if the plotting should be done at all.
        Runs faster if switched off.
    :param labels: 'default' will not display any labels for more than 100
        images, but will display file names for fewer. True / False force
        file names / no labels. Any other value is used as the name of an
        attribute to read from each member.
    :return: a tuple ``(sub_clusters, ax)``. ``sub_clusters`` is sorted by
        number of members, smallest first, and the clusters are renamed
        ``cluster_1``, ``cluster_2``, ... in that order. ``ax`` is the axes
        drawn on, or the ``ax`` argument unchanged when ``doplot`` is False.

    .. note::
      Use 'schnell' option with caution, since it can cause strange
      behaviour around symmetry boundaries.
    """
    logging.info("Hierarchical clustering of unit cells")
    import scipy.spatial.distance as dist
    import scipy.cluster.hierarchy as hcluster

    # 1. Create a numpy array of G6 cells
    g6_cells = np.array([SingleFrame.make_g6(image.uc)
                         for image in self.members])

    # 2. Do hierarchical clustering with the chosen distance.
    if schnell:
        logging.info("Using Euclidean distance")
        metric = 'euclidean'
    else:
        logging.info("Using Andrews-Bernstein distance from Andrews & Bernstein "
                     "J Appl Cryst 47:346 (2014)")

        def metric(a, b):
            return NCDist(a, b)

    pair_distances = dist.pdist(g6_cells, metric=metric)
    logging.info("Distances have been calculated")
    this_linkage = hcluster.linkage(pair_distances,
                                    method=linkage_method,
                                    metric=metric)
    cluster_ids = hcluster.fcluster(this_linkage,
                                    threshold,
                                    criterion=method)
    logging.debug("Clusters have been calculated")

    # 3. Create an array of sub-cluster objects from the clustering
    info_string = ('Made using ab_cluster with t={},'
                   ' {} method, and {} linkage').format(threshold,
                                                        method,
                                                        linkage_method)
    sub_clusters = []
    for cluster_id in range(1, max(cluster_ids) + 1):
        members = [image
                   for image, assigned in zip(self.members, cluster_ids)
                   if assigned == cluster_id]
        sub_clusters.append(self.make_sub_cluster(
            members, 'cluster_{}'.format(cluster_id), info_string))

    sub_clusters = sorted(sub_clusters, key=lambda x: len(x.members))
    # Rename to order by size
    for num, cluster in enumerate(sub_clusters):
        cluster.cname = 'cluster_{}'.format(num + 1)

    # 3.5 optionally write out the clusters to files.
    if write_file_lists:
        for cluster in sub_clusters:
            if len(cluster.members) > 1:
                cluster.dump_file_list(out_file_name="{}.lst".format(cluster.cname))

    if doplot:
        if labels is True:
            labels = [image.name for image in self.members]
        elif labels is False:
            labels = ['' for _ in self.members]
        elif labels == 'default':
            if len(self.members) > 100:
                labels = ['' for _ in self.members]
            else:
                labels = [image.name for image in self.members]
        else:
            labels = [getattr(v, labels, '') for v in self.members]

        # 4. Plot a dendogram to the axes if no axis is passed, otherwise
        # just return the axes object
        if ax is None:
            fig = plt.figure("Distance Dendogram")
            ax = fig.gca()
            direct_visualisation = True
        else:
            direct_visualisation = False

        hcluster.dendrogram(this_linkage,
                            labels=labels,
                            leaf_font_size=8,
                            leaf_rotation=90.0,
                            color_threshold=threshold,
                            ax=ax)

        if log:
            # 'linthreshx' is an x-axis option and must not be given to the
            # y scale (recent matplotlib raises on it); use the scale's
            # default linear threshold instead.
            ax.set_yscale("symlog")
        else:
            ax.set_ylim(-ax.get_ylim()[1] / 100, ax.get_ylim()[1])

        if direct_visualisation:
            fig.savefig("{}_dendogram.pdf".format(self.cname))
            plt.show()

    return sub_clusters, ax