예제 #1
0
    def from_directories(cls,
                         path_to_integration_dir,
                         _prefix='cluster_from_dir',
                         n_images=None,
                         **kwargs):
        """Constructor to get a cluster from the files found under directories.

        Every directory is walked recursively with ``os.walk`` and each file
        found is handed to ``SingleFrame``. Frames that end up without a
        ``miller_array`` attribute are logged and skipped.

        usage: Cluster.from_directories(..)

        :param path_to_integration_dir: list of directories containing pickle
            files. Each one is searched recursively.
        :param _prefix: second argument given to the cluster constructor.
        :param n_images: find at most this number of images. ``None`` means
            no limit.
        :param kwargs: forwarded unchanged to ``SingleFrame`` (e.g. the
            ``use_b`` flag).
        :return: ``cls(frames, _prefix, message)``.
        """
        data = []

        def done():
            return n_images is not None and len(data) >= n_images

        def candidate_files():
            # Flatten the nested walk so a single ``break`` ends the search.
            for top in path_to_integration_dir:
                for dirpath, _dirnames, filenames in os.walk(top):
                    for filename in filenames:
                        yield os.path.join(dirpath, filename), filename

        for path, filename in candidate_files():
            # Test the limit *before* reading a file, so that ``n_images=0``
            # (or a limit that is already reached) never loads another frame.
            if done():
                break
            this_frame = SingleFrame(path, filename, **kwargs)
            if hasattr(this_frame, 'miller_array'):
                data.append(this_frame)
            else:
                logging.info('skipping file {}'.format(filename))
        return cls(data, _prefix,
                   'Made from files in {}'.format(path_to_integration_dir[:]))
예제 #2
0
 def __init__(self, *args, **kwargs):
   """ Build the frame exactly as SingleFrame does, then add scaling state.

   All arguments are passed on to ``SingleFrame.__init__``. Two keyword
   arguments are additionally read here:
   :param kwargs['scale']: default True. Specifies if the images should be scaled upon creation. Mainly switched off for testing.
   :param kwargs['use_b']: default True. If false, only initialise the scale factor, not the B factor.
   """
   SingleFrame.__init__(self, *args, **kwargs)
   if not hasattr(self, 'miller_array'):
     # The base constructor did not produce a miller array: nothing to set up.
     return

   self.use_scales = kwargs.get('scale', True)
   self.use_b = kwargs.get('use_b', True)
   # Filled in later, when the Graph is made.
   self.edges = []
   self.partialities = self.calc_partiality(
     self.get_x0(), update_wilson=self.use_scales)
   self.scales = self.calc_scales(self.get_x0())
   # Classification label, assigned after instantiation.
   self.label = None
   # Used in tests, when the 'source' of the image is known.
   self.source = None
   self.params = self.get_x0()
예제 #3
0
파일: components.py 프로젝트: dials/cctbx
 def __init__(self, *args, **kwargs):
     """ Build the frame exactly as SingleFrame does, then add scaling state.

     All arguments are passed on to ``SingleFrame.__init__``. Two keyword
     arguments are additionally read here:
     :param kwargs['scale']: default True. Specifies if the images should be scaled upon creation. Mainly switched off for testing.
     :param kwargs['use_b']: default True. If false, only initialise the scale factor, not the B factor.
     """
     SingleFrame.__init__(self, *args, **kwargs)
     if not hasattr(self, 'miller_array'):
         # The base constructor did not produce a miller array: nothing
         # further to set up.
         return

     self.use_scales = kwargs.get('scale', True)
     self.use_b = kwargs.get('use_b', True)
     # Filled in later, when the Graph is made.
     self.edges = []
     self.partialities = self.calc_partiality(self.get_x0(),
                                              update_wilson=self.use_scales)
     self.scales = self.calc_scales(self.get_x0())
     # Classification label, assigned after instantiation.
     self.label = None
     # Used in tests, when the 'source' of the image is known.
     self.source = None
     self.params = self.get_x0()
예제 #4
0
  def all_frames_intensity_stats(self, ax=None, smoothing_width=2000):
    """
    Plot ln(I) against (sin(theta)/lambda)^2 for all frames and fit a line.

    The ``log_i`` and ``sinsqtheta_over_lambda_sq`` arrays of every member are
    pooled and sorted by the second quantity; points with negative ln(I) are
    discarded. A linear regression and a moving average of ln(I) are then
    drawn over the scatter plot.

    :param ax: Optional matplotlib axes object to plot to. When omitted, a new
               figure is created, saved as a PDF and shown on screen.
    :param smoothing_width: the width of the smoothing window.
    :return: the axis, with the data plotted onto it.
    """
    from scipy.stats import linregress
    from xfel.clustering.singleframe import SingleFrame as Sf
    import matplotlib.pyplot as plt

    direct_visualisation = ax is None
    if direct_visualisation:
      fig = plt.figure("All images intensity statistics")
      ax = fig.gca()

    all_logi = np.concatenate([frame.log_i for frame in self.members])
    all_one_over_d_squared = np.concatenate(
      [frame.sinsqtheta_over_lambda_sq for frame in self.members])

    # Sort the (ln I, x) pairs by their x value (second tuple element).
    plotting_data = sorted(zip(all_logi, all_one_over_d_squared),
                           key=lambda point: point[1])

    log_i, one_over_d_square = zip(*[point for point in plotting_data
                                     if point[0] >= 0])
    minus_2B, G, r_val, _, std_err = linregress(one_over_d_square, log_i)
    fit_info = "G: {:.2f}, -2B: {:.2f}, r: {:.2f}, std_err: {:.2f}".format(
      G, minus_2B, r_val, std_err)
    smooth = Sf._moving_average(log_i, n=smoothing_width)
    ax.plot(one_over_d_square, log_i, 'bo', ms=1)
    ax.plot(one_over_d_square[smoothing_width - 1:], smooth, '--r', lw=2)
    # Use the Axes methods rather than pyplot's "current axes" helpers, so
    # that a caller-supplied ``ax`` is the one that receives the limits,
    # labels and title.
    ax.set_xlim([0, max(one_over_d_square)])
    # Fitted line, drawn from its y intercept to its x intercept.
    ax.plot([0, -1 * G / minus_2B], [G, 0], 'y-', lw=2)
    ax.set_xlabel(r"$(sin(\theta)/\lambda)^2 [\AA^{-2}]$")
    ax.set_ylabel("ln(I)")
    ax.set_title("Simple Wilson fit\n{}".format(fit_info))
    ax.figure.tight_layout()

    if direct_visualisation:
      # NOTE(review): the file name says "dendogram" although this is the
      # Wilson plot; kept unchanged so existing outputs keep their name.
      fig.savefig("{}_dendogram.pdf".format(self.cname))
      plt.show()

    return ax
예제 #5
0
  def all_frames_intensity_stats(self, ax=None, smoothing_width=2000):
    """
    Plot ln(I) against (sin(theta)/lambda)^2 for all frames and fit a line.

    The ``log_i`` and ``sinsqtheta_over_lambda_sq`` arrays of every member are
    pooled and sorted by the second quantity; points with negative ln(I) are
    discarded. A linear regression and a moving average of ln(I) are then
    drawn over the scatter plot.

    :param ax: Optional matplotlib axes object to plot to. When omitted, a new
               figure is created, saved as a PDF and shown on screen.
    :param smoothing_width: the width of the smoothing window.
    :return: the axis, with the data plotted onto it.
    """
    from scipy.stats import linregress
    from xfel.clustering.singleframe import SingleFrame as Sf

    direct_visualisation = ax is None
    if direct_visualisation:
      fig = plt.figure("All images intensity statistics")
      ax = fig.gca()

    all_logi = np.concatenate([frame.log_i for frame in self.members])
    all_one_over_d_squared = np.concatenate(
      [frame.sinsqtheta_over_lambda_sq for frame in self.members])

    # Sort the (ln I, x) pairs by their x value (second tuple element).
    plotting_data = sorted(zip(all_logi, all_one_over_d_squared),
                           key=lambda point: point[1])

    log_i, one_over_d_square = zip(*[point for point in plotting_data
                                     if point[0] >= 0])
    minus_2B, G, r_val, _, std_err = linregress(one_over_d_square, log_i)
    fit_info = "G: {:.2f}, -2B: {:.2f}, r: {:.2f}, std_err: {:.2f}".format(
      G, minus_2B, r_val, std_err)
    smooth = Sf._moving_average(log_i, n=smoothing_width)
    ax.plot(one_over_d_square, log_i, 'bo', ms=1)
    ax.plot(one_over_d_square[smoothing_width - 1:], smooth, '--r', lw=2)
    # Use the Axes methods rather than pyplot's "current axes" helpers, so
    # that a caller-supplied ``ax`` is the one that receives the limits,
    # labels and title.
    ax.set_xlim([0, max(one_over_d_square)])
    # Fitted line, drawn from its y intercept to its x intercept.
    ax.plot([0, -1 * G / minus_2B], [G, 0], 'y-', lw=2)
    ax.set_xlabel(r"$(sin(\theta)/\lambda)^2 [\AA^{-2}]$")
    ax.set_ylabel("ln(I)")
    ax.set_title("Simple Wilson fit\n{}".format(fit_info))
    ax.figure.tight_layout()

    if direct_visualisation:
      # NOTE(review): the file name says "dendogram" although this is the
      # Wilson plot; kept unchanged so existing outputs keep their name.
      fig.savefig("{}_dendogram.pdf".format(self.cname))
      plt.show()

    return ax
예제 #6
0
def run_one(path):
    """Check that NCDist2017 agrees with NCDist and is symmetric.

    Unit cells are read from *path*, converted to G6 vectors, and each vector
    is compared with its successor in the list.

    :param path: input handed to ``generate_unit_cells_from_text``.
    :raises AssertionError: if the two implementations disagree for a pair, or
        if NCDist2017 gives different values for (a, b) and (b, a).
    """
    g6 = [SingleFrame.make_g6(cell)
          for cell in generate_unit_cells_from_text(path)]

    # for the purpose of this test, cycle through pairs of g6 vectors
    # (zip over the shifted list works on both Python 2 and 3, unlike xrange)
    for ix, (a, b) in enumerate(zip(g6, g6[1:])):
        old = NCDist(a, b)
        new = NCDist2017(a, b)
        com = NCDist2017(b, a)
        assert old == new, "Pair %d NCDist %r != NCDist2017 %r" % (
            ix, old, new)
        assert new == com, "Pair %d NCDist2017(a,b) %r != NCDist2017(b,a) %r" % (
            ix, new, com)
예제 #7
0
 def from_files(cls, pickle_list, _prefix='cluster_from_file', use_b=True):
     """Alternative constructor: build a cluster from a list of pickle files.

     One ``SingleFrame`` is created per entry; frames that end up without a
     ``name`` attribute are logged and left out.

     :param pickle_list: list of pickle files
     :param _prefix: second argument given to the cluster constructor.
     :param use_b: Boolean, forwarded to ``SingleFrame``. If True, initialise
         Scale and B. If false, use only mean intensity scaling.
     :return: ``cls(frames, _prefix, 'Made by Cluster.from_files')``.
     """
     frames = []
     for pickle_path in pickle_list:
         # Everything after the last '/' serves as the frame's short name.
         short_name = pickle_path.rsplit('/', 1)[-1]
         frame = SingleFrame(pickle_path, short_name, use_b=use_b)
         if not hasattr(frame, 'name'):
             logging.info('skipping file {}'.format(pickle_path))
             continue
         frames.append(frame)
     return cls(frames, _prefix, 'Made by Cluster.from_files')
def run_one(path):
    """Check that NCDist2017 agrees with NCDist and is symmetric.

    Unit cells are read from *path*, converted to G6 vectors, and each vector
    is compared with its successor in the list.

    :param path: input handed to ``generate_unit_cells_from_text``.
    :raises AssertionError: if the two implementations disagree for a pair, or
        if NCDist2017 gives different values for (a, b) and (b, a).
    """
    import os
    import omptbx

    g6 = [SingleFrame.make_g6(cell)
          for cell in generate_unit_cells_from_text(path)]

    # workaround allows use of non-thread-safe NCDist, even if openMP is
    # enabled elsewhere in the Python program: the thread count is forced to 1
    # around the NCDist2017 calls and then put back to this value.
    workaround_nt = int(os.environ.get("OMP_NUM_THREADS", 1))

    # for the purpose of this test, cycle through pairs of g6 vectors
    for ix, (a, b) in enumerate(zip(g6, g6[1:])):
        old = NCDist(a, b)
        omptbx.omp_set_num_threads(1)
        try:
            new = NCDist2017(a, b)
            com = NCDist2017(b, a)
        finally:
            # Restore the thread count even when a distance call raises.
            omptbx.omp_set_num_threads(workaround_nt)
        assert old == new, "Zeldin, AB2017"
        assert new == com, "Pair %d NCDist(a,b) %f != NCDist(b,a) %f" % (
            ix, new, com)
예제 #9
0
    def ab_cluster(self,
                   threshold=10000,
                   method='distance',
                   linkage_method='single',
                   log=False,
                   ax=None,
                   write_file_lists=True,
                   schnell=False,
                   doplot=True,
                   labels='default'):
        """
        Hierarchical clustering using the unit cell dimensions.

        :param threshold: the threshold to use for pruning the tree into
            clusters.
        :param method: passed to ``scipy.cluster.hierarchy.fcluster`` as its
            ``criterion``.
        :param linkage_method: which linkage method from scipy to use when
            creating the linkages (see scipy.cluster.hierarchy).
        :param log: if True, use log scale on y axis.
        :param ax: if a matplotlib axes object is provided, plot to this.
            Otherwise, create a new axes object and display on screen.
        :param write_file_lists: if True, write out the files that make up
            each cluster with more than one member.
        :param schnell: if True, use simple euclidean distance, otherwise, use
            Andrews-Bernstein distance from Andrews & Bernstein J Appl Cryst
            47:346 (2014) on the Niggli cells.
        :param doplot: Boolean flag for if the plotting should be done at all.
            Runs faster if switched off.
        :param labels: 'default' will not display any labels for more than 100
            images, but will display file names for fewer. This can be
            manually overridden with a boolean flag. Any other value is used
            as the name of a member attribute to show.
        :return: a tuple ``(sub_clusters, ax)``. ``sub_clusters`` is sorted by
            number of members, smallest first, and renamed ``cluster_1``,
            ``cluster_2``, ... in that order. ``ax`` is the axes that was
            drawn on, or the ``ax`` argument unchanged when ``doplot`` is
            False.

        .. note::
          Use 'schnell' option with caution, since it can cause strange
          behaviour around symmetry boundaries.
        """

        logging.info("Hierarchical clustering of unit cells")
        import scipy.spatial.distance as dist
        import scipy.cluster.hierarchy as hcluster

        # 1. Create a numpy array of G6 cells
        g6_cells = np.array(
            [SingleFrame.make_g6(image.uc) for image in self.members])

        # 2. Do hierarchical clustering with the selected distance.
        if schnell:
            logging.info("Using Euclidean distance")
            metric = 'euclidean'
        else:
            logging.info(
                "Using Andrews-Bernstein distance from Andrews & Bernstein "
                "J Appl Cryst 47:346 (2014)")
            metric = NCDist
        pair_distances = dist.pdist(g6_cells, metric=metric)
        logging.info("Distances have been calculated")
        this_linkage = hcluster.linkage(pair_distances,
                                        method=linkage_method,
                                        metric=metric)
        cluster_ids = hcluster.fcluster(this_linkage,
                                        threshold,
                                        criterion=method)
        logging.debug("Clusters have been calculated")

        # 3. Create an array of sub-cluster objects from the clustering.
        #    Members are grouped in one pass instead of rescanning the whole
        #    member list once per cluster.
        info_string = ('Made using ab_cluster with t={},'
                       ' {} method, and {} linkage').format(
                           threshold, method, linkage_method)
        members_by_id = {}
        for member, cluster_id in zip(self.members, cluster_ids):
            members_by_id.setdefault(int(cluster_id), []).append(member)
        sub_clusters = [
            self.make_sub_cluster(members_by_id.get(number, []),
                                  'cluster_{}'.format(number), info_string)
            for number in range(1, max(cluster_ids) + 1)
        ]

        sub_clusters = sorted(sub_clusters, key=lambda x: len(x.members))
        # Rename to order by size
        for num, cluster in enumerate(sub_clusters):
            cluster.cname = 'cluster_{}'.format(num + 1)

        # 3.5 optionally write out the clusters to files.
        if write_file_lists:
            for cluster in sub_clusters:
                if len(cluster.members) > 1:
                    cluster.dump_file_list(
                        out_file_name="{}.lst".format(cluster.cname))

        if doplot:
            if labels is True:
                labels = [image.name for image in self.members]
            elif labels is False:
                labels = ['' for _ in self.members]
            elif labels == 'default':
                if len(self.members) > 100:
                    labels = ['' for _ in self.members]
                else:
                    labels = [image.name for image in self.members]
            else:
                labels = [getattr(v, labels, '') for v in self.members]

            # 4. Plot a dendrogram to the axes if no axis is passed,
            #    otherwise just return the axes object
            direct_visualisation = ax is None
            if direct_visualisation:
                fig = plt.figure("Distance Dendogram")
                ax = fig.gca()

            hcluster.dendrogram(this_linkage,
                                labels=labels,
                                leaf_font_size=8,
                                leaf_rotation=90.0,
                                color_threshold=threshold,
                                ax=ax)

            if log:
                # ``linthreshx`` configures the x axis and is rejected by
                # current matplotlib when given to a y scale, so the default
                # linear threshold is used instead.
                ax.set_yscale("symlog")
            else:
                ax.set_ylim(-ax.get_ylim()[1] / 100, ax.get_ylim()[1])

            if direct_visualisation:
                fig.savefig("{}_dendogram.pdf".format(self.cname))
                plt.show()

        return sub_clusters, ax
예제 #10
0
파일: unit_cell.py 프로젝트: phyy-nx/dials
    def ab_cluster(
        self,
        threshold=10000,
        method="distance",
        linkage_method="single",
        log=False,
        ax=None,
        write_file_lists=True,
        schnell=False,
        doplot=True,
        labels="default",
    ):
        """
        Hierarchical clustering using the unit cell dimensions.

        :param threshold: the threshold to use for pruning the tree into clusters.
        :param method: passed to ``scipy.cluster.hierarchy.fcluster`` as its
                       ``criterion``.
        :param linkage_method: which linkage method from scipy to use when creating
                               the linkages (see scipy.cluster.hierarchy).
        :param log: if True, use log scale on y axis.
        :param ax: if a matplotlib axes object is provided, plot to this.
                   Otherwise, create a new axes object and display on screen.
        :param write_file_lists: if True, write out the files that make up each
                                 cluster with more than one member.
        :param schnell: if True, use simple euclidean distance, otherwise, use Andrews-Bernstein
                        distance from Andrews & Bernstein J Appl Cryst 47:346 (2014) on the Niggli cells.
        :param doplot: Boolean flag for if the plotting should be done at all.
                       Runs faster if switched off.
        :param labels: 'default' will not display any labels for more than 100 images, but will display
                       file names for fewer. This can be manually overridden with a boolean flag.
                       Any other value is used as the name of a member attribute to show.
        :return: a tuple ``(sub_clusters, dendrogram, ax)``. ``sub_clusters`` is
                 sorted by number of members, smallest first, and renamed
                 ``cluster_1``, ``cluster_2``, ... in that order. When no pair
                 distances can be computed the result is ``([], None, ax)``.

        .. note::
          Use 'schnell' option with caution, since it can cause strange behaviour
          around symmetry boundaries.
        """

        import numpy as np

        from cctbx.uctbx.determine_unit_cell import NCDist
        from xfel.clustering.singleframe import SingleFrame

        logger.info("Hierarchical clustering of unit cells")
        import scipy.cluster.hierarchy as hcluster
        import scipy.spatial.distance as dist

        # 1. Create a numpy array of G6 cells
        g6_cells = np.array([SingleFrame.make_g6(image.uc) for image in self.members])

        # 2. Do hierarchical clustering with the selected distance.
        if schnell:
            logger.info("Using Euclidean distance")
            metric = "euclidean"
        else:
            logger.info(
                "Using Andrews-Bernstein distance from Andrews & Bernstein "
                "J Appl Cryst 47:346 (2014)"
            )
            metric = NCDist
        pair_distances = dist.pdist(g6_cells, metric=metric)
        if len(pair_distances) == 0:
            logger.debug("No distances were calculated. Aborting clustering.")
            # Same arity as the normal return, so callers that unpack three
            # values do not fail on this path.
            return [], None, ax

        logger.info("Distances have been calculated")
        this_linkage = hcluster.linkage(
            pair_distances, method=linkage_method, metric=metric
        )
        cluster_ids = hcluster.fcluster(this_linkage, threshold, criterion=method)
        logger.debug("Clusters have been calculated")

        # 3. Create an array of sub-cluster objects from the clustering.
        #    Members are grouped in one pass instead of rescanning the whole
        #    member list once per cluster.
        info_string = f"Made using ab_cluster with t={threshold}, {method} method, and {linkage_method} linkage"
        members_by_id = {}
        for member, cluster_id in zip(self.members, cluster_ids):
            members_by_id.setdefault(int(cluster_id), []).append(member)
        sub_clusters = [
            self.make_sub_cluster(
                members_by_id.get(number, []),
                f"cluster_{number}",
                info_string,
            )
            for number in range(1, max(cluster_ids) + 1)
        ]

        sub_clusters = sorted(sub_clusters, key=lambda x: len(x.members))
        # Rename to order by size
        for num, cluster in enumerate(sub_clusters):
            cluster.cname = f"cluster_{num + 1}"

        # 3.5 optionally write out the clusters to files.
        if write_file_lists:
            for cluster in sub_clusters:
                if len(cluster.members) > 1:
                    cluster.dump_file_list(out_file_name=f"{cluster.cname}.lst")

        if labels is True:
            labels = [image.name for image in self.members]
        elif labels is False:
            labels = ["" for _ in self.members]
        elif labels == "default":
            if len(self.members) > 100:
                labels = ["" for _ in self.members]
            else:
                labels = [image.name for image in self.members]
        else:
            labels = [getattr(v, labels, "") for v in self.members]

        if doplot:
            import matplotlib.pyplot as plt

            # 4. Plot a dendrogram to the axes if no axis is passed, otherwise just
            #    return the axes object
            direct_visualisation = ax is None
            if direct_visualisation:
                fig = plt.figure("Distance Dendogram")
                ax = fig.gca()

        # The dendrogram data is computed even without plotting
        # (no_plot=True) because it is part of the return value.
        dendrogram = hcluster.dendrogram(
            this_linkage,
            labels=labels,
            p=200,
            truncate_mode="lastp",  # show only the last p merged clusters
            leaf_font_size=8,
            leaf_rotation=90.0,
            color_threshold=threshold,
            ax=ax,
            no_plot=not doplot,
        )

        if doplot:
            if log:
                # ``linthreshx`` configures the x axis and is rejected by
                # current matplotlib when given to a y scale, so the default
                # linear threshold is used instead.
                ax.set_yscale("symlog")
            else:
                ax.set_ylim(-ax.get_ylim()[1] / 100, ax.get_ylim()[1])

            if direct_visualisation:
                fig.savefig(f"{self.cname}_dendogram.pdf")
                plt.show()

        return sub_clusters, dendrogram, ax
예제 #11
0
  def from_files(cls,
                 raw_input=None,
                 pickle_list=[],
                 dials_refls=[],
                 dials_expts=[],
                 _prefix='cluster_from_file',
                 _message='Made from list of individual files',
                 n_images=None,
                 dials=False,
                 json=False,
                 **kwargs):
    """Constructor to get a cluster from a list of individual files.

    One of three input modes is used: ``dials`` (reflection/experiment pairs),
    ``json`` (experiment files only) or, by default, pickle streams.

    :param raw_input: optional list of paths. In ``dials``/``json`` mode paths
    ending in ".pickle" count as reflections and paths ending in ".json" as
    experiments; otherwise every path is added to ``pickle_list``.
    :param pickle_list: list of pickle files
    :param dials_refls: list of DIALS integrated reflections
    :param dials_expts: list of DIALS experiment jsons
    :param _prefix: second argument given to the cluster constructor.
    :param _message: third argument given to the cluster constructor.
    :param n_images: find at most this number of images
    :param dials: use the dials_refls and dials_expts arguments to construct the clusters (default: False)
    :param json: build frames from the experiment jsons alone (default: False)
    :param kwargs: forwarded to the frame constructors (e.g. ``use_b``).
    :return: ``cls(frames, _prefix, _message)``.
    """

    # Work on private copies: the default lists are shared between calls, and
    # the caller's own lists must not grow as a side effect of this call.
    pickle_list = list(pickle_list or [])
    dials_refls = list(dials_refls or [])
    dials_expts = list(dials_expts or [])

    data = []

    def sort_dials_raw_input(raw):
      expts = []
      refls = []
      for path in raw:
        if path.endswith(".pickle"):
          refls.append(path)
        elif path.endswith(".json"):
          expts.append(path)
      return (refls, expts)

    def done():
      if n_images is None:
        return False
      return len(data) >= n_images

    if dials:
      if raw_input is not None:
        r, e = sort_dials_raw_input(raw_input)
        dials_refls.extend(r)
        dials_expts.extend(e)
      # Reflections and experiments are paired by position.
      for r, e in zip(dials_refls, dials_expts):
        this_frame = SingleDialsFrameFromFiles(refls_path=r, expts_path=e, **kwargs)
        if hasattr(this_frame, 'miller_array'):
          data.append(this_frame)
          if done():
            break
        else:
          logger.info('skipping reflections {} and experiments {}'.format(r, e))
    elif json:
      if raw_input is not None:
        dials_expts.extend(sort_dials_raw_input(raw_input)[1])
      for e in dials_expts:
        # Frame name: directory plus the file name up to the first "_".
        name = os.path.join(os.path.dirname(e), os.path.basename(e).split("_")[0])
        this_frame = SingleDialsFrameFromJson(expts_path=e, **kwargs)
        this_frame.name = name
        data.append(this_frame)
        if done():
          break
    else:
      if raw_input is not None:
        pickle_list.extend(raw_input)
      # Parenthesised single-argument form prints the same on Python 2 and 3.
      print("There are %d input files" % len(pickle_list))
      from xfel.command_line.print_pickle import generate_data_from_streams
      for index, data_dict in enumerate(generate_data_from_streams(pickle_list)):
        this_frame = SingleFrame(dicti=data_dict, **kwargs)
        if hasattr(this_frame, 'miller_array'):
          data.append(this_frame)
          if done():
            break
        else:
          # No file path is in scope here, so report the frame's position in
          # the stream instead.
          logger.info('skipping input frame {}: no miller_array'.format(index))
      print("%d lattices will be analyzed" % len(data))

    return cls(data, _prefix, _message)
예제 #12
0
    def from_files(cls,
                   raw_input=None,
                   pickle_list=[],
                   dials_refls=[],
                   dials_expts=[],
                   _prefix='cluster_from_file',
                   _message='Made from list of individual files',
                   n_images=None,
                   dials=False,
                   **kwargs):
        """Constructor to get a cluster from a list of individual files.

        :param raw_input: optional list of paths. In ``dials`` mode paths
            ending in ".pickle" count as reflections and paths ending in
            ".json" as experiments; otherwise every path is added to
            ``pickle_list``.
        :param pickle_list: list of pickle files
        :param dials_refls: list of DIALS integrated reflections
        :param dials_expts: list of DIALS experiment jsons
        :param _prefix: second argument given to the cluster constructor.
        :param _message: third argument given to the cluster constructor.
        :param n_images: find at most this number of images
        :param dials: use the dials_refls and dials_expts arguments to
            construct the clusters (default: False)
        :param kwargs: forwarded to the frame constructors (e.g. ``use_b``).
        :return: ``cls(frames, _prefix, _message)``.
        """

        # Work on private copies: the default lists are shared between calls,
        # and the caller's own lists must not grow as a side effect.
        pickle_list = list(pickle_list or [])
        dials_refls = list(dials_refls or [])
        dials_expts = list(dials_expts or [])

        data = []

        def sort_dials_raw_input(raw):
            expts = []
            refls = []
            for path in raw:
                if path.endswith(".pickle"):
                    refls.append(path)
                elif path.endswith(".json"):
                    expts.append(path)
            return (refls, expts)

        def done():
            if n_images is None:
                return False
            return len(data) >= n_images

        def frame_id(path):
            # Directory plus the part of the file name before the first "_".
            return os.path.join(os.path.dirname(path),
                                os.path.basename(path).split("_")[0])

        if dials:
            if raw_input is not None:
                r, e = sort_dials_raw_input(raw_input)
                dials_refls.extend(r)
                dials_expts.extend(e)
            # Map each id to the first experiment file carrying it; this is
            # the entry ``list.index`` would find, without the repeated scans.
            first_expt_by_id = {}
            for expt in dials_expts:
                first_expt_by_id.setdefault(frame_id(expt), expt)
            matches = [(refl, first_expt_by_id[frame_id(refl)])
                       for refl in dials_refls
                       if frame_id(refl) in first_expt_by_id]
            for (r, e) in matches:
                this_frame = SingleDialsFrameFromFiles(refls_path=r,
                                                       expts_path=e,
                                                       **kwargs)
                if hasattr(this_frame, 'miller_array'):
                    data.append(this_frame)
                    if done():
                        break
                else:
                    logging.info(
                        'skipping reflections {} and experiments {}'.format(
                            r, e))
        else:
            if raw_input is not None:
                pickle_list.extend(raw_input)
            for path in pickle_list:
                this_frame = SingleFrame(path, os.path.basename(path),
                                         **kwargs)
                if hasattr(this_frame, 'miller_array'):
                    data.append(this_frame)
                    if done():
                        break
                else:
                    logging.info('skipping file {}'.format(
                        os.path.basename(path)))

        return cls(data, _prefix, _message)
예제 #13
0
  def ab_cluster(self, threshold=10000, method='distance',
                 linkage_method='single', log=False,
                 ax=None, write_file_lists=True, schnell=False, doplot=True,
                 labels='default'):
    """
    Hierarchical clustering using the unit cell dimensions.

    :param threshold: the threshold to use for pruning the tree into clusters.
    :param method: passed to ``scipy.cluster.hierarchy.fcluster`` as its
    ``criterion``.
    :param linkage_method: which linkage method from scipy to use when creating the linkages (see scipy.cluster.hierarchy).
    :param log: if True, use log scale on y axis.
    :param ax: if a matplotlib axes object is provided, plot to this. Otherwise, create a new axes object and display on screen.
    :param write_file_lists: if True, write out the files that make up each cluster with more than one member.
    :param schnell: if True, use simple euclidean distance, otherwise, use Andrews-Bernstein distance from Andrews & Bernstein J Appl Cryst 47:346 (2014) on the Niggli cells.
    :param doplot: Boolean flag for if the plotting should be done at all.
    Runs faster if switched off.
    :param labels: 'default' will not display any labels for more than 100 images, but will display file names for fewer. This can be manually overridden with a boolean flag. Any other value is used as the name of a member attribute to show.
    :return: a tuple ``(sub_clusters, ax)``. ``sub_clusters`` is sorted by
    number of members, smallest first, and renamed ``cluster_1``,
    ``cluster_2``, ... in that order. ``ax`` is the axes that was drawn on, or
    the ``ax`` argument unchanged when ``doplot`` is False.

    .. note::
      Use 'schnell' option with caution, since it can cause strange behaviour
      around symmetry boundaries.
    """

    logging.info("Hierarchical clustering of unit cells")
    import scipy.spatial.distance as dist
    import scipy.cluster.hierarchy as hcluster

    # 1. Create a numpy array of G6 cells
    g6_cells = np.array([SingleFrame.make_g6(image.uc)
                         for image in self.members])

    # 2. Do hierarchical clustering with the selected distance.
    if schnell:
      logging.info("Using Euclidean distance")
      metric = 'euclidean'
    else:
      logging.info("Using Andrews-Bernstein distance from Andrews & Bernstein "
                   "J Appl Cryst 47:346 (2014)")
      metric = NCDist
    pair_distances = dist.pdist(g6_cells, metric=metric)
    logging.info("Distances have been calculated")
    this_linkage = hcluster.linkage(pair_distances,
                                    method=linkage_method,
                                    metric=metric)
    cluster_ids = hcluster.fcluster(this_linkage,
                                    threshold,
                                    criterion=method)
    logging.debug("Clusters have been calculated")

    # 3. Create an array of sub-cluster objects from the clustering.
    #    Members are grouped in one pass instead of rescanning the whole
    #    member list once per cluster.
    info_string = ('Made using ab_cluster with t={},'
                   ' {} method, and {} linkage').format(threshold,
                                                        method,
                                                        linkage_method)
    members_by_id = {}
    for member, cluster_id in zip(self.members, cluster_ids):
      members_by_id.setdefault(int(cluster_id), []).append(member)
    sub_clusters = [
      self.make_sub_cluster(members_by_id.get(number, []),
                            'cluster_{}'.format(number),
                            info_string)
      for number in range(1, max(cluster_ids) + 1)]

    sub_clusters = sorted(sub_clusters, key=lambda x: len(x.members))
    # Rename to order by size
    for num, cluster in enumerate(sub_clusters):
      cluster.cname = 'cluster_{}'.format(num + 1)

    # 3.5 optionally write out the clusters to files.
    if write_file_lists:
      for cluster in sub_clusters:
        if len(cluster.members) > 1:
          cluster.dump_file_list(out_file_name="{}.lst".format(cluster.cname))

    if doplot:
      if labels is True:
        labels = [image.name for image in self.members]
      elif labels is False:
        labels = ['' for _ in self.members]
      elif labels == 'default':
        if len(self.members) > 100:
          labels = ['' for _ in self.members]
        else:
          labels = [image.name for image in self.members]
      else:
        labels = [getattr(v, labels, '') for v in self.members]

      # 4. Plot a dendrogram to the axes if no axis is passed, otherwise just
      #    return the axes object
      direct_visualisation = ax is None
      if direct_visualisation:
        fig = plt.figure("Distance Dendogram")
        ax = fig.gca()

      hcluster.dendrogram(this_linkage,
                          labels=labels,
                          leaf_font_size=8, leaf_rotation=90.0,
                          color_threshold=threshold, ax=ax)

      if log:
        # ``linthreshx`` configures the x axis and is rejected by current
        # matplotlib when given to a y scale, so the default linear
        # threshold is used instead.
        ax.set_yscale("symlog")
      else:
        ax.set_ylim(-ax.get_ylim()[1] / 100, ax.get_ylim()[1])

      if direct_visualisation:
        fig.savefig("{}_dendogram.pdf".format(self.cname))
        plt.show()

    return sub_clusters, ax