예제 #1
0
  def from_iterable(cls, iterable,
                    _prefix='cluster_from_iterable',
                    _message='Made from list of individual cells',
                    **kwargs):
    """Constructor to get a cluster from an iterable (a list or tuple).

    Every item of the iterable must hold exactly seven values: the unit cell
    parameters a, b, c, alpha, beta, gamma followed by the space group type.
    Items that cannot be converted into a frame are skipped, so one bad entry
    does not abort the whole cluster; each skip is reported at INFO level
    through the standard ``logging`` module.

    :param iterable: a list or a tuple of 7-element items
    :param _prefix: forwarded unchanged to the class constructor
    :param _message: forwarded unchanged to the class constructor
    :param kwargs: accepted for signature compatibility; not used here
    :return: ``cls(data, _prefix, _message)`` where ``data`` is the list of
      frames that could be built
    """
    import logging

    from xfel.clustering.singleframe import CellOnlyFrame
    from cctbx.uctbx import unit_cell
    from cctbx.sgtbx import space_group_info
    from cctbx import crystal

    log = logging.getLogger(__name__)
    data = []

    for j, item in enumerate(iterable):
      try:
        # Explicit check instead of ``assert`` so that the validation is not
        # stripped when Python runs with -O.
        if len(item) != 7:
          raise ValueError("expected 7 values, got %d" % len(item))
        unit_cell_params = tuple(float(t) for t in item[0:6])
        uc_init = unit_cell(unit_cell_params)
        sgi = space_group_info(item[6])
        crystal_symmetry = crystal.symmetry(unit_cell=uc_init, space_group_info=sgi)
        name = "lattice%07d" % j
        this_frame = CellOnlyFrame(crystal_symmetry, path=name, name=name)
      except Exception as e:
        # Best-effort construction: keep going, but leave a trace of what was
        # dropped and why instead of swallowing the error silently.
        log.info('skipping item %r: %s', item, e)
        continue

      if hasattr(this_frame, 'crystal_symmetry'):
        data.append(this_frame)
      else:
        log.info('skipping item %r: frame has no crystal_symmetry', item)

    return cls(data, _prefix, _message)
예제 #2
0
  def from_crystal_symmetries(cls, crystal_symmetries,
                              lattice_ids=None,
                              _prefix='cluster_from_crystal_symmetries',
                              _message='Made from list of individual cells',
                              n_images=None,
                              dials=False,
                              **kwargs):
    """Constructor to get a cluster from a list of crystal symmetries.

    One ``CellOnlyFrame`` is built per crystal symmetry and labelled
    ``lattice%07d`` after its position in the input.  Frames that end up
    without a ``crystal_symmetry`` attribute are skipped and logged.

    :param crystal_symmetries: sequence of crystal symmetry objects
    :param lattice_ids: optional sequence, same length as
      ``crystal_symmetries``; element ``j`` is passed to frame ``j`` as
      ``lattice_id`` (``None`` is passed when this argument is omitted)
    :param _prefix: forwarded unchanged to the class constructor
    :param _message: forwarded unchanged to the class constructor
    :param n_images: accepted for signature compatibility; not used here
    :param dials: accepted for signature compatibility; not used here
    :param kwargs: accepted for signature compatibility; not used here
    :return: ``cls(data, _prefix, _message)``
    :raises AssertionError: if ``lattice_ids`` is given with a different
      length than ``crystal_symmetries``
    """
    from xfel.clustering.singleframe import CellOnlyFrame

    if lattice_ids is not None:
      assert len(lattice_ids) == len(crystal_symmetries)

    data = []
    for j, cs in enumerate(crystal_symmetries):
      name = "lattice%07d"%j
      lattice_id = None if lattice_ids is None else lattice_ids[j]
      this_frame = CellOnlyFrame(
        crystal_symmetry=cs, path=name, name=name, lattice_id=lattice_id)
      if hasattr(this_frame, 'crystal_symmetry'):
        data.append(this_frame)
      else:
        # The original logged an undefined name here (NameError); report the
        # lattice label of the skipped entry instead.
        logger.info('skipping item {}'.format(name))
    logger.info("%d lattices will be analyzed"%(len(data)))

    return cls(data, _prefix, _message)
예제 #3
0
    def from_list(cls,
                  file_name,
                  raw_input=None,
                  pickle_list=[],
                  dials_refls=[],
                  dials_expts=[],
                  _prefix='cluster_from_file',
                  _message='Made from list of individual cells',
                  n_images=None,
                  dials=False,
                  **kwargs):
        """Constructor to get a cluster from a single file.

        The file must list, one lattice per line, the unit cell parameters
        a, b, c, alpha, beta, gamma and the space_group_type, each as a single
        whitespace-separated token (seven tokens per line).  Blank lines are
        ignored.

        :param file_name: pathname of the file
        :param raw_input: accepted for signature compatibility; not used here
        :param pickle_list: accepted for signature compatibility; not used here
        :param dials_refls: accepted for signature compatibility; not used here
        :param dials_expts: accepted for signature compatibility; not used here
        :param _prefix: forwarded unchanged to the class constructor
        :param _message: forwarded unchanged to the class constructor
        :param n_images: accepted for signature compatibility; not used here
        :param dials: accepted for signature compatibility; not used here
        :return: ``cls(data, _prefix, _message)``
        :raises AssertionError: if a non-blank line does not have 7 tokens
        """
        from xfel.clustering.singleframe import CellOnlyFrame
        from cctbx.uctbx import unit_cell
        from cctbx.sgtbx import space_group_info
        from cctbx import crystal

        # Context manager so the file handle is closed deterministically.
        with open(file_name, "r") as handle:
            stream = handle.readlines()
        print("There are %d lines in the input file" % (len(stream)))

        data = []
        for j, item in enumerate(stream):
            tokens = item.strip().split()
            if not tokens:
                # A blank (e.g. trailing) line carries no lattice.
                continue
            assert len(tokens) == 7, tokens
            # All six cell parameters: a, b, c, alpha, beta, gamma.  The
            # previous slice [0:5] silently dropped gamma.
            unit_cell_params = tuple(float(t) for t in tokens[0:6])
            space_group_type = tokens[6]
            uc_init = unit_cell(unit_cell_params)
            sgi = space_group_info(space_group_type)
            crystal_symmetry = crystal.symmetry(unit_cell=uc_init,
                                                space_group_info=sgi)
            name = "lattice%07d" % j
            this_frame = CellOnlyFrame(crystal_symmetry, path=name, name=name)
            if hasattr(this_frame, 'crystal_symmetry'):
                data.append(this_frame)
            else:
                logger.info('skipping item {}'.format(item))
        print("%d lattices will be analyzed" % (len(data)))

        return cls(data, _prefix, _message)
예제 #4
0
def run_detail(show_plot, save_plot):
    """Cluster the unit cells listed in the file named by ``sys.argv[1]``.

    Every line of the file is split on whitespace and handed to
    ``CellOnlyFrame(args=tokens, path=None)``.  The metrical matrices of all
    cells are flattened into one ``flex.double`` array, a distance matrix is
    computed with ``NCDist_flatten`` and passed to ``clustering_manager``.
    A summary of each cluster is printed, and optional plots are produced.

    :param show_plot: if true, display the scatter plot, the decision graph,
      the no-halo plot and the final plot interactively
    :param save_plot: if true, save the initial scatter plot to ``plot_name``
    :return: None
    """
    P = Profiler("0. Read data")
    import sys
    file_name = sys.argv[1]
    from xfel.clustering.singleframe import CellOnlyFrame
    cells = []
    # Iterate the file object directly (works on Python 2 and 3) and make
    # sure the handle is closed once all lines are read.
    with open(file_name, "r") as handle:
        for line in handle:
            tokens = line.strip().split()
            cells.append(CellOnlyFrame(args=tokens, path=None))
    MM = [c.mm for c in cells]  # get all metrical matrices
    MM_double = flex.double()
    for Tup in MM:
        for j in range(6):
            MM_double.append(Tup[j])

    print("There are %d cells X" % (len(MM)))
    # Indices into c.uc used as the x and y plot coordinates.
    CX = 0
    CY = 3
    coord_x = flex.double([c.uc[CX] for c in cells])
    coord_y = flex.double([c.uc[CY] for c in cells])
    if show_plot or save_plot:
        import matplotlib
        if not show_plot:
            # http://matplotlib.org/faq/howto_faq.html#generate-images-without-having-a-window-appear
            matplotlib.use('Agg')  # use a non-interactive backend
        from matplotlib import pyplot as plt
        plt.plot(coord_x, coord_y, "k.", markersize=3.)
        if save_plot:
            # NOTE(review): plot_name is not defined in this function; it
            # must exist at module level or this raises NameError - confirm.
            plt.savefig(plot_name,
                        size_inches=(10, 10),
                        dpi=300,
                        bbox_inches='tight')
        if show_plot:
            plt.show()

    print("Now constructing a Dij matrix.")
    # Rebinding P drops the "0. Read data" profiler object.
    P = Profiler("1. compute Dij matrix")
    NN = len(MM)

    from cctbx.uctbx.determine_unit_cell import NCDist_flatten
    Dij = NCDist_flatten(MM_double)
    # presumably Profiler reports its timing when the object is released
    del P

    d_c = 10000  # the distance cutoff, such that average item neighbors 1-2% of all items
    CM = clustering_manager(Dij=Dij, d_c=d_c)

    # Summarize the results here
    n_cluster = 1 + flex.max(CM.cluster_id_final)
    print("%d have been analyzed" % len(cells))
    print("# ------------   %d CLUSTERS  ----------------" % (n_cluster))
    for i in range(n_cluster):
        item = flex.first_index(CM.cluster_id_maxima, i)
        print("Cluster %d.  Central unit cell: item %d" % (i, item))
        cells[item].crystal_symmetry.show_summary()
        print("Cluster has %d items, or %d after trimming borders" % (
            (CM.cluster_id_full == i).count(True),
            (CM.cluster_id_final == i).count(True)))
        print("")

    appcolors = [
        'b', 'r', '#ff7f0e', '#2ca02c', '#9467bd', '#8c564b', '#e377c2',
        '#7f7f7f', '#bcbd22', '#17becf'
    ]
    if show_plot:
        # Decision graph: rho against delta, cluster maxima highlighted.
        from matplotlib import pyplot as plt

        plt.plot(CM.rho, CM.delta, "r.", markersize=3.)
        for x in range(NN):
            if CM.cluster_id_maxima[x] >= 0:
                plt.plot([CM.rho[x]], [CM.delta[x]], "ro")
        plt.show()

        # No-halo plot: every item coloured by its full cluster id.
        colors = [appcolors[i % 10] for i in CM.cluster_id_full]

        plt.scatter(coord_x,
                    coord_y,
                    marker='o',
                    color=colors,
                    linewidths=0.4,
                    edgecolor='k')
        for i in range(n_cluster):
            item = flex.first_index(CM.cluster_id_maxima, i)
            plt.plot([cells[item].uc[CX]], [cells[item].uc[CY]], 'y.')
        plt.show()

        # Final plot: items with final id -1 (halo) in black, the rest
        # coloured by final cluster id.
        halo = (CM.cluster_id_final == -1)
        core = ~halo
        plt.plot(coord_x.select(halo), coord_y.select(halo), "k.")
        colors = [appcolors[i % 10] for i in CM.cluster_id_final.select(core)]
        plt.scatter(coord_x.select(core),
                    coord_y.select(core),
                    marker="o",
                    color=colors,
                    linewidths=0.4,
                    edgecolor='k')
        for i in range(n_cluster):
            item = flex.first_index(CM.cluster_id_maxima, i)
            plt.plot([cells[item].uc[CX]], [cells[item].uc[CY]], 'y.')
        plt.show()
예제 #5
0
def get_uc_consensus(experiments_list,
                     show_plot=False,
                     save_plot=False,
                     return_only_first_indexed_model=False,
                     finalize_method='reindex_with_known_crystal_models',
                     clustering_params=None):
    '''
    Cluster crystal models and return one representative model per cluster.

    Uses the Rodriguez Laio 2014 method to do a hierarchical clustering of the
    crystal models. Clustering code taken from
    github.com/cctbx-xfel/cluster_regression.
    Clustering is first done based on unit cell dimensions. Then, for each of
    the unit cell clusters identified, a further clustering is done based on
    the orientation matrix A.

    :param experiments_list: list of experiments objects; each element must
      provide ``crystals()`` whose first entry is the crystal model used
    :param show_plot: if true, display diagnostic plots interactively
    :param save_plot: if true, save the unit cell scatter plot as
      'uc_cluster.png'
    :param return_only_first_indexed_model: if true, skip all clustering and
      return the first crystal model of the first experiment
    :param finalize_method: not used inside this function
    :param clustering_params: object supplying d_c, d_c_ori, min_datapts,
      max_percentile_rho_uc, max_percentile_rho_ori and Z_delta; defaults to
      the module-level ``clustering_iota_scope``
    :return: tuple ``(crystal_models, cluster_ids)``.  ``cluster_ids`` is a
      list with one entry per experiment holding the index of the model the
      experiment was assigned to (-1 when unassigned).  When clustering is
      skipped or yields no model, the tuple is
      ``([first crystal model], None)``.
    '''
    if return_only_first_indexed_model:
        return [experiments_list[0].crystals()[0]], None
    cells = []

    from xfel.clustering.singleframe import CellOnlyFrame
    # Flag for testing Lysozyme data from NKS. Make sure cluster_regression repository is present and configured
    # Program will exit after plots are displayed if this flag is true
    test_nks = False
    if clustering_params is None:
        clustering_params = clustering_iota_scope

    if test_nks:
        from cctbx import crystal
        import libtbx.load_env
        cluster_regression = libtbx.env.find_in_repositories(
            relative_path="cluster_regression", test=os.path.isdir)
        file_name = os.path.join(cluster_regression, 'examples',
                                 'lysozyme1341.txt')
        # Iterate the file object directly: file.xreadlines() does not exist
        # on Python 3, and the context manager closes the handle.
        with open(file_name, "r") as handle:
            for line in handle:
                tokens = line.strip().split()
                unit_cell = tuple(float(x) for x in tokens[0:6])
                space_group_symbol = tokens[6]
                crystal_symmetry = crystal.symmetry(
                    unit_cell=unit_cell, space_group_symbol=space_group_symbol)
                cells.append(CellOnlyFrame(crystal_symmetry))
    else:
        clustered_experiments_list = flex.int()
        for experiment in experiments_list:
            if len(experiment.crystals()) > 1:
                print('IOTA:Should have only one crystal model')
            crystal_symmetry = experiment.crystals()[0].get_crystal_symmetry()
            cells.append(CellOnlyFrame(crystal_symmetry))
            # Placeholder (-1 = unassigned) that will finally contain the
            # clustering result for each experiment
            clustered_experiments_list.append(-1)
    MM = [c.mm for c in cells]  # metrical matrices
    # Flatten the first six entries of every metrical matrix into one array.
    MM_double = flex.double()
    for i in range(len(MM)):
        Tup = MM[i]
        for j in range(6):
            MM_double.append(Tup[j])
    print('There are %d cells' % len(MM))
    coord_x = flex.double([c.uc[0] for c in cells])
    coord_y = flex.double([c.uc[1] for c in cells])
    if show_plot or save_plot:
        import matplotlib
        if not show_plot:
            # Non-interactive backend when the plot is only written to disk.
            matplotlib.use('Agg')
        import matplotlib.pyplot as plt
        plt.plot([c.uc[0] for c in cells], [c.uc[1] for c in cells],
                 "k.",
                 markersize=3.)
        plt.axes().set_aspect("equal")
    if save_plot:
        plot_name = 'uc_cluster.png'
        plt.savefig(plot_name,
                    size_inches=(10, 10),
                    dpi=300,
                    bbox_inches='tight')
    if show_plot:
        plt.show()
    print('Now constructing a Dij matrix: Starting Unit Cell clustering')
    NN = len(MM)
    from cctbx.uctbx.determine_unit_cell import NCDist_flatten
    Dij = NCDist_flatten(MM_double)
    from scitbx.math import five_number_summary
    # NOTE(review): the configured d_c is read and then immediately replaced
    # by the estimate below - confirm the estimate is meant to win.
    d_c = clustering_params.d_c  #five_number_summary(list(Dij))[1]
    d_c = estimate_d_c(Dij)
    print('d_c = ', d_c)
    # Too few cells to cluster: fall back to the first crystal model.
    if len(cells) < 5:
        return [experiments_list[0].crystals()[0]], None
    CM = clustering_manager(
        Dij=Dij,
        d_c=d_c,
        max_percentile_rho=clustering_params.max_percentile_rho_uc,
        Z_delta=clustering_params.Z_delta,
        strategy='strategy_3')
    n_cluster = 1 + flex.max(CM.cluster_id_final)
    print(len(cells), ' datapoints have been analyzed')
    print('%d CLUSTERS' % n_cluster)
    for i in range(n_cluster):
        item = flex.first_index(CM.cluster_id_maxima, i)
        print('Cluster %d central Unit cell = %d' % (i, item))
        cells[item].crystal_symmetry.show_summary()

    # More plots for debugging
    appcolors = [
        'b', 'r', '#ff7f0e', '#2ca02c', '#9467bd', '#8c564b', '#e377c2',
        '#7f7f7f', '#bcbd22', '#17becf'
    ]
    if show_plot:
        # Decision graph
        import matplotlib.pyplot as plt
        plt.plot(CM.rho, CM.delta, "r.", markersize=3.)
        for x in range(NN):
            if CM.cluster_id_maxima[x] >= 0:
                plt.plot([CM.rho[x]], [CM.delta[x]], "ro")
        plt.show()

    if show_plot:
        import matplotlib.pyplot as plt
        colors = [appcolors[i % 10] for i in CM.cluster_id_full]
        plt.scatter(coord_x,
                    coord_y,
                    marker='o',
                    color=colors,
                    linewidth=0.4,
                    edgecolor='k')
        for i in range(n_cluster):
            item = flex.first_index(CM.cluster_id_maxima, i)
            plt.plot([cells[item].uc[0]], [cells[item].uc[1]], 'y.')
        # Show the figure once, after ALL cluster centres have been drawn.
        # Previously set_aspect/show sat inside the loop, so the window
        # appeared after the first centre only.
        plt.axes().set_aspect("equal")
        plt.show()
    if test_nks:
        exit()

    # Now look at each unit cell cluster for orientational clustering
    # idea is to cluster the orientational component in each of the unit cell clusters
    #
    do_orientational_clustering = not return_only_first_indexed_model  # temporary.
    dxtbx_crystal_models = []
    if do_orientational_clustering:
        print('IOTA: Starting orientational clustering')
        from scitbx.matrix import sqr
        from cctbx_orientation_ext import crystal_orientation
        Dij_ori = {}  # dictionary to store Dij for each cluster
        uc_experiments_list = {
        }  # dictionary to store experiments_lists for each cluster
        from collections import Counter
        uc_cluster_count = Counter(list(CM.cluster_id_final))
        # Put all experiments from the same uc cluster together
        CM_mapping = {}
        for i in range(len(experiments_list)):
            if CM.cluster_id_full[i] not in uc_experiments_list:
                uc_experiments_list[CM.cluster_id_full[i]] = []
                CM_mapping[CM.cluster_id_full[i]] = []
            uc_experiments_list[CM.cluster_id_full[i]].append(
                experiments_list[i])
            # Maintain mapping between original experiments_list and uc_experiments_list
            # Per cluster id: list of (index_in_experiments_list, index_in_uc_cluster)
            CM_mapping[CM.cluster_id_full[i]].append(
                (i, len(uc_experiments_list[CM.cluster_id_full[i]]) - 1))
        # NOTE(review): cluster sizes below come from cluster_id_final while
        # the experiments were grouped by cluster_id_full; if the two
        # labelings differ the array size and N_samples_in_cluster disagree -
        # confirm this is intended.
        for cluster in uc_cluster_count:
            # Make sure there are at least a minimum number of samples in the cluster
            if uc_cluster_count[cluster] < clustering_params.min_datapts:
                continue
            Dij_ori[cluster] = flex.double(
                [[0.0] * uc_cluster_count[cluster]] *
                uc_cluster_count[cluster])
            # Now populate the Dij_ori array (symmetric, filled pair by pair)
            N_samples_in_cluster = len(uc_experiments_list[cluster])
            for i in range(N_samples_in_cluster - 1):
                for j in range(i + 1, N_samples_in_cluster):
                    dij_ori = get_dij_ori(
                        uc_experiments_list[cluster][i].crystals()[0],
                        uc_experiments_list[cluster][j].crystals()[0])
                    Dij_ori[cluster][N_samples_in_cluster * i + j] = dij_ori
                    Dij_ori[cluster][N_samples_in_cluster * j + i] = dij_ori

        # Now do the orientational cluster analysis
        # The configured value is replaced per cluster by estimate_d_c below.
        d_c_ori = clustering_params.d_c_ori  # 0.13
        from exafel_project.ADSE13_25.clustering.plot_with_dimensional_embedding import plot_with_dimensional_embedding
        A_matrices = []
        for cluster in Dij_ori:
            d_c_ori = estimate_d_c(Dij_ori[cluster])
            print('d_c_ori=', d_c_ori)
            CM_ori = clustering_manager(
                Dij=Dij_ori[cluster],
                d_c=d_c_ori,
                max_percentile_rho=clustering_params.max_percentile_rho_ori,
                Z_delta=clustering_params.Z_delta,
                strategy='strategy_3')
            n_cluster_ori = 1 + flex.max(CM_ori.cluster_id_final)
            for i in range(n_cluster_ori):
                # Ignore orientational clusters with too few members.
                if len([zz for zz in CM_ori.cluster_id_final if zz == i
                        ]) < clustering_params.min_datapts:
                    continue
                item = flex.first_index(CM_ori.cluster_id_maxima, i)
                dxtbx_crystal_model = uc_experiments_list[cluster][
                    item].crystals()[0]
                dxtbx_crystal_models.append(dxtbx_crystal_model)
                # Map the orientational clusters to the original experiments_list indices
                # This should be the final list of clusters!
                for j, ori_cluster_id in enumerate(CM_ori.cluster_id_final):
                    if ori_cluster_id == i:
                        xx, yy = CM_mapping[cluster][j]
                        clustered_experiments_list[xx] = len(
                            dxtbx_crystal_models) - 1
                # Separate local name so the imported crystal_orientation
                # callable is not rebound to an instance.
                model_orientation = crystal_orientation(
                    dxtbx_crystal_model.get_A(), True)
                A_direct = sqr(model_orientation.reciprocal_matrix()
                               ).transpose().inverse()
                A_matrices.append(A_direct)
                print(
                    "IOTA: Direct A matrix 1st element of orientational cluster %d  = %12.6f"
                    % (i, A_direct[0]))
                print(A_direct)
            if show_plot:
                # Decision graph
                stretch_plot_factor = 1.05  # (1+fraction of limits by which xlim,ylim should be set)
                import matplotlib.pyplot as plt
                plt.plot(CM_ori.rho, CM_ori.delta, "r.", markersize=3.)
                for x in range(len(list(CM_ori.cluster_id_final))):
                    if CM_ori.cluster_id_maxima[x] >= 0:
                        plt.plot([CM_ori.rho[x]], [CM_ori.delta[x]], "ro")
                plt.xlim([-10, stretch_plot_factor * flex.max(CM_ori.rho)])
                plt.ylim([-10, stretch_plot_factor * flex.max(CM_ori.delta)])
                plt.show()
    # FIXME Still to be worked out what exactly should be returned
    # Make sure the crystal models are not too close to each other
    # FIXME should be a PHIL
    min_angle = 5.0  # taken from indexer.py
    close_models_list = []
    # Not used really; other fixes have been made to code to figure out outliers
    # Still keeping this in case it is useful later on.
    if len(dxtbx_crystal_models) > 10000:
        from dials.algorithms.indexing.compare_orientation_matrices import difference_rotation_matrix_axis_angle
        from cctbx_orientation_ext import crystal_orientation
        from dxtbx.model import Crystal
        for i_a in range(0, len(dxtbx_crystal_models) - 1):
            for i_b in range(i_a + 1, len(dxtbx_crystal_models)):
                cryst_a = dxtbx_crystal_models[i_a]
                cryst_b = dxtbx_crystal_models[i_b]
                cryst_a_ori = crystal_orientation(cryst_a.get_A(), True)
                cryst_b_ori = crystal_orientation(cryst_b.get_A(), True)
                try:
                    best_similarity_transform = cryst_b_ori.best_similarity_transformation(
                        other=cryst_a_ori,
                        fractional_length_tolerance=20.00,
                        unimodular_generator_range=1)
                    cryst_b_ori_best = cryst_b_ori.change_basis(
                        best_similarity_transform)
                except Exception as e:
                    # No transformation found: compare with b as it is.
                    cryst_b_ori_best = cryst_b_ori

                # FIXME hardcoded space group for myoglobin LS49
                cryst_b_best = Crystal(cryst_b_ori_best.direct_matrix()[0:3],
                                       cryst_b_ori_best.direct_matrix()[3:6],
                                       cryst_b_ori_best.direct_matrix()[6:9],
                                       'P 1 21 1')
                R_ab, axis, angle, cb_op_ab = difference_rotation_matrix_axis_angle(
                    cryst_a, cryst_b_best)
                # FIXME
                if abs(angle) < min_angle:  # degrees
                    close_models_list.append((i_a, i_b))

        # Now prune the dxtbx_crystal_models list: the later model of each
        # close pair is dropped and its experiments are reassigned to the
        # earlier one.
        unique_experiments_list = flex.int(range(len(dxtbx_crystal_models)))
        for close_models in close_models_list:
            i_a, i_b = close_models
            if dxtbx_crystal_models[i_a] is not None and dxtbx_crystal_models[
                    i_b] is not None:
                dxtbx_crystal_models[i_b] = None
                unique_experiments_list[i_b] = i_a
                clustered_experiments_list.set_selected(
                    clustered_experiments_list == i_b, i_a)

        # Renumber the surviving models consecutively.
        counter = -1
        for ii, model in enumerate(dxtbx_crystal_models):
            if model is not None:
                counter += 1
                clustered_experiments_list.set_selected(
                    clustered_experiments_list == unique_experiments_list[ii],
                    counter)
        dxtbx_crystal_models = [
            x for x in dxtbx_crystal_models if x is not None
        ]

    if len(dxtbx_crystal_models) > 0:
        return dxtbx_crystal_models, list(clustered_experiments_list)
    else:
        # If nothing works, at least return the 1st crystal model that was found
        return [experiments_list[0].crystals()[0]], None
def run_detail(show_plot, save_plot, use_dummy_data=False):
    """Build a distance matrix and hand it to plot_with_dimensional_embedding.

    Unit cells are read from the file named by ``sys.argv[1]``; each line
    holds six unit cell parameters followed by a space group symbol.  The
    distance matrix is either computed from the metrical matrices of those
    cells with ``NCDist_flatten`` or, when ``use_dummy_data`` is true, from
    100 two-dimensional blob points generated with sklearn.  The matrix is
    divided by its maximum before being passed on.

    :param show_plot: if true, display plots interactively
    :param save_plot: if true, save the unit cell scatter plot to
      ``plot_name``
    :param use_dummy_data: if true, use sklearn blob data for the distance
      matrix (the input file is still read); returns early after printing a
      message when sklearn is not installed
    :return: None
    """
    file_name = sys.argv[1]
    from xfel.clustering.singleframe import CellOnlyFrame
    from cctbx import crystal
    cells = []
    # Iterate the file object directly: file.xreadlines() does not exist on
    # Python 3, and the context manager closes the handle.
    with open(file_name, "r") as handle:
        for line in handle:
            tokens = line.strip().split()
            unit_cell = tuple(float(x) for x in tokens[0:6])
            space_group_symbol = tokens[6]
            crystal_symmetry = crystal.symmetry(
                unit_cell=unit_cell, space_group_symbol=space_group_symbol)
            cells.append(CellOnlyFrame(crystal_symmetry, path=None))
    MM = [c.mm for c in cells]  # get all metrical matrices
    from scitbx.array_family import flex
    MM_double = flex.double()
    for i in range(len(MM)):
        Tup = MM[i]
        for j in range(6):
            MM_double.append(Tup[j])

    print("There are %d cells" % (len(MM)))
    if show_plot or save_plot:
        import matplotlib
        if not show_plot:
            # http://matplotlib.org/faq/howto_faq.html#generate-images-without-having-a-window-appear
            matplotlib.use('Agg')  # use a non-interactive backend
        from matplotlib import pyplot as plt
        plt.figure(1)
        plt.plot([c.uc[0] for c in cells], [c.uc[1] for c in cells],
                 "k.",
                 markersize=3.)
        plt.axes().set_aspect("equal")
        if save_plot:
            # NOTE(review): plot_name is not defined in this function; it
            # must exist at module level or this raises NameError - confirm.
            plt.savefig(plot_name,
                        size_inches=(10, 10),
                        dpi=300,
                        bbox_inches='tight')
        if show_plot:
            plt.show()

    print("Now constructing a Dij matrix.")
    NN = len(MM)
    import omptbx
    omptbx.omp_set_num_threads(64)
    from cctbx.uctbx.determine_unit_cell import NCDist_flatten
    if use_dummy_data:
        # Generate blob data using sklearn. See example here.
        # http://scikit-learn.org/stable/auto_examples/cluster/plot_cluster_comparison.html
        try:
            from sklearn import datasets
        except ImportError:
            print(
                "Module sklearn not available. Needed to generate dummy data.")
            # Nothing can be generated without sklearn; stop here instead of
            # failing with a NameError on `datasets` a few lines later.
            return
        import numpy as np
        NN = 100
        blobs = datasets.make_blobs(n_samples=NN, random_state=22)
        xx = []
        yy = []
        Dij = np.zeros([NN, NN])
        Dij = flex.double(Dij)
        for x, y in blobs[0]:
            xx.append(x)
            yy.append(y)
        # Get Dij matrix: pairwise Euclidean distance between the blob points
        for i in range(len(xx)):
            for j in range(len(xx)):
                dij2 = (xx[i] - xx[j]) * (xx[i] - xx[j]) + (yy[i] - yy[j]) * (
                    yy[i] - yy[j])
                dij = np.sqrt(dij2)
                Dij[i * len(xx) + j] = dij
        if show_plot:
            import matplotlib.pyplot as plt
            plt.scatter(xx, yy)
            plt.show()
    else:
        Dij = NCDist_flatten(MM_double)  # loop is flattened
    plot_with_dimensional_embedding(Dij / flex.max(Dij), show_plot=show_plot)
def get_uc_consensus(experiments_list,
                     show_plot=False,
                     return_only_first_indexed_model=False,
                     finalize_method=None,
                     clustering_params=None):
    '''
    Select consensus crystal model(s) from a list of experiments.

    Unit cells are clustered with the Rodriguez Laio 2014 method (clustering
    code taken from github.com/cctbx-xfel/cluster_regression).  Within every
    sufficiently populated unit cell cluster the crystal orientations are
    clustered as well, and the crystal model at the centre of each
    orientational cluster is collected.  Models that are rotated by less than
    a minimum angle with respect to another collected model are pruned.

    :param experiments_list: sequence of experiments objects; each must
                             provide crystals() returning at least one crystal
                             model.
    :param show_plot: if True, display diagnostic matplotlib plots.
    :param return_only_first_indexed_model: if True, skip all clustering and
                             return the first crystal model of the first entry.
    :param finalize_method: accepted for interface compatibility; not used.
    :param clustering_params: accepted for interface compatibility; not used.
    :returns: tuple (list of crystal models, None).  Falls back to the first
              crystal model of the first entry when fewer than 5 cells are
              available or when no orientational cluster survives.
    '''
    if return_only_first_indexed_model:
        return [experiments_list[0].crystals()[0]], None

    from xfel.clustering.singleframe import CellOnlyFrame
    save_plot = False
    # Flag for testing Lysozyme data from NKS. Make sure the cluster_regression
    # repository is present and configured.
    # Program will exit after plots are displayed if this flag is true
    test_nks = False
    cells = []
    if test_nks:
        from cctbx import crystal
        import libtbx.load_env
        cluster_regression = libtbx.env.find_in_repositories(
            relative_path="cluster_regression", test=os.path.isdir)
        file_name = os.path.join(cluster_regression, 'examples',
                                 'lysozyme1341.txt')
        # Iterate the file object directly: xreadlines() does not exist in
        # Python 3, and the context manager closes the handle.
        with open(file_name, "r") as handle:
            for line in handle:
                tokens = line.strip().split()
                unit_cell = tuple(float(x) for x in tokens[0:6])
                space_group_symbol = tokens[6]
                crystal_symmetry = crystal.symmetry(
                    unit_cell=unit_cell,
                    space_group_symbol=space_group_symbol)
                cells.append(CellOnlyFrame(crystal_symmetry))
    else:
        for experiment in experiments_list:
            if len(experiment.crystals()) > 1:
                print('IOTA:Should have only one crystal model')
            crystal_symmetry = experiment.crystals()[0].get_crystal_symmetry()
            cells.append(CellOnlyFrame(crystal_symmetry))

    MM = [c.mm for c in cells]  # metrical matrices
    # Flatten the first six elements of every metrical matrix into one array.
    MM_double = flex.double()
    for metrical_matrix in MM:
        for j in range(6):
            MM_double.append(metrical_matrix[j])
    print('There are %d cells' % len(MM))

    # First two unit cell parameters of each cell, used as plot coordinates.
    cell_a = [c.uc[0] for c in cells]
    cell_b = [c.uc[1] for c in cells]
    coord_x = flex.double(cell_a)
    coord_y = flex.double(cell_b)
    if show_plot or save_plot:
        import matplotlib
        if not show_plot:
            matplotlib.use('Agg')  # non-interactive backend, only saving
        import matplotlib.pyplot as plt
        plt.plot(cell_a, cell_b, "k.", markersize=3.)
        # gca() returns the axes just drawn on; axes() may create a new one.
        plt.gca().set_aspect("equal")
        if save_plot:
            # savefig has no size_inches argument; size the figure instead.
            plt.gcf().set_size_inches(10, 10)
            plt.savefig('uc_cluster.png', dpi=300, bbox_inches='tight')
        if show_plot:
            plt.show()

    # Too few cells to cluster: bail out before building the distance matrix
    # and taking its sample standard deviation.
    #FIXME should be a PHIL param
    if len(cells) < 5:
        return [experiments_list[0].crystals()[0]], None

    print('Now constructing a Dij matrix: Starting Unit Cell clustering')
    NN = len(MM)
    from cctbx.uctbx.determine_unit_cell import NCDist_flatten
    Dij = NCDist_flatten(MM_double)
    # The cutoff distance is the sample standard deviation of all distances.
    d_c = flex.mean_and_variance(
        Dij.as_1d()).unweighted_sample_standard_deviation()  #6.13
    CM = clustering_manager(Dij=Dij, d_c=d_c, max_percentile_rho=0.95)
    n_cluster = 1 + flex.max(CM.cluster_id_final)
    print(len(cells), ' datapoints have been analyzed')
    print('%d CLUSTERS' % n_cluster)
    for i in range(n_cluster):
        item = flex.first_index(CM.cluster_id_maxima, i)
        print('Cluster %d central Unit cell = %d' % (i, item))
        cells[item].crystal_symmetry.show_summary()

    # More plots for debugging
    if show_plot:
        import matplotlib.pyplot as plt
        # Decision graph
        plt.plot(CM.rho, CM.delta, "r.", markersize=3.)
        for x in range(NN):
            if CM.cluster_id_maxima[x] >= 0:
                plt.plot([CM.rho[x]], [CM.delta[x]], "ro")
        plt.show()

        # Cells coloured by cluster, with every cluster centre highlighted.
        appcolors = [
            'b', 'r', '#ff7f0e', '#2ca02c', '#9467bd', '#8c564b', '#e377c2',
            '#7f7f7f', '#bcbd22', '#17becf'
        ]
        colors = [appcolors[i % len(appcolors)] for i in CM.cluster_id_full]
        plt.scatter(coord_x,
                    coord_y,
                    marker='o',
                    color=colors,
                    linewidth=0.4,
                    edgecolor='k')
        for i in range(n_cluster):
            item = flex.first_index(CM.cluster_id_maxima, i)
            plt.plot([cells[item].uc[0]], [cells[item].uc[1]], 'y.')
        # Show once, after all centres are drawn on the same figure.
        plt.gca().set_aspect("equal")
        plt.show()
    if test_nks:
        exit()

    # Now look at each unit cell cluster for orientational clustering
    # idea is to cluster the orientational component in each of the unit cell
    # clusters.  The flag is always True here because the True case of
    # return_only_first_indexed_model already returned above (temporary).
    do_orientational_clustering = not return_only_first_indexed_model
    dxtbx_crystal_models = []
    if do_orientational_clustering:
        dxtbx_crystal_models = _orientational_cluster_models(
            CM, experiments_list, show_plot)

    # Make sure the crystal models are not too close to each other
    # FIXME should be a PHIL
    min_angle = 5.0  # taken from indexer.py
    dxtbx_crystal_models = _prune_close_crystal_models(dxtbx_crystal_models,
                                                       min_angle)
    if dxtbx_crystal_models:
        return dxtbx_crystal_models, None
    # If nothing works, atleast return the 1st crystal model that was found
    return [experiments_list[0].crystals()[0]], None


def _orientational_cluster_models(CM, experiments_list, show_plot):
    '''
    Cluster crystal orientations inside every unit cell cluster and return
    the crystal model at the centre of each populated orientational cluster.

    :param CM: clustering manager of the unit cell clustering; its
               cluster_id_full and cluster_id_final arrays are read.
    :param experiments_list: experiments, in the same order as the points
                             clustered by CM.
    :param show_plot: if True, display a decision graph per unit cell cluster.
    :returns: list of crystal models (possibly empty).
    '''
    from collections import Counter
    from scitbx.matrix import sqr
    from cctbx_orientation_ext import crystal_orientation

    print('IOTA: Starting orientational clustering')
    # Put all experiments from the same unit cell cluster together.
    uc_experiments_list = {}
    for i, experiment in enumerate(experiments_list):
        uc_experiments_list.setdefault(CM.cluster_id_full[i],
                                       []).append(experiment)

    uc_cluster_count = Counter(list(CM.cluster_id_final))
    Dij_ori = {}  # orientational distance matrix for each unit cell cluster
    for cluster, n_final in uc_cluster_count.items():
        # Make sure there are atleast a minimum number of samples in the cluster
        if n_final < 5:
            continue
        members = uc_experiments_list.get(cluster)
        if not members:
            # Label present in cluster_id_final but without any member in
            # cluster_id_full; there is nothing to compare.
            continue
        # Size the matrix by the members actually grouped above so that it
        # agrees with the stride used for the flat indices below.
        n_members = len(members)
        dij_matrix = flex.double([[0.0] * n_members] * n_members)
        for i in range(n_members - 1):
            crystal_i = members[i].crystals()[0]
            for j in range(i + 1, n_members):
                dij_ori = get_dij_ori(crystal_i, members[j].crystals()[0])
                # The matrix is symmetric; fill both triangles.
                dij_matrix[n_members * i + j] = dij_ori
                dij_matrix[n_members * j + i] = dij_ori
        Dij_ori[cluster] = dij_matrix

    # Now do the orientational cluster analysis
    crystal_models = []
    for cluster, dij_matrix in Dij_ori.items():
        d_c_ori = flex.mean_and_variance(
            dij_matrix.as_1d()).unweighted_sample_standard_deviation()
        CM_ori = clustering_manager(Dij=dij_matrix,
                                    d_c=d_c_ori,
                                    max_percentile_rho=0.85)
        n_cluster_ori = 1 + flex.max(CM_ori.cluster_id_final)
        final_ids = list(CM_ori.cluster_id_final)
        population = Counter(final_ids)
        #FIXME should be a PHIL param
        for i in range(n_cluster_ori):
            if population[i] < 5:
                continue
            item = flex.first_index(CM_ori.cluster_id_maxima, i)
            crystal_model = uc_experiments_list[cluster][item].crystals()[0]
            crystal_models.append(crystal_model)
            orientation = crystal_orientation(crystal_model.get_A(), True)
            A_direct = sqr(
                orientation.reciprocal_matrix()).transpose().inverse()
            print(
                "IOTA: Direct A matrix 1st element of orientational cluster %d  = %12.6f"
                % (i, A_direct[0]))
        if show_plot:
            # Decision graph
            # (1+fraction of limits by which xlim,ylim should be set)
            stretch_plot_factor = 1.05
            import matplotlib.pyplot as plt
            plt.plot(CM_ori.rho, CM_ori.delta, "r.", markersize=3.)
            for x in range(len(final_ids)):
                if CM_ori.cluster_id_maxima[x] >= 0:
                    plt.plot([CM_ori.rho[x]], [CM_ori.delta[x]], "ro")
            plt.xlim([-10, stretch_plot_factor * flex.max(CM_ori.rho)])
            plt.ylim([-10, stretch_plot_factor * flex.max(CM_ori.delta)])
            plt.show()
    return crystal_models


def _prune_close_crystal_models(crystal_models, min_angle):
    '''
    Drop crystal models that nearly coincide in orientation with another one.

    Every distinct pair is compared; when the rotation angle between the two
    is below min_angle (degrees) and both are still kept, the earlier model of
    the pair is dropped.

    :param crystal_models: list of crystal models; not modified.
    :param min_angle: minimum rotation angle in degrees between kept models.
    :returns: new list with the surviving crystal models.
    '''
    kept = list(crystal_models)
    n_models = len(kept)
    if n_models < 2:
        return kept
    from dials.algorithms.indexing.compare_orientation_matrices import (
        difference_rotation_matrix_axis_angle)
    close_pairs = []
    for i_a in range(n_models - 1):
        # Start at i_a + 1: comparing a model with itself would flag it as
        # "too close" and wrongly remove it.
        for i_b in range(i_a + 1, n_models):
            R_ab, axis, angle, cb_op_ab = difference_rotation_matrix_axis_angle(
                kept[i_a], kept[i_b])
            # FIXME
            if abs(angle) < min_angle:  # degrees
                close_pairs.append((i_a, i_b))

    # Now prune the list of crystal models
    for i_a, i_b in close_pairs:
        if kept[i_a] is not None and kept[i_b] is not None:
            kept[i_a] = None
    return [model for model in kept if model is not None]