Exemplo n.º 1
0
            def show_plot(widegrid, excursi):
                """Contour-plot the wide-scope search grid and return the best offset.

                The plot is written to ``search_scope.png``.  Three markers are
                drawn on top of the contours: the zero offset (green circle),
                ``new_offset`` (red star) and the grid point holding the maximum
                of ``excursi`` (black square).

                ``plot_px_sz``, ``new_offset``, ``beamr1`` and ``beamr2`` are read
                from the enclosing scope.

                Args:
                    widegrid: number of grid points along each axis.
                    excursi: flex array of ``widegrid * widegrid`` scores; it is
                        reshaped to a ``widegrid x widegrid`` grid (the return
                        value of ``reshape`` is unused, so this presumably acts
                        in place -- confirm against the flex API).

                Returns:
                    The offset of the maximum-score grid point, expressed as
                    ``x * beamr1 + y * beamr2``.
                """
                excursi.reshape(flex.grid(widegrid, widegrid))
                idx_max = flex.max_index(excursi)

                # Grid coordinates centred on zero, scaled by the pixel size.
                half = widegrid // 2
                idxs = [(i - half) * plot_px_sz for i in xrange(widegrid)]

                # The flat index of the maximum is decoded as
                # (row, column) = divmod(idx_max, widegrid).
                best_x = idxs[idx_max % widegrid]
                best_y = idxs[idx_max // widegrid]

                from matplotlib import pyplot as plt
                plt.figure()
                CS = plt.contour(idxs, idxs, excursi.as_numpy_array())
                plt.clabel(CS, inline=1, fontsize=10, fmt="%6.3f")
                plt.title("Wide scope search for detector origin offset")
                plt.scatter([0.0], [0.0], color='g', marker='o')
                plt.scatter([new_offset[0]], [new_offset[1]],
                            color='r',
                            marker='*')
                plt.scatter([best_x], [best_y], color='k', marker='s')
                # gca() returns the axes that already holds the contours;
                # plt.axes() would add a new, empty axes on top of the plot
                # in current matplotlib releases.
                plt.gca().set_aspect("equal")
                plt.xlabel("offset (mm) along beamr1 vector")
                plt.ylabel("offset (mm) along beamr2 vector")
                plt.savefig("search_scope.png")

                # Convert the best grid point into an offset in the
                # (beamr1, beamr2) basis.
                trial_origin_offset = best_x * beamr1 + best_y * beamr2
                return trial_origin_offset
      def show_plot(widegrid,excursi):
        """Contour-plot the wide-scope search grid and return the best offset.

        The plot is shown interactively.  Three markers are drawn on top of
        the contours: the zero offset (green circle), ``new_offset`` (red
        star) and the grid point holding the maximum of ``excursi`` (black
        square).

        ``plot_px_sz``, ``new_offset``, ``beamr1`` and ``beamr2`` are read
        from the enclosing scope.

        ``widegrid`` is the number of grid points along each axis and
        ``excursi`` a flex array of ``widegrid * widegrid`` scores; it is
        reshaped to a ``widegrid x widegrid`` grid (the return value of
        ``reshape`` is unused, so this presumably acts in place -- confirm
        against the flex API).

        Returns the offset of the maximum-score grid point, expressed as
        ``x * beamr1 + y * beamr2``.
        """
        excursi.reshape(flex.grid(widegrid, widegrid))
        idx_max = flex.max_index(excursi)

        # Grid coordinates centred on zero, scaled by the pixel size.
        half = widegrid//2
        idxs = [(i - half)*plot_px_sz for i in xrange(widegrid)]

        # The flat index of the maximum is decoded as
        # (row, column) = divmod(idx_max, widegrid).
        best_x = idxs[idx_max%widegrid]
        best_y = idxs[idx_max//widegrid]

        from matplotlib import pyplot as plt
        plt.figure()
        CS = plt.contour(idxs, idxs, excursi.as_numpy_array())
        plt.clabel(CS, inline=1, fontsize=10, fmt="%6.3f")
        plt.title("Wide scope search for detector origin offset")
        plt.scatter([0.0],[0.0],color='g',marker='o')
        plt.scatter([new_offset[0]] , [new_offset[1]],color='r',marker='*')
        plt.scatter([best_x] , [best_y],color='k',marker='s')
        # gca() returns the axes that already holds the contours; plt.axes()
        # would add a new, empty axes on top of the plot in current
        # matplotlib releases.
        plt.gca().set_aspect("equal")
        plt.xlabel("offset (mm) along beamr1 vector")
        plt.ylabel("offset (mm) along beamr2 vector")
        plt.show()

        # Convert the best grid point into an offset in the (beamr1, beamr2)
        # basis.
        trial_origin_offset = best_x*beamr1 + best_y*beamr2
        return trial_origin_offset
Exemplo n.º 3
0
def reduce_raw_data(raw_data,
                    qmax,
                    bandwidth,
                    level=0.05,
                    q_background=None,
                    outfile=''):
    """Trim, background-correct and resample a scattering curve in place.

    Steps performed:

    1. The curve is cut so that it starts at the point of maximum intensity
       (``qmin``) and ``qmax`` is clipped to the last available q value.
    2. If ``q_background`` is given, the mean intensity of the tail selected
       by that threshold is subtracted and the absolute value is taken.
    3. Intensities and sigmas are linearly interpolated onto a uniform grid
       that starts at ``qmin`` with spacing ``bandwidth``.

    Progress messages are echoed to stdout and, when ``outfile`` is a
    non-empty path, appended to that file as well.

    Args:
        raw_data: object exposing flex arrays ``q``, ``i`` and ``s``; these
            attributes are overwritten with the resampled data.
        qmax: upper q limit requested by the caller.
        bandwidth: step size of the uniform q grid.
        level: only reported in the log; not otherwise used here.
        q_background: optional q threshold used to estimate the background.
        outfile: path of the log file to append to.  An empty string
            (the default) disables file logging.

    Returns:
        The same ``raw_data`` object, modified in place.
    """
    log2 = sys.stdout

    def report(lines):
        # Append the lines to the log file (if one was requested) and then
        # echo them to stdout, one line per entry.
        text = "".join(line + "\n" for line in lines)
        if outfile:
            with open(outfile, "a") as log:
                log.write(text)
        log2.write(text)

    report([
        " ====  Data reduction ==== ",
        "  Preprocessing of data increases efficiency of shape retrieval procedure.\n",
        "   -  Interpolation stepsize                           :  %4.3e" % bandwidth,
        "   -  Uniform density criteria:  level is set to       :  %4.3e" % level,
        "                                 maximum q to consider :  %4.3e" % qmax,
    ])

    # Start the usable range at the intensity maximum and never extrapolate
    # beyond the last measured q value.
    qmin_indx = flex.max_index(raw_data.i)
    qmin = raw_data.q[qmin_indx]
    if qmax > raw_data.q[-1]:
        qmax = raw_data.q[-1]

    report([
        "      Resulting q range to use in  search:   q start   :  %4.3e" % qmin,
        "                                             q stop    :  %4.3e" % qmax,
    ])

    raw_q = raw_data.q[qmin_indx:]
    raw_i = raw_data.i[qmin_indx:]
    raw_s = raw_data.s[qmin_indx:]
    ### Take care of the background (set zero at very high q) ###
    if (q_background is not None):
        cutoff = flex.bool(raw_q > q_background)
        # NOTE(review): flex.last_index may return None when no element
        # matches -- confirm how that case should be handled here.
        q_bk_indx = flex.last_index(cutoff, False)
        if (q_bk_indx < raw_q.size()):
            bkgrd = flex.mean(raw_i[q_bk_indx:])
            # The original opened an undefined name `f` here; the log file
            # is `outfile`, as for every other message.
            report([
                "Background correction: I=I-background, where background= "
                + str(bkgrd),
            ])
            raw_i = flex.abs(raw_i - bkgrd)

    # Uniform q grid: qmin, qmin + bandwidth, ... not exceeding qmax.
    q = flex.double(range(int(
        (qmax - qmin) / bandwidth) + 1)) * bandwidth + qmin
    raw_data.i = flex.linear_interpolation(raw_q, raw_i, q)
    raw_data.s = flex.linear_interpolation(raw_q, raw_s, q)
    raw_data.q = q

    return raw_data
Exemplo n.º 4
0
 def find_largest(self, matches, used_flags=None):
     """Return ``(size, index)`` of the largest match that is not yet used.

     The size of a match is ``match[0].size()``.  Entries whose flag in
     ``used_flags`` is set have their size zeroed before the maximum is
     taken.  When ``used_flags`` is omitted, no match is treated as used.
     """
     n_matches = len(matches)
     if used_flags is None:
         used_flags = flex.bool(n_matches, False)

     match_sizes = flex.double()
     for entry in matches:
         match_sizes.append(entry[0].size())

     # Weight of 1 for available matches, 0 for the ones already used.
     weights = flex.double(n_matches, 1)
     weights = weights.set_selected(used_flags.iselection(), 0)
     weighted_sizes = match_sizes * weights

     return flex.max(weighted_sizes), flex.max_index(weighted_sizes)
Exemplo n.º 5
0
 def find_largest(self, matches, used_flags=None):
     """Find the biggest match among those not flagged as used.

     Each match contributes ``match[0].size()``; sizes of matches flagged in
     ``used_flags`` are multiplied by zero.  Without ``used_flags`` every
     match is considered.

     Returns a ``(max_size, max_loc)`` pair: the largest remaining size and
     its position in ``matches``.
     """
     count = len(matches)
     if used_flags is None:
         used_flags = flex.bool(count, False)

     raw_sizes = flex.double()
     for candidate in matches:
         raw_sizes.append(candidate[0].size())

     # 1.0 keeps a size, 0.0 suppresses a match that was already used.
     keep = flex.double(count, 1)
     keep = keep.set_selected(used_flags.iselection(), 0)
     effective = raw_sizes * keep

     max_size = flex.max(effective)
     max_loc = flex.max_index(effective)
     return max_size, max_loc
Exemplo n.º 6
0
 def finite_difference_test(self):
   """Record analytical and finite-difference gradients for one parameter.

   The parameter with the largest absolute gradient in ``self.g`` is
   shifted by +/- eps around ``self.par_min``, the target is evaluated at
   both points and the central difference is appended to
   ``self.buffer_fin``; the matching analytical gradient goes to
   ``self.buffer_ana``.  The model is put back to ``self.par_min``
   afterwards.  Nothing is done unless ``r_work`` exceeds 1.e-3.
   """
   if not (self.fmodel.r_work() > 1.e-3):
     return

   def move_to(parameters):
     # Apply a parameter set and bring the structure factors up to date.
     self.apply_shifts(par = parameters)
     self.fmodel.update_xray_structure(update_f_calc=True)

   idx = flex.max_index(flex.abs(self.g))
   step = 1.e-5
   shifted = list(self.par_min)

   # Target at par_min + step.
   shifted[idx] = self.par_min[idx] + step
   move_to(shifted)
   target_plus = self.get_tg(compute_gradients=False).target()

   # Target at par_min - step.
   shifted[idx] = self.par_min[idx] - step
   move_to(shifted)
   target_minus = self.get_tg(compute_gradients=False).target()

   # Restore the unshifted parameters before recording the results.
   move_to(self.par_min)
   self.buffer_ana.append(self.g[idx])
   self.buffer_fin.append((target_plus-target_minus)/(step*2))
Exemplo n.º 7
0
 def finite_difference_test(self):
     """Compare one analytical gradient component with a central difference.

     Picks the parameter whose entry in ``self.g`` has the largest absolute
     value, evaluates the target at ``par_min + eps`` and ``par_min - eps``
     for that parameter, and appends the analytical gradient to
     ``self.buffer_ana`` and the finite-difference estimate to
     ``self.buffer_fin``.  The parameters are reset to ``self.par_min`` at
     the end.  The test is skipped unless ``r_work`` is above 1.e-3.
     """
     if not (self.fmodel.r_work() > 1.e-3):
         return

     def move_to(parameters):
         # Apply a parameter set and refresh the calculated structure factors.
         self.apply_shifts(par=parameters)
         self.fmodel.update_xray_structure(update_f_calc=True)

     k_max = flex.max_index(flex.abs(self.g))
     delta = 1.e-5
     trial = list(self.par_min)

     # Forward step.
     trial[k_max] = self.par_min[k_max] + delta
     move_to(trial)
     t_forward = self.get_tg(compute_gradients=False).target()

     # Backward step.
     trial[k_max] = self.par_min[k_max] - delta
     move_to(trial)
     t_backward = self.get_tg(compute_gradients=False).target()

     # Back to the unperturbed parameters.
     move_to(self.par_min)
     self.buffer_ana.append(self.g[k_max])
     self.buffer_fin.append((t_forward - t_backward) / (delta * 2))
Exemplo n.º 8
0
def unit_cell_histograms(crystals):
    """Histogram the six unit-cell parameters over a set of crystals.

    For every crystal the six values of ``get_unit_cell().parameters()`` are
    collected, one flex array per parameter, and each array is binned into a
    100-slot histogram.  The modal cell (centre of the most populated slot of
    each histogram) and the median cell are printed.

    Returns:
        A list of six ``flex.histogram`` objects, one per cell parameter.
    """
    n_params = 6
    columns = [flex.double() for _ in range(n_params)]
    for crystal in crystals:
        cell_params = crystal.get_unit_cell().parameters()
        for k in range(n_params):
            columns[k].append(cell_params[k])

    histograms = [flex.histogram(column, n_slots=100) for column in columns]

    medians = [flex.median(column) for column in columns]
    median_unit_cell = uctbx.unit_cell(medians)

    # Mode of each parameter = centre of the slot with the highest count.
    modes = [
        hist.slot_centers()[flex.max_index(hist.slots())]
        for hist in histograms
    ]
    modal_unit_cell = uctbx.unit_cell(modes)

    print("Modal unit cell: %s" % str(modal_unit_cell))
    print("Median unit cell: %s" % str(median_unit_cell))

    return histograms
Exemplo n.º 9
0
 def had_phase_transition(self):
   """Decide whether ``self.differences`` shows a complete peak.

   The checks, in order (any failure returns False):

   * at least 5 differences are available;
   * the rise, from just after the last pre-peak sample below
     ``noise_level_before * peak`` up to the peak, spans at least 4 samples;
   * a negative difference occurs at or after the peak;
   * that first negative sample lies at least 10 samples after the start
     of the rise and at least 10 samples before the end of ``self.values``;
   * the largest absolute value among the last 5 differences does not
     exceed ``noise_level_after * peak``.
   """
   diffs = self.differences
   if len(diffs) < 5:
     return False

   peak = flex.max_index(diffs)
   peak_height = diffs[peak]

   # Start of the rise: one past the last pre-peak sample at noise level,
   # or the very first sample when there is none.
   quiet = (diffs < self.noise_level_before*peak_height)
   last_quiet = flex.last_index(quiet[:peak], True)
   rise_start = 0 if last_quiet is None else last_quiet + 1
   if peak - rise_start < 4:
     return False

   # End of the peak: first negative difference at or after the maximum.
   negative = diffs < 0
   first_negative = flex.first_index(negative[peak:], True)
   if first_negative is None:
     return False
   fall_end = first_negative + peak

   if after_too_close(fall_end - rise_start) or after_too_close(len(self.values) - fall_end):
     return False

   tail = scitbx.math.basic_statistics(diffs[-5:])
   return not (tail.max_absolute > self.noise_level_after*peak_height)
Exemplo n.º 10
0
 def had_phase_transition(self):
     """Return True when ``self.differences`` contains a finished peak.

     A peak counts as finished when all of the following hold:

     * there are at least 5 differences;
     * at least 4 samples separate the peak from the start of its rise
       (the sample after the last pre-peak value below
       ``noise_level_before * peak``);
     * some difference at or after the peak is negative;
     * the first such negative sample is at least 10 samples after the
       start of the rise and at least 10 samples before the end of
       ``self.values``;
     * the last 5 differences all stay within
       ``noise_level_after * peak`` in absolute value.
     """
     series = self.differences
     if len(series) < 5:
         return False

     i_peak = flex.max_index(series)
     peak_value = series[i_peak]

     # Locate where the rise towards the peak begins.
     at_noise_level = (series < self.noise_level_before * peak_value)
     last_noise = flex.last_index(at_noise_level[:i_peak], True)
     if last_noise is None:
         start = 0
     else:
         start = last_noise + 1
     if i_peak - start < 4:
         return False

     # Locate the first negative difference at or after the peak.
     is_negative = series < 0
     offset = flex.first_index(is_negative[i_peak:], True)
     if offset is None:
         return False
     end = offset + i_peak

     if end - start < 10:
         return False
     if len(self.values) - end < 10:
         return False

     # The tail must have settled back down to noise level.
     tail_stats = scitbx.math.basic_statistics(series[-5:])
     settled = not (tail_stats.max_absolute >
                    self.noise_level_after * peak_value)
     return settled
Exemplo n.º 11
0
    def __init__(self, **kwargs):
        """Cluster points from a precomputed distance matrix.

        The keyword arguments are stored on the instance by
        ``group_args.__init__``; this method then reads ``self.Dij`` (the
        distance matrix) and ``self.d_c`` (passed through to
        ``Rodriguez_Laio_clustering_2014``).

        Results stored on the instance:

        * ``rho``, ``delta``: the values returned by ``R.get_rho()`` and
          ``R.get_delta()``.
        * ``cluster_id_maxima``: cluster ids with only the chosen cluster
          centres labelled (-1 elsewhere).
        * ``cluster_id_full``: ids after ``R.cluster_assignment``.
        * ``cluster_id_final``: ids after halo and sparse points have been
          reset to -1.

        Progress and diagnostics are printed to stdout.
        """
        group_args.__init__(self, **kwargs)
        # require Dij, d_c
        # NOTE(review): P is rebound at each stage; presumably each Profiler
        # reports when it is replaced or destroyed -- confirm.
        P = Profiler("2. calculate rho density")
        print("finished Dij, now calculating rho_i, the density")
        from xfel.clustering import Rodriguez_Laio_clustering_2014
        R = Rodriguez_Laio_clustering_2014(distance_matrix=self.Dij,
                                           d_c=self.d_c)
        self.rho = rho = R.get_rho()
        ave_rho = flex.mean(rho.as_double())
        # Number of points = first dimension of the distance matrix.
        NN = self.Dij.focus()[0]
        print("The average rho_i is %5.2f, or %4.1f%%" %
              (ave_rho, 100 * ave_rho / NN))
        i_max = flex.max_index(rho)

        P = Profiler("3.transition")
        print("the index with the highest density is %d" % (i_max))
        # Largest distance from the highest-density point to any point.
        delta_i_max = flex.max(
            flex.double([self.Dij[i_max, j] for j in range(NN)]))
        print("delta_i_max", delta_i_max)
        # Point indices sorted by decreasing rho.
        rho_order = flex.sort_permutation(rho, reverse=True)
        rho_order_list = list(rho_order)

        P = Profiler("4. delta")
        self.delta = delta = R.get_delta(rho_order=rho_order,
                                         delta_i_max=delta_i_max)

        P = Profiler("5. find cluster maxima")
        #---- Now hunting for clusters
        cluster_id = flex.int(NN, -1)  # default -1 means no cluster
        # Point indices sorted by decreasing delta.
        delta_order = flex.sort_permutation(delta, reverse=True)
        N_CLUST = 10  # maximum of 10 points to be considered as possible clusters
        MAX_PERCENTILE_DELTA = 0.10  # cluster centers have to be in the top 10% percentile delta
        MAX_PERCENTILE_RHO = 0.75  # cluster centers have to be in the top 75% percentile rho
        n_cluster = 0
        # Examine at most N_CLUST candidates, and no more than the top
        # MAX_PERCENTILE_DELTA fraction of all points.
        max_n_delta = min(N_CLUST, int(MAX_PERCENTILE_DELTA * NN))
        for ic in range(max_n_delta):
            # test the density, rho
            item_idx = delta_order[ic]
            if delta[item_idx] < 0.25 * delta[
                    delta_order[0]]:  # too low (another heuristic!)
                continue
            # Rank of this point in the rho ordering (0 = highest rho).
            item_rho_order = rho_order_list.index(item_idx)
            if item_rho_order / NN < MAX_PERCENTILE_RHO:
                cluster_id[item_idx] = n_cluster
                print(ic, item_idx, item_rho_order, cluster_id[item_idx])
                n_cluster += 1
        print("Found %d clusters" % n_cluster)
        for x in range(NN):
            if cluster_id[x] >= 0:
                print("XC", x, cluster_id[x], rho[x], delta[x])
        # Snapshot with only the cluster centres labelled.
        self.cluster_id_maxima = cluster_id.deep_copy()

        P = Profiler("6. assign all points")
        # NOTE(review): the call's return value is unused, so it presumably
        # fills cluster_id in place -- confirm against the implementation.
        R.cluster_assignment(rho_order, cluster_id)

        self.cluster_id_full = cluster_id.deep_copy()

        # assign the halos
        P = Profiler("7. assign halos")
        # NOTE(review): `halo` is never read in this method.
        halo = flex.bool(NN, False)
        border = R.get_border(cluster_id=cluster_id)

        for ic in range(n_cluster
                        ):  #loop thru all border regions; find highest density
            print("cluster", ic, "in border", border.count(True))
            this_border = (cluster_id == ic) & (border == True)
            print(len(this_border), this_border.count(True))
            if this_border.count(True) > 0:
                # Border points with rho below the densest border point of
                # this cluster are moved to the halo (id -1).
                highest_density = flex.max(rho.select(this_border))
                halo_selection = (rho < highest_density) & (this_border
                                                            == True)
                if halo_selection.count(True) > 0:
                    cluster_id.set_selected(halo_selection, -1)
                # Remaining members with rho below a tenth of the cluster's
                # highest rho are excluded as well.
                core_selection = (cluster_id == ic) & ~halo_selection
                highest_density = flex.max(rho.select(core_selection))
                too_sparse = core_selection & (
                    rho.as_double() < highest_density / 10.
                )  # another heuristic
                if too_sparse.count(True) > 0:
                    cluster_id.set_selected(too_sparse, -1)
        self.cluster_id_final = cluster_id.deep_copy()
        print("%d in the excluded halo" % ((cluster_id == -1).count(True)))
Exemplo n.º 12
0
    def __init__(self, **kwargs):
        """Cluster points from a distance matrix using a selectable strategy.

        The keyword arguments are stored on the instance by
        ``group_args.__init__``.  This method reads ``self.Dij``,
        ``self.d_c``, ``self.max_percentile_rho``, ``self.Z_delta`` and
        ``self.strategy`` (set to ``'default'`` when absent).

        Cluster centres are chosen according to ``self.strategy``:

        * ``'one_cluster'``: the single point with the largest
          ``rho * delta`` product.
        * ``'strategy_3'``: the point with the largest ``rho * delta``
          product, plus following points in that ordering whose z-score
          exceeds a fixed threshold.
        * anything else: the highest-rho point plus candidates selected
          from the z-scores of delta.

        Results stored on the instance: ``rho``, ``delta``,
        ``cluster_id_maxima`` (centres only), ``cluster_id_full`` (after
        ``R.cluster_assignment``) and ``cluster_id_final`` (a copy of the
        full assignment; the halo step is commented out).
        """
        group_args.__init__(self, **kwargs)
        print('finished Dij, now calculating rho_i and density')
        from xfel.clustering import Rodriguez_Laio_clustering_2014 as RL
        R = RL(distance_matrix=self.Dij, d_c=self.d_c)
        #from clustering.plot_with_dimensional_embedding import plot_with_dimensional_embedding
        #plot_with_dimensional_embedding(1-self.Dij/flex.max(self.Dij), show_plot=True)
        if hasattr(self, 'strategy') is False:
            self.strategy = 'default'
        self.rho = rho = R.get_rho()
        # NOTE(review): ave_rho is not used further in this method.
        ave_rho = flex.mean(rho.as_double())
        # Number of points = first dimension of the distance matrix.
        NN = self.Dij.focus()[0]
        i_max = flex.max_index(rho)
        # Largest distance from the highest-rho point to any point.
        delta_i_max = flex.max(
            flex.double([self.Dij[i_max, j] for j in range(NN)]))
        # Point indices sorted by decreasing rho.
        rho_order = flex.sort_permutation(rho, reverse=True)
        rho_order_list = list(rho_order)
        self.delta = delta = R.get_delta(rho_order=rho_order,
                                         delta_i_max=delta_i_max)
        cluster_id = flex.int(NN, -1)  # -1 means no cluster
        # Point indices sorted by decreasing delta.
        delta_order = flex.sort_permutation(delta, reverse=True)
        MAX_PERCENTILE_RHO = self.max_percentile_rho  # cluster centers have to be in the top percentile
        n_cluster = 0
        #
        #
        print('Z_DELTA = ', self.Z_delta)

        # Standardise rho and delta to z-scores.  If either has zero spread
        # the z-scores are undefined, so fall back to a single centroid.
        pick_top_solution = False
        rho_stdev = flex.mean_and_variance(
            rho.as_double()).unweighted_sample_standard_deviation()
        delta_stdev = flex.mean_and_variance(
            delta).unweighted_sample_standard_deviation()
        if rho_stdev != 0.0 and delta_stdev != 0:
            rho_z = (rho.as_double() -
                     flex.mean(rho.as_double())) / (rho_stdev)
            delta_z = (delta - flex.mean(delta)) / (delta_stdev)
        else:
            pick_top_solution = True
            if rho_stdev == 0.0:
                # All rho equal: take the first point with the maximum delta.
                centroids = [flex.first_index(delta, flex.max(delta))]
            elif delta_stdev == 0.0:
                # All delta equal: take the first point with the maximum rho.
                centroids = [flex.first_index(rho, flex.max(rho))]

        significant_delta = []
        significant_rho = []
        # Define strategy to decide cluster center here. Only one should be true
        # NOTE(review): strategy2 / strategy3 are only bound inside the two
        # `if self.strategy == ...` blocks below; the elif chain further down
        # relies on debug_fix_clustering being True whenever they are unbound.
        debug_fix_clustering = True
        if self.strategy == 'one_cluster':
            debug_fix_clustering = False
            strategy2 = True
        if self.strategy == 'strategy_3':
            debug_fix_clustering = False
            strategy3 = True
            strategy2 = False

        if debug_fix_clustering:
            if not pick_top_solution:
                # A point is "significant" when its z-score is at least the
                # cutoff in absolute value; the cutoff is 1.0, or the largest
                # z-score if that is smaller.
                delta_z_cutoff = min(1.0, max(delta_z))
                rho_z_cutoff = min(1.0, max(rho_z))
                for ic in range(NN):
                    # test the density & rho
                    if delta_z[ic] >= delta_z_cutoff or delta_z[
                            ic] <= -delta_z_cutoff:
                        significant_delta.append(ic)
                    if rho_z[ic] >= rho_z_cutoff or rho_z[ic] <= -rho_z_cutoff:
                        significant_rho.append(ic)
                if True:
                    # Use idea quoted in Rodriguez Laio 2014 paper
                    # " Thus, cluster centers are recognized as points for which the value of delta is anomalously large."
                    centroid_candidates = list(significant_delta)
                    candidate_delta_z = flex.double()
                    # NOTE(review): delta_z_of_rho_order_0 is only assigned
                    # when rho_order[0] is among the candidates; otherwise the
                    # comparison in the loop below would hit an unbound local.
                    for ic in centroid_candidates:
                        if ic == rho_order[0]:
                            delta_z_of_rho_order_0 = delta_z[ic]
                        candidate_delta_z.append(delta_z[ic])
                    # Candidate positions sorted by decreasing delta z-score.
                    i_sorted = flex.sort_permutation(candidate_delta_z,
                                                     reverse=True)
                    # Check that once sorted the top one is not equal to the 2nd or 3rd position
                    # If there is a tie, assign centroid to the first one in rho order
                    centroids = []
                    # rho_order[0] has to be a centroid
                    centroids.append(rho_order[0])

                    #centroids.append(centroid_candidates[i_sorted[0]])
                    for i in range(0, len(i_sorted[:])):
                        if centroid_candidates[i_sorted[i]] == rho_order[0]:
                            continue
                        # Only candidates whose delta z-score is more than 1.0
                        # below that of the highest-rho point are accepted; the
                        # scan stops at the first candidate that is not.
                        if delta_z_of_rho_order_0 - candidate_delta_z[
                                i_sorted[i]] > 1.0:
                            if i > 1:
                                # From the third position on, additionally
                                # require the previous candidate to be more
                                # than 1.0 below the top candidate.
                                if -candidate_delta_z[i_sorted[
                                        i - 1]] + candidate_delta_z[
                                            i_sorted[0]] > 1.0:
                                    centroids.append(
                                        centroid_candidates[i_sorted[i]])
                            else:
                                centroids.append(
                                    centroid_candidates[i_sorted[i]])
                        else:
                            break
                if False:
                    # Disabled alternative: candidates significant in both
                    # delta and rho.
                    centroid_candidates = list(
                        set(significant_delta).intersection(
                            set(significant_rho)))
                    # Now compare the relative orders of the max delta_z and max rho_z to make sure they are within 1 stdev
                    centroids = []
                    max_delta_z_candidates = -999.9
                    max_rho_z_candidates = -999.9
                    for ic in centroid_candidates:
                        if delta_z[ic] > max_delta_z_candidates:
                            max_delta_z_candidates = delta_z[ic]
                        if rho_z[ic] > max_rho_z_candidates:
                            max_rho_z_candidates = rho_z[ic]
                    for ic in centroid_candidates:
                        if max_delta_z_candidates - delta_z[
                                ic] < 1.0 and max_rho_z_candidates - rho_z[
                                    ic] < 1.0:
                            centroids.append(ic)

            #item_idxs = [delta_order[ic] for ic,centroid in enumerate(centroids)]
            # Each centroid becomes the seed of its own cluster.
            item_idxs = centroids
            for item_idx in item_idxs:
                cluster_id[item_idx] = n_cluster
                print('CLUSTERING_STATS', item_idx, cluster_id[item_idx])
                n_cluster += 1
                ####
        elif strategy2:
            # Go through list of clusters, see which one has highest joint rank in both rho and delta lists
            # This will only assign one cluster center based on highest product of rho and delta ranks
            # NOTE(review): the products below use the rho and delta values
            # themselves, not their ranks.
            product_list_of_ranks = []
            for ic in range(NN):
                rho_tmp = self.rho[ic]
                delta_tmp = self.delta[ic]
                product_list_of_ranks.append(rho_tmp * delta_tmp)
            import numpy as np
            item_idx = np.argmax(product_list_of_ranks)
            cluster_id[item_idx] = n_cluster  # Only cluster assigned
            print('CLUSTERING_STATS', item_idx, cluster_id[item_idx])
            n_cluster += 1
        elif strategy3:
            # use product of delta and rho and pick out top candidates
            # have to use a significance z_score to filter out the very best
            product_list_of_ranks = flex.double()
            for ic in range(NN):
                rho_tmp = self.rho[ic]
                delta_tmp = self.delta[ic]
                product_list_of_ranks.append(rho_tmp * delta_tmp)
            import numpy as np
            # Point indices sorted by decreasing rho * delta.
            iid_sorted = flex.sort_permutation(product_list_of_ranks,
                                               reverse=True)
            cluster_id[
                iid_sorted[0]] = n_cluster  # first point always a cluster
            n_cluster += 1
            print('CLUSTERING_STATS S3', iid_sorted[0],
                  cluster_id[iid_sorted[0]])
            #product_list_of_ranks[iid_sorted[0]]=0.0 # set this to 0.0 so that the mean/stdev does not get biased by one point
            stdev = np.std(product_list_of_ranks)
            mean = np.mean(product_list_of_ranks)
            n_sorted = 3
            #if stdev == 0.0:
            #  n_sorted=1

            z_critical = 3.0  # 2 sigma significance ?
            # Only go through say 3-4 datapoints
            # basically there won't be more than 2-3 lattices on an image realistically
            # NOTE(review): stdev may be zero when all products are equal;
            # the division below is not guarded against that.
            for iid in iid_sorted[1:n_sorted]:
                z_score = (product_list_of_ranks[iid] - mean) / stdev
                if z_score > z_critical:
                    cluster_id[iid] = n_cluster
                    n_cluster += 1
                    print('CLUSTERING_STATS S3', iid, cluster_id[iid])
                else:
                    break  # No point going over all points once below threshold z_score

        else:
            # NOTE(review): this branch appears unreachable -- whenever
            # debug_fix_clustering is False, either strategy2 or strategy3
            # has been set to True above.
            for ic in range(NN):
                item_idx = delta_order[ic]
                if ic != 0:
                    if delta[item_idx] <= 0.25 * delta[
                            delta_order[0]]:  # too low to be a medoid
                        continue
                item_rho_order = rho_order_list.index(item_idx)
                if (item_rho_order) / NN < MAX_PERCENTILE_RHO:
                    cluster_id[item_idx] = n_cluster
                    print('CLUSTERING_STATS', ic, item_idx, item_rho_order,
                          cluster_id[item_idx])
                    n_cluster += 1
        ###


#
        print('Found %d clusters' % n_cluster)
        for x in range(NN):
            if cluster_id[x] >= 0:
                print("XC", x, cluster_id[x], rho[x], delta[x])
        # Snapshot with only the cluster centres labelled.
        self.cluster_id_maxima = cluster_id.deep_copy()
        # NOTE(review): the call's return value is unused, so it presumably
        # fills cluster_id in place -- confirm against the implementation.
        R.cluster_assignment(rho_order, cluster_id, rho)
        self.cluster_id_full = cluster_id.deep_copy()

        # Halo assignment is disabled here; the code is kept for reference.
        #halo = flex.bool(NN,False)
        #border = R.get_border( cluster_id = cluster_id )

        #for ic in range(n_cluster): #loop thru all border regions; find highest density
        #  this_border = (cluster_id == ic) & (border==True)
        #  if this_border.count(True)>0:
        #    highest_density = flex.max(rho.select(this_border))
        #    halo_selection = (rho < highest_density) & (this_border==True)
        #    if halo_selection.count(True)>0:
        #      cluster_id.set_selected(halo_selection,-1)
        #    core_selection = (cluster_id == ic) & ~halo_selection
        #    highest_density = flex.max(rho.select(core_selection))
        #    too_sparse = core_selection & (rho.as_double() < highest_density/10.) # another heuristic
        #    if too_sparse.count(True)>0:
        #      cluster_id.set_selected(too_sparse,-1)
        self.cluster_id_final = cluster_id.deep_copy()
def rho_stats(xray_structure, d_min, resolution_factor, electron_sum_radius,
              zero_out_f000):
    """Compute density statistics for full and resolution-limited maps.

    A grid with an odd number of points per axis is derived from the cell
    edges and ``d_min * resolution_factor * 2``.  Structure factors are
    computed by direct summation for every Miller index on that grid, and
    two maps are produced by FFT: one from all coefficients and one from
    the coefficients kept by ``resolution_filter(d_min=d_min)``.  For each
    map the function records the number of coefficients, the maximum
    density, the density summed within ``electron_sum_radius`` of the first
    site (times the voxel volume) and the density profile along x through
    the origin.  A one-line summary is printed.

    Args:
        xray_structure: structure supplying cell, symmetry and scatterers.
        d_min: resolution limit.
        resolution_factor: scales the grid spacing relative to ``d_min``.
        electron_sum_radius: radius used for the sum around the first site.
        zero_out_f000: if true, the first coefficient (asserted to be
            index (0, 0, 0)) is set to zero.

    Returns:
        A list with two ``(x, y)`` pairs of lists (full map first, then
        the resolution-limited map): positions along x and the density at
        those positions.
    """
    unit_cell = xray_structure.unit_cell()
    grid_step = d_min * resolution_factor * 2

    # Half-extent of the grid along each cell edge; the full grid has
    # 2 * nh + 1 points so that index 0 sits in the middle.
    half_extents = [
        ifloor(edge / grid_step) for edge in unit_cell.parameters()[:3]
    ]
    n_half_plus = list(half_extents)
    n_half_minus = [-nh for nh in half_extents]
    n_real = tuple(2 * nh + 1 for nh in half_extents)
    n_real_product = matrix.col(n_real).product()

    crystal_gridding = maptbx.crystal_gridding(
        unit_cell=unit_cell,
        space_group_info=xray_structure.space_group_info(),
        pre_determined_n_real=n_real)

    # Every Miller index in the closed box [-nh, +nh] on each axis.
    miller_indices = flex.miller_index()
    miller_indices.reserve(n_real_product)
    for hkl in flex.nested_loop(n_half_minus, n_half_plus, open_range=False):
        miller_indices.append(hkl)
    assert miller_indices.size() == n_real_product

    unsorted_set = miller.set(crystal_symmetry=xray_structure,
                              anomalous_flag=True,
                              indices=miller_indices)
    miller_set = unsorted_set.sort(by_value="resolution")
    assert miller_set.indices()[0] == (0, 0, 0)

    sf_result = miller_set.structure_factors_from_scatterers(
        xray_structure=xray_structure, algorithm="direct",
        cos_sin_table=False)
    f_calc = sf_result.f_calc()
    if zero_out_f000:
        f_calc.data()[0] = 0j

    unit_cell_volume = unit_cell.volume()
    voxel_volume = unit_cell_volume / n_real_product

    # Quantities for the profile along x; they do not depend on the map.
    a = unit_cell.parameters()[0]
    nx = n_real[0]
    nxh = nx // 2
    single_scatterer = (xray_structure.scatterers().size() == 1)

    number_of_miller_indices = []
    rho_max = []
    electron_sums_around_atoms = []
    densities_along_x = []
    for coeffs in (f_calc, f_calc.resolution_filter(d_min=d_min)):
        assert coeffs.indices()[0] == (0, 0, 0)
        number_of_miller_indices.append(coeffs.indices().size())

        fft_map = miller.fft_map(crystal_gridding=crystal_gridding,
                                 fourier_coefficients=coeffs)
        assert fft_map.n_real() == n_real
        rho = fft_map.real_map_unpadded() / unit_cell_volume
        assert approx_equal(voxel_volume * flex.sum(rho), f_calc.data()[0])

        if single_scatterer:
            # With a single scatterer the maximum is asserted to be at
            # grid index 0.
            assert flex.max_index(rho) == 0
            rho_max.append(rho[0])
        else:
            rho_max.append(flex.max(rho))

        # Sum the density on the grid points around the first site.
        first_site = xray_structure.sites_cart()[0]
        gias = maptbx.grid_indices_around_sites(
            unit_cell=unit_cell,
            fft_n_real=n_real,
            fft_m_real=n_real,
            sites_cart=flex.vec3_double([first_site]),
            site_radii=flex.double([electron_sum_radius]))
        local_sum = flex.sum(rho.as_1d().select(gias))
        electron_sums_around_atoms.append(local_sum * voxel_volume)

        # Density along x through the origin, wrapping negative indices.
        steps = range(-nxh, nxh + 1)
        x = [a * ix / nx for ix in steps]
        y = [rho[(ix % nx, 0, 0)] for ix in steps]
        densities_along_x.append((x, y))

    f000_real = f_calc.data()[0].real
    b_iso = u_as_b(xray_structure.scatterers()[0].u_iso)
    print(
        "%3.1f %4.2f %-12s %5d %5d | %6.3f %6.3f | %6.3f %6.3f | %4.2f %5.1f" %
        (d_min, resolution_factor, n_real, number_of_miller_indices[0],
         number_of_miller_indices[1], electron_sums_around_atoms[0],
         electron_sums_around_atoms[1], rho_max[0], rho_max[1],
         f000_real, b_iso))

    return densities_along_x
def rho_stats(
        xray_structure,
        d_min,
        resolution_factor,
        electron_sum_radius,
        zero_out_f000):
    """Print one row of electron-density statistics and return x-axis profiles.

    A grid with an odd number of points along each cell edge is derived from
    ``d_min * resolution_factor * 2``; every Miller index in the matching
    closed range ``[-nh, nh]`` per axis is generated, and structure factors
    are computed with the "direct" algorithm. Two density maps are then
    evaluated: one from all generated structure factors and one from the
    subset kept by ``resolution_filter(d_min=d_min)``.

    Args:
        xray_structure: structure supplying the unit cell, space group,
            scatterers and Cartesian sites.
        d_min: resolution limit used for the grid spacing and the filter.
        resolution_factor: multiplier applied to ``d_min`` for the spacing.
        electron_sum_radius: radius passed as the site radius when selecting
            grid points around the first site.
        zero_out_f000: when true, the first structure factor (asserted to be
            index (0, 0, 0)) is overwritten with 0 before the maps are made.

    Returns:
        A list with two ``(x, y)`` tuples (unfiltered map first), where ``y``
        holds the density at grid points ``(ix, 0, 0)`` and ``x`` the matching
        coordinate ``a * ix / nx`` for ``ix`` in ``-nx//2 .. nx//2``.

    Side effects:
        Prints a single formatted line of statistics to standard output.
    """
    n_real = []
    n_half_plus = []
    n_half_minus = []
    s2 = d_min * resolution_factor * 2
    for cell_edge in xray_structure.unit_cell().parameters()[:3]:
        nh = ifloor(cell_edge / s2)
        # 2*nh+1 keeps the grid odd, so indices -nh..nh map onto it one-to-one.
        n_real.append(2 * nh + 1)
        n_half_plus.append(nh)
        n_half_minus.append(-nh)
    n_real = tuple(n_real)
    n_real_product = matrix.col(n_real).product()
    crystal_gridding = maptbx.crystal_gridding(
        unit_cell=xray_structure.unit_cell(),
        space_group_info=xray_structure.space_group_info(),
        pre_determined_n_real=n_real)
    miller_indices = flex.miller_index()
    miller_indices.reserve(n_real_product)
    for h in flex.nested_loop(n_half_minus, n_half_plus, open_range=False):
        miller_indices.append(h)
    assert miller_indices.size() == n_real_product
    #
    miller_set = miller.set(
        crystal_symmetry=xray_structure,
        anomalous_flag=True,
        indices=miller_indices).sort(by_value="resolution")
    # The code below addresses F(000) as element 0 of the sorted set.
    assert miller_set.indices()[0] == (0, 0, 0)
    f_calc = miller_set.structure_factors_from_scatterers(
        xray_structure=xray_structure,
        algorithm="direct",
        cos_sin_table=False).f_calc()
    if zero_out_f000:
        f_calc.data()[0] = 0j
    #
    unit_cell_volume = xray_structure.unit_cell().volume()
    voxel_volume = unit_cell_volume / n_real_product
    number_of_miller_indices = []
    rho_max = []
    electron_sums_around_atoms = []
    densities_along_x = []
    for f in [f_calc, f_calc.resolution_filter(d_min=d_min)]:
        assert f.indices()[0] == (0, 0, 0)
        number_of_miller_indices.append(f.indices().size())
        fft_map = miller.fft_map(
            crystal_gridding=crystal_gridding,
            fourier_coefficients=f)
        assert fft_map.n_real() == n_real
        rho = fft_map.real_map_unpadded() / unit_cell_volume
        # The integral of the density over the cell must reproduce F(000).
        assert approx_equal(voxel_volume * flex.sum(rho), f_calc.data()[0])
        if xray_structure.scatterers().size() == 1:
            # With a single scatterer the peak is required to sit at element 0.
            assert flex.max_index(rho) == 0
            rho_max.append(rho[0])
        else:
            rho_max.append(flex.max(rho))
        site_cart = xray_structure.sites_cart()[0]
        gias = maptbx.grid_indices_around_sites(
            unit_cell=xray_structure.unit_cell(),
            fft_n_real=n_real,
            fft_m_real=n_real,
            sites_cart=flex.vec3_double([site_cart]),
            site_radii=flex.double([electron_sum_radius]))
        electron_sums_around_atoms.append(
            flex.sum(rho.as_1d().select(gias)) * voxel_volume)
        #
        a = xray_structure.unit_cell().parameters()[0]
        nx = n_real[0]
        nxh = nx // 2
        # `range` replaces the Python 2-only `xrange`; the span is tiny.
        grid_steps = range(-nxh, nxh + 1)
        x = [a * ix / nx for ix in grid_steps]
        # Negative steps wrap around to the far end of the periodic map.
        y = [rho[(ix % nx, 0, 0)] for ix in grid_steps]
        densities_along_x.append((x, y))
    #
    # A single parenthesised argument prints identically on Python 2 and 3.
    print(
        "%3.1f %4.2f %-12s %5d %5d | %6.3f %6.3f | %6.3f %6.3f | %4.2f %5.1f" % (
            d_min,
            resolution_factor,
            n_real,
            number_of_miller_indices[0],
            number_of_miller_indices[1],
            electron_sums_around_atoms[0],
            electron_sums_around_atoms[1],
            rho_max[0],
            rho_max[1],
            f_calc.data()[0].real,
            u_as_b(xray_structure.scatterers()[0].u_iso)))
    #
    return densities_along_x
    def __init__(self, **kwargs):
        """Run density-peak clustering on ``self.Dij`` and store the labels.

        The keyword arguments are handed to ``group_args.__init__``. The
        method then reads ``self.Dij`` and ``self.d_c`` (both forwarded to
        the ``Rodriguez_Laio_clustering_2014`` engine) and
        ``self.max_percentile_rho``.

        Attributes set:
            rho: value returned by the engine's ``get_rho()``.
            delta: value returned by the engine's ``get_delta(...)``.
            cluster_id_maxima: labels with only the chosen centres assigned
                (every other entry is -1).
            cluster_id_full: labels after the engine's ``cluster_assignment``.
            cluster_id_final: a further copy of the same labels; no pruning
                step runs between the two copies.

        Progress and diagnostic lines are printed to standard output.
        """
        group_args.__init__(self, **kwargs)
        print('finished Dij, now calculating rho_i and density')
        from xfel.clustering import Rodriguez_Laio_clustering_2014 as RL

        engine = RL(distance_matrix=self.Dij, d_c=self.d_c)
        rho = engine.get_rho()
        self.rho = rho
        # Not referenced again; evaluated here so the sequence of calls made
        # on `rho` stays exactly as before.
        ave_rho = flex.mean(rho.as_double())
        n_items = self.Dij.focus()[0]
        densest = flex.max_index(rho)
        # Largest entry in the distance-matrix row of the densest item.
        densest_row = [self.Dij[densest, col] for col in range(n_items)]
        delta_i_max = flex.max(flex.double(densest_row))
        rho_order = flex.sort_permutation(rho, reverse=True)
        rho_order_list = list(rho_order)
        delta = engine.get_delta(rho_order=rho_order,
                                 delta_i_max=delta_i_max)
        self.delta = delta
        cluster_id = flex.int(n_items, -1)  # -1 marks "not in any cluster"
        delta_order = flex.sort_permutation(delta, reverse=True)
        # Only consulted by the alternative selection branch further down.
        MAX_PERCENTILE_RHO = self.max_percentile_rho
        n_cluster = 0

        rho_values = rho.as_double()
        rho_stdev = flex.mean_and_variance(
            rho_values).unweighted_sample_standard_deviation()
        delta_stdev = flex.mean_and_variance(
            delta).unweighted_sample_standard_deviation()
        # Z-scores need a non-zero spread in both quantities; otherwise a
        # single centre is taken from whichever quantity is examined below.
        pick_top_solution = rho_stdev == 0.0 or delta_stdev == 0
        if pick_top_solution:
            if rho_stdev == 0.0:
                centroids = [flex.first_index(delta, flex.max(delta))]
            elif delta_stdev == 0.0:
                centroids = [flex.first_index(rho, flex.max(rho))]
        else:
            rho_z = (rho_values - flex.mean(rho_values)) / rho_stdev
            delta_z = (delta - flex.mean(delta)) / delta_stdev

        debug_fix_clustering = True
        if debug_fix_clustering:
            if not pick_top_solution:
                delta_z_cutoff = min(1.0, max(delta_z))
                rho_z_cutoff = min(1.0, max(rho_z))
                # Items at or above the cutoff in each quantity separately.
                significant_delta = [
                    idx for idx in range(n_items)
                    if delta_z[idx] >= delta_z_cutoff
                ]
                significant_rho = [
                    idx for idx in range(n_items)
                    if rho_z[idx] >= rho_z_cutoff
                ]
                centroid_candidates = list(
                    set(significant_delta) & set(significant_rho))
                # Best z-scores among the candidates; -999.9 is the floor
                # value used when there are none.
                max_delta_z_candidates = max(
                    [-999.9] + [delta_z[idx] for idx in centroid_candidates])
                max_rho_z_candidates = max(
                    [-999.9] + [rho_z[idx] for idx in centroid_candidates])
                # Keep candidates within one standard deviation of both bests.
                centroids = [
                    idx for idx in centroid_candidates
                    if max_delta_z_candidates - delta_z[idx] < 1.0
                    and max_rho_z_candidates - rho_z[idx] < 1.0
                ]

            # NOTE(review): only the *number* of centroids is used here; the
            # centres are the first len(centroids) items in descending-delta
            # order, not the centroid indices themselves - confirm intent.
            item_idxs = [delta_order[rank] for rank in range(len(centroids))]
            for label, item_idx in enumerate(item_idxs):
                cluster_id[item_idx] = label
                print('CLUSTERING_STATS', item_idx, cluster_id[item_idx])
            n_cluster = len(item_idxs)
        else:
            # Alternative selection, reachable only by flipping the switch
            # above: walk items by descending delta and accept those whose
            # density rank fraction is below MAX_PERCENTILE_RHO.
            for rank in range(n_items):
                item_idx = delta_order[rank]
                too_low = delta[item_idx] <= 0.25 * delta[delta_order[0]]
                if rank != 0 and too_low:  # too low to be a medoid
                    continue
                item_rho_order = rho_order_list.index(item_idx)
                if item_rho_order / n_items < MAX_PERCENTILE_RHO:
                    cluster_id[item_idx] = n_cluster
                    print('CLUSTERING_STATS', rank, item_idx, item_rho_order,
                          cluster_id[item_idx])
                    n_cluster += 1

        print('Found %d clusters' % n_cluster)
        for idx in range(n_items):
            if cluster_id[idx] >= 0:
                print("XC", idx, cluster_id[idx], rho[idx], delta[idx])
        self.cluster_id_maxima = cluster_id.deep_copy()
        # Presumably labels the remaining items in place - confirm against
        # the engine; snapshots are taken before and after the call.
        engine.cluster_assignment(rho_order, cluster_id)
        self.cluster_id_full = cluster_id.deep_copy()
        # Halo/border pruning is not performed in this version, so the final
        # labels are identical to the full assignment.
        self.cluster_id_final = cluster_id.deep_copy()
Exemplo n.º 16
0
    def __init__(self, **kwargs):
        # Purpose: cluster the items described by the distance matrix
        # self.Dij with the Rodriguez_Laio_clustering_2014 engine, then strip
        # low-density border ("halo") members from each cluster.
        #
        # Reads:  self.Dij, self.d_c (stored from **kwargs by group_args).
        # Sets:   self.rho, self.delta, self.cluster_id_maxima,
        #         self.cluster_id_full, self.cluster_id_final.
        # Prints progress and diagnostics to standard output (Python 2 print
        # statements).
        #
        # NOTE(review): P is rebound to a new Profiler at the start of every
        # stage; presumably that closes the previous timing section - confirm
        # against the Profiler implementation before reordering statements.
        group_args.__init__(self, **kwargs)
        # require Dij, d_c
        P = Profiler("2. calculate rho density")
        print "finished Dij, now calculating rho_i, the density"
        from xfel.clustering import Rodriguez_Laio_clustering_2014
        # alternative clustering algorithms: see http://scikit-learn.org/stable/modules/clustering.html
        # also see https://cran.r-project.org/web/packages/dbscan/vignettes/hdbscan.html
        # see also https://en.wikipedia.org/wiki/Hausdorff_dimension

        R = Rodriguez_Laio_clustering_2014(distance_matrix=self.Dij,
                                           d_c=self.d_c)
        self.rho = rho = R.get_rho()
        ave_rho = flex.mean(rho.as_double())
        # NN is the first dimension of the distance matrix.
        NN = self.Dij.focus()[0]
        print "The average rho_i is %5.2f, or %4.1f%%" % (ave_rho,
                                                          100 * ave_rho / NN)
        i_max = flex.max_index(rho)

        P = Profiler("3.transition")
        print "the index with the highest density is %d" % (i_max)
        # Largest entry in the distance-matrix row of the densest item.
        delta_i_max = flex.max(
            flex.double([self.Dij[i_max, j] for j in xrange(NN)]))
        print "delta_i_max", delta_i_max
        # Item indices ordered by descending rho; the plain-list copy is used
        # for the .index() rank lookup in the loop below.
        rho_order = flex.sort_permutation(rho, reverse=True)
        rho_order_list = list(rho_order)

        P = Profiler("4. delta")
        self.delta = delta = R.get_delta(rho_order=rho_order,
                                         delta_i_max=delta_i_max)

        P = Profiler("5. find cluster maxima")
        #---- Now hunting for clusters --- lots of room for improvement (or simplification) here!!!
        cluster_id = flex.int(NN, -1)  # default -1 means no cluster
        delta_order = flex.sort_permutation(delta, reverse=True)
        # N_CLUST is only referenced by the commented-out max_n_delta line below.
        N_CLUST = 10  # maximum of 10 points to be considered as possible clusters
        #MAX_PERCENTILE_DELTA = 0.99 # cluster centers have to be in the top 10% percentile delta
        # A candidate is accepted when (its rank by descending rho) / NN is
        # below this fraction. NOTE(review): earlier comments here spoke of a
        # "top 75%" / "top 10%" percentile, which does not match 0.99.
        MAX_PERCENTILE_RHO = 0.99
        n_cluster = 0
        #max_n_delta = min(N_CLUST, int(MAX_PERCENTILE_DELTA*NN))
        # Walk the items from largest to smallest delta.
        for ic in xrange(NN):
            # test the density, rho
            item_idx = delta_order[ic]
            # The "delta > 100" prints are diagnostics only; they do not
            # affect which items are selected.
            if delta[item_idx] > 100:
                print "A: iteration", ic, "delta", delta[
                    item_idx], delta[item_idx] < 0.25 * delta[delta_order[0]]
            # Skip items whose delta is under a quarter of the largest delta.
            if delta[item_idx] < 0.25 * delta[
                    delta_order[0]]:  # too low (another heuristic!)
                continue
            # Rank of this item in the descending-rho ordering (0 = densest).
            item_rho_order = rho_order_list.index(item_idx)
            if delta[item_idx] > 100:
                print "B: iteration", ic, item_rho_order, item_rho_order / NN, MAX_PERCENTILE_RHO
            # NOTE(review): if both operands are ints and the module lacks
            # `from __future__ import division`, this is integer division
            # under Python 2 and the test is always true - confirm.
            if item_rho_order / NN < MAX_PERCENTILE_RHO:
                cluster_id[item_idx] = n_cluster
                print ic, item_idx, item_rho_order, cluster_id[item_idx]
                n_cluster += 1
        print "Found %d clusters" % n_cluster
        for x in xrange(NN):
            if cluster_id[x] >= 0:
                print "XC", x, cluster_id[x], rho[x], delta[x]
        # Snapshot with only the cluster centres labelled.
        self.cluster_id_maxima = cluster_id.deep_copy()

        P = Profiler("6. assign all points")
        # Presumably labels the remaining items in place - confirm against
        # the engine; snapshots are taken before and after this call.
        R.cluster_assignment(rho_order, cluster_id)

        self.cluster_id_full = cluster_id.deep_copy()

        # assign the halos
        P = Profiler("7. assign halos")
        # `halo` is not read again within the lines visible here.
        halo = flex.bool(NN, False)
        border = R.get_border(cluster_id=cluster_id)

        for ic in range(n_cluster
                        ):  #loop thru all border regions; find highest density
            print "cluster", ic, "in border", border.count(True)
            # Members of cluster ic that are flagged as border items.
            this_border = (cluster_id == ic) & (border == True)
            print len(this_border), this_border.count(True)
            if this_border.count(True) > 0:
                highest_density = flex.max(rho.select(this_border))
                # Border members below the densest border member are
                # unassigned (label reset to -1).
                halo_selection = (rho < highest_density) & (this_border
                                                            == True)
                if halo_selection.count(True) > 0:
                    cluster_id.set_selected(halo_selection, -1)
                # cluster_id was just modified, so this selects what is left
                # of cluster ic.
                core_selection = (cluster_id == ic) & ~halo_selection
                highest_density = flex.max(rho.select(core_selection))
                # Remaining members with less than a tenth of the cluster's
                # peak density are unassigned as well.
                too_sparse = core_selection & (
                    rho.as_double() < highest_density / 10.
                )  # another heuristic
                if too_sparse.count(True) > 0:
                    cluster_id.set_selected(too_sparse, -1)
        # Snapshot after halo and sparse members were reset to -1.
        self.cluster_id_final = cluster_id.deep_copy()
        print "%d in the excluded halo" % ((cluster_id == -1).count(True))