def __init__(self, atom_group, global_b_facs_stats=None):
    self.atom_group = atom_group
    # Meta
    self.residue_class = iotbx.pdb.common_residue_names_get_class(atom_group.resname)
    # B-Factors
    self.b_facs = atom_group.atoms().extract_b()
    self.b_facs_stats = basic_statistics(atom_group.atoms().extract_b())
    # B-Factors Z-Statistics
    if global_b_facs_stats is not None:
        self.b_facs_z = global_b_facs_stats.to_z_score(
            b_vals=atom_group.atoms().extract_b())
        self.b_facs_z_stats = basic_statistics(self.b_facs_z)
    else:
        self.b_facs_z = None
        self.b_facs_z_stats = None
    # Occupancies
    self.occies = atom_group.atoms().extract_occ()
    self.occies_stats = basic_statistics(self.occies)
    # Coordinates
    self.centroid = atom_group.atoms().extract_xyz().mean()

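# Usage sketch (hypothetical): assuming the __init__ above belongs to a per-atom-group
# summary class, here called AtomGroupSummary (the real class name is not shown in the
# snippet), it could be driven over every atom group of an iotbx.pdb hierarchy like this.
import iotbx.pdb

hierarchy = iotbx.pdb.input(file_name='model.pdb').construct_hierarchy()
summaries = [AtomGroupSummary(atom_group=ag)  # hypothetical class name
             for ag in hierarchy.atom_groups()]
for s in summaries:
    # mean B-factor and mean occupancy per atom group
    print(s.atom_group.resname, s.b_facs_stats.mean, s.occies_stats.mean)
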
def exercise_variate_generators():
    from scitbx.random \
        import variate, normal_distribution, bernoulli_distribution, \
        gamma_distribution, poisson_distribution
    for i in range(10):
        scitbx.random.set_random_seed(0)
        g = variate(normal_distribution())
        assert approx_equal(g(), -0.917787219374)
        assert approx_equal(
            g(10),
            (1.21838707856, 1.732426915, 0.838038157555, -0.296895169923,
             0.246451144946, -0.635474652255, -0.0980626986425, 0.36458295417,
             0.534073780268, -0.665073136294))

    stat = basic_statistics(flex.double(itertools.islice(g, 1000000)))
    assert approx_equal(stat.mean, 0, eps=0.005)
    assert approx_equal(stat.biased_variance, 1, eps=0.005)
    assert approx_equal(stat.skew, 0, eps=0.005)
    assert approx_equal(stat.kurtosis, 3, eps=0.005)

    bernoulli_seq = variate(bernoulli_distribution(0.1))
    for b in itertools.islice(bernoulli_seq, 10):
        assert b in (True, False)
    bernoulli_sample = flex.bool(itertools.islice(bernoulli_seq, 10000))
    assert approx_equal(bernoulli_sample.count(True) / len(bernoulli_sample),
                        0.1, eps=0.01)

    # Boost 1.64 changes the exponential distribution to use Ziggurat algorithm
    scitbx.random.set_random_seed(0)
    g = variate(gamma_distribution())
    if (boost_version < 106400):
        assert approx_equal(g(), 0.79587450456577546)
        assert approx_equal(g(2), (0.89856038848394115, 1.2559307580473893))
    else:
        assert approx_equal(g(), 0.864758191783)
        assert approx_equal(g(2), (1.36660841837, 2.26740986094))
    stat = basic_statistics(flex.double(itertools.islice(g, 1000000)))
    assert approx_equal(stat.mean, 1, eps=0.005)
    assert approx_equal(stat.skew, 2, eps=0.01)
    assert approx_equal(stat.biased_variance, 1, eps=0.005)

    scitbx.random.set_random_seed(0)
    g = variate(gamma_distribution(alpha=2, beta=3))
    assert approx_equal(g(), 16.670850592722729)
    assert approx_equal(g(2), (10.03662877519449, 3.9357158398972873))
    stat = basic_statistics(flex.double(itertools.islice(g, 1000000)))
    assert approx_equal(stat.mean, 6, eps=0.005)
    assert approx_equal(stat.skew, 2 / math.sqrt(2), eps=0.05)
    assert approx_equal(stat.biased_variance, 18, eps=0.05)

    mean = 10.0
    pv = variate(poisson_distribution(mean))
    draws = pv(1000000).as_double()
    m = flex.mean(draws)
    v = flex.mean(draws * draws) - m * m
    assert approx_equal(m, mean, eps=0.05)
    assert approx_equal(v, mean, eps=0.05)

def exercise_variate_generators():
    from scitbx.random \
        import variate, normal_distribution, bernoulli_distribution, \
        gamma_distribution, poisson_distribution
    for i in xrange(10):
        scitbx.random.set_random_seed(0)
        g = variate(normal_distribution())
        assert approx_equal(g(), -1.2780081289048213)
        assert approx_equal(
            g(10),
            (-0.40474189234755492, -0.41845505596083288,
             -1.8825790263067721, -1.5779112018107659,
             -1.1888174422378859, -1.8619619179878537,
             -0.53946818661388318, -1.2400941724410812,
             0.64511959841907285, -0.59934120033270688))

    stat = basic_statistics(flex.double(itertools.islice(g, 1000000)))
    assert approx_equal(stat.mean, 0, eps=0.005)
    assert approx_equal(stat.biased_variance, 1, eps=0.005)
    assert approx_equal(stat.skew, 0, eps=0.005)
    assert approx_equal(stat.kurtosis, 3, eps=0.005)

    bernoulli_seq = variate(bernoulli_distribution(0.1))
    for b in itertools.islice(bernoulli_seq, 10):
        assert b in (True, False)
    bernoulli_sample = flex.bool(itertools.islice(bernoulli_seq, 10000))
    assert approx_equal(bernoulli_sample.count(True) / len(bernoulli_sample),
                        0.1, eps=0.01)

    scitbx.random.set_random_seed(0)
    g = variate(gamma_distribution())
    assert approx_equal(g(), 0.79587450456577546)
    assert approx_equal(g(2), (0.89856038848394115, 1.2559307580473893))
    stat = basic_statistics(flex.double(itertools.islice(g, 1000000)))
    assert approx_equal(stat.mean, 1, eps=0.005)
    assert approx_equal(stat.skew, 2, eps=0.005)
    assert approx_equal(stat.biased_variance, 1, eps=0.005)

    scitbx.random.set_random_seed(0)
    g = variate(gamma_distribution(alpha=2, beta=3))
    assert approx_equal(g(), 16.670850592722729)
    assert approx_equal(g(2), (10.03662877519449, 3.9357158398972873))
    stat = basic_statistics(flex.double(itertools.islice(g, 1000000)))
    assert approx_equal(stat.mean, 6, eps=0.005)
    assert approx_equal(stat.skew, 2 / math.sqrt(2), eps=0.05)
    assert approx_equal(stat.biased_variance, 18, eps=0.05)

    mean = 10.0
    pv = variate(poisson_distribution(mean))
    draws = pv(1000000).as_double()
    m = flex.mean(draws)
    v = flex.mean(draws * draws) - m * m
    assert approx_equal(m, mean, eps=0.05)
    assert approx_equal(v, mean, eps=0.05)

def _calculate_statistics(self, observations, uncertainties):
    """Calculate statistics for one set of observations and uncertainties"""
    guess_factor = 0.001
    stats_obj = basic_statistics(
        flex.double(numpy.ascontiguousarray(observations)))
    stdv = stats_obj.bias_corrected_standard_deviation
    sadj = estimate_true_underlying_sd(obs_vals=observations,
                                       obs_error=uncertainties,
                                       est_sigma=stdv * guess_factor)
    skew = stats_obj.skew
    kurt = stats_obj.kurtosis
    # Bimodality coefficient (Sarle): (skew^2 + 1) / kurtosis
    bimo = (skew**2 + 1) / kurt
    return (stdv, sadj, skew, kurt, bimo)

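# Minimal sketch of the scitbx part of the calculation above: skew and kurtosis come
# straight from scitbx.math.basic_statistics (kurtosis is the ordinary, non-excess form,
# so a normal sample gives ~3), and the bimodality coefficient is (skew**2 + 1) / kurtosis.
# estimate_true_underlying_sd belongs to the surrounding module and is not reproduced here;
# the numbers below are purely illustrative.
from scitbx.array_family import flex
from scitbx.math import basic_statistics

vals = flex.double([1.0, 1.1, 0.9, 1.2, 5.0, 0.8, 1.05])
stats = basic_statistics(vals)
stdv = stats.bias_corrected_standard_deviation
bimo = (stats.skew ** 2 + 1) / stats.kurtosis
print(stdv, stats.skew, stats.kurtosis, bimo)
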
def from_pdb(cls, pdb_input=None, pdb_hierarchy=None):
    """Calculate the b-factor statistics of a model"""
    assert [pdb_input, pdb_hierarchy].count(None) == 1, \
        'Provide pdb_input OR pdb_hierarchy'
    if pdb_input:
        pdb_hierarchy = pdb_input.construct_hierarchy()
    cache = pdb_hierarchy.atom_selection_cache()
    all_b = non_h(hierarchy=pdb_hierarchy, cache=cache, copy=True).atoms().extract_b()
    protein_b = protein(hierarchy=pdb_hierarchy, cache=cache, copy=True).atoms().extract_b()
    backbone_b = backbone(hierarchy=pdb_hierarchy, cache=cache, copy=True).atoms().extract_b()
    sidechain_b = sidechains(hierarchy=pdb_hierarchy, cache=cache, copy=True).atoms().extract_b()
    return cls(all=basic_statistics(all_b),
               protein=basic_statistics(protein_b),
               backbone=basic_statistics(backbone_b),
               sidechain=basic_statistics(sidechain_b))

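# Usage sketch (hypothetical): assuming the classmethod above lives on a container class,
# here called BFactorStatistics (the real name is not shown), and that the constructor
# stores its keyword arguments under the same names, per-selection B-factor statistics
# could be collected straight from a PDB file. non_h/protein/backbone/sidechains are
# selection helpers from the surrounding package and are not reproduced here.
import iotbx.pdb

pdb_in = iotbx.pdb.input(file_name='model.pdb')
b_stats = BFactorStatistics.from_pdb(pdb_input=pdb_in)  # hypothetical class name
print(b_stats.all.mean, b_stats.protein.mean,
      b_stats.backbone.mean, b_stats.sidechain.mean)
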
def run_detail(self, reflections):
    # Get pre-created resolution binning objects from the parameters
    self.resolution_binner = self.params.statistics.resolution_binner
    self.hkl_resolution_bins = self.params.statistics.hkl_resolution_bins

    # How many bins do we have?
    n_bins = self.resolution_binner.n_bins_all()
    # (self.params.statistics.n_bins + 2), 2 - to account for the hkls outside of the binner resolution range

    # To enable MPI all-rank reduction, every rank must initialize statistics array(s),
    # even if the rank doesn't have any reflections.
    self.I_sum = flex.double(n_bins, 0.0)  # a sum of the weighted mean intensities from all asu HKLs
    self.Isig_sum = flex.double(n_bins, 0.0)  # a sum of I/sigma from all asu HKLs
    self.Isig_list = [flex.double() for _ in range(n_bins)]
    self.n_sum = flex.int(n_bins, 0)  # number of theoretically predicted asu hkls
    self.m_sum = flex.int(n_bins, 0)  # number of observed asu hkls
    self.mm_sum = flex.int(n_bins, 0)  # number of observed asu hkls with multiplicity > 1

    # Calculate, format and output statistics for each rank
    self.logger.log("Calculating intensity statistics...")
    self.calculate_intensity_statistics(reflections)
    Intensity_Table = self.build_intensity_table(
        I_sum=self.I_sum,
        Isig_sum=self.Isig_sum,
        n_sum=self.n_sum,
        m_sum=self.m_sum,
        mm_sum=self.mm_sum,
        unmerged_meanIsig=None,
        unmerged_stddevIsig=None,
        unmerged_skewIsig=None)
    if self.params.output.log_level == 0:
        self.logger.log(Intensity_Table.get_table_text(), rank_prepend=False)

    # Accumulate statistics from all ranks
    all_ranks_I_sum = self.mpi_helper.cumulative_flex(self.I_sum, flex.double)
    all_ranks_Isig_sum = self.mpi_helper.cumulative_flex(self.Isig_sum, flex.double)
    all_ranks_n_sum = self.mpi_helper.cumulative_flex(self.n_sum, flex.int)
    all_ranks_m_sum = self.mpi_helper.cumulative_flex(self.m_sum, flex.int)
    all_ranks_mm_sum = self.mpi_helper.cumulative_flex(self.mm_sum, flex.int)

    all_ranks_unmerged_meanIsig = []
    all_ranks_unmerged_stddevIsig = []
    all_ranks_unmerged_skewIsig = []
    for bin_id in range(n_bins):
        all_ranks_isigi_list = self.mpi_helper.comm.gather(self.Isig_list[bin_id], 0)
        if self.mpi_helper.rank == 0:
            all_isigi = flex.double()
            for ranklist in all_ranks_isigi_list:
                all_isigi.extend(ranklist)
            stats = basic_statistics(all_isigi)
            all_ranks_unmerged_meanIsig.append(stats.mean)
            all_ranks_unmerged_stddevIsig.append(stats.bias_corrected_standard_deviation)
            all_ranks_unmerged_skewIsig.append(stats.skew)

    # Calculate, format and output all-rank total statistics
    if self.mpi_helper.rank == 0:
        Intensity_Table = self.build_intensity_table(
            I_sum=all_ranks_I_sum,
            Isig_sum=all_ranks_Isig_sum,
            n_sum=all_ranks_n_sum,
            m_sum=all_ranks_m_sum,
            mm_sum=all_ranks_mm_sum,
            unmerged_meanIsig=all_ranks_unmerged_meanIsig,
            unmerged_stddevIsig=all_ranks_unmerged_stddevIsig,
            unmerged_skewIsig=all_ranks_unmerged_skewIsig)
        self.logger.main_log(Intensity_Table.get_table_text())
        if self.last_bin_incomplete:
            self.logger.main_log(
                "Warning: the last resolution shell is incomplete. If your data was integrated to that resolution,\n"
                "consider using scaling.resolution_scalar=%f or lower." % self.suggested_resolution_scalar)

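# Minimal sketch of the per-bin reduction used above, with plain lists standing in for the
# per-rank pieces gathered over MPI (mpi_helper/comm belong to the surrounding merging
# framework and are not reproduced here): the per-rank I/sigI arrays for one resolution bin
# are concatenated and summarised with basic_statistics.
from scitbx.array_family import flex
from scitbx.math import basic_statistics

per_rank_isigi = [flex.double([3.2, 4.1, 2.8]), flex.double([5.0, 3.7]), flex.double()]
all_isigi = flex.double()
for ranklist in per_rank_isigi:
    all_isigi.extend(ranklist)
stats = basic_statistics(all_isigi)
print(stats.mean, stats.bias_corrected_standard_deviation, stats.skew)
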
def run(self):
    """Process the dataset"""

    dataset, dataset_map, grid, map_analyser, args, verbose = self.data

    # TODO Hardcoded check - to be removed? TODO
    assert dataset_map.is_sparse()

    # ============================================================================>
    # Prepare output objects
    # ============================================================================>
    log_strs = []
    log_file = dataset.file_manager.get_file('dataset_log')
    log = Log(log_file=log_file, verbose=False, silent=True)

    # ============================================================================>
    # Build new blob search object
    # ============================================================================>
    blob_finder = PanddaZMapAnalyser(params=args.params.z_map_analysis,
                                     grid=grid,
                                     log=log)
    print('Writing log for dataset {!s} to ...{}'.format(
        dataset.tag, log_file[log_file.index('processed'):]))

    # ============================================================================>
    # Extract the global mask object from the grid
    # ============================================================================>
    dset_total_temp = grid.global_mask().total_mask_binary().copy()

    # ============================================================================>
    # Generate symmetry masks for this dataset
    # ============================================================================>
    log.bar()
    log('Masking symmetry contacts from Z-map.')
    # Generate symmetry contacts for this dataset and align to reference frame
    dataset_sym_copies = dataset.model.crystal_contacts(
        distance_cutoff=args.params.masks.outer_mask + 5,
        combine_copies=True)
    dataset_sym_copies.atoms().set_xyz(
        dataset.model.alignment.nat2ref(
            dataset_sym_copies.atoms().extract_xyz()))
    # Only need to write if writing reference frame maps
    if args.output.developer.write_reference_frame_maps:
        dataset_sym_copies.write_pdb_file(
            dataset.file_manager.get_file('symmetry_copies'))
    # Extract protein atoms from the symmetry copies
    dataset_sym_sites_cart = non_water(dataset_sym_copies).atoms().extract_xyz()
    # Generate symmetry contacts grid mask
    dataset_mask = GridMask(parent=grid,
                            sites_cart=dataset_sym_sites_cart,
                            max_dist=args.params.masks.outer_mask,
                            min_dist=args.params.masks.inner_mask_symmetry)
    # Combine with the total mask to generate custom mask for this dataset
    dset_total_temp.put(dataset_mask.inner_mask_indices(), 0)
    dset_total_idxs = numpy.where(dset_total_temp)[0]
    log('After masking with symmetry contacts: {} points for Z-map analysis'.format(
        len(dset_total_idxs)))
    # Write map of grid + symmetry mask
    if args.output.developer.write_reference_frame_grid_masks:
        grid.write_indices_as_map(
            indices=dset_total_idxs,
            f_name=dataset.file_manager.get_file('grid_mask'),
            origin_shift=True)

    # ============================================================================>
    # Generate custom masks for this dataset
    # ============================================================================>
    if args.params.z_map_analysis.masks.selection_string is not None:
        log.bar()
        log('Applying custom mask to the Z-map: "{}"'.format(
            args.params.z_map_analysis.masks.selection_string))
        cache = dataset.model.hierarchy.atom_selection_cache()
        custom_mask_selection = cache.selection(
            args.params.z_map_analysis.masks.selection_string)
        custom_mask_sites = dataset.model.hierarchy.select(
            custom_mask_selection).atoms().extract_xyz()
        log('Masking with {} atoms'.format(len(custom_mask_sites)))
        # Generate custom grid mask
        dataset_mask = GridMask(parent=grid,
                                sites_cart=custom_mask_sites,
                                max_dist=args.params.z_map_analysis.masks.outer_mask,
                                min_dist=args.params.z_map_analysis.masks.inner_mask)
        # Combine with the total mask to generate custom mask for this dataset
        dset_total_temp *= dataset_mask.total_mask_binary()
        dset_total_idxs = numpy.where(dset_total_temp)[0]
        log('After masking with custom mask: {} points for Z-map analysis'.format(
            len(dset_total_idxs)))
        # Write out mask
        grid.write_indices_as_map(
            indices=dset_total_idxs,
            f_name=dataset.file_manager.get_file('z_map_mask'),
            origin_shift=True)

    # ============================================================================>
    #####
    # CALCULATE Z-MAPS AND LOOK FOR LARGE BLOBS
    #####
    # ============================================================================>
    # Check maps and that all maps are sparse
    # ============================================================================>
    assert dataset_map.data is not None, 'Something has gone wrong - this dataset has no loaded map'
    assert dataset_map.is_sparse() is map_analyser.statistical_maps.mean_map.is_sparse()
    assert dataset_map.is_sparse() is map_analyser.statistical_maps.medn_map.is_sparse()
    assert dataset_map.is_sparse() is map_analyser.statistical_maps.stds_map.is_sparse()
    assert dataset_map.is_sparse() is map_analyser.statistical_maps.sadj_map.is_sparse()

    # ============================================================================>
    # CALCULATE MEAN-DIFF MAPS
    # ============================================================================>
    mean_diff_map = map_analyser.calculate_z_map(map=dataset_map, method='none')
    # # ============================================================================>
    # # NAIVE Z-MAP - NOT USING UNCERTAINTY ESTIMATION OR ADJUSTED STDS
    # # ============================================================================>
    # z_map_naive = map_analyser.calculate_z_map(map=dataset_map, method='naive')
    # z_map_naive_normalised = z_map_naive.normalised_copy()

    # ============================================================================>
    # UNCERTAINTY Z-MAP - NOT USING ADJUSTED STDS
    # ============================================================================>
    z_map_uncty = map_analyser.calculate_z_map(
        map=dataset_map,
        uncertainty=dataset_map.meta.map_uncertainty,
        method='uncertainty')
    z_map_uncty_normalised = z_map_uncty.normalised_copy()

    # ============================================================================>
    # ADJUSTED+UNCERTAINTY Z-MAP
    # ============================================================================>
    z_map_compl = map_analyser.calculate_z_map(
        map=dataset_map,
        uncertainty=dataset_map.meta.map_uncertainty,
        method='adjusted+uncertainty')
    z_map_compl_normalised = z_map_compl.normalised_copy()

    # ============================================================================>
    # SELECT WHICH MAP TO DO THE BLOB SEARCHING ON
    # ============================================================================>
    # if args.params.statistical_maps.z_map_type == 'naive':
    #     z_map = z_map_naive_normalised
    #     z_map_stats = basic_statistics(flex.double(z_map_naive.data))
    if args.params.statistical_maps.z_map_type == 'uncertainty':
        z_map = z_map_uncty_normalised
        z_map_stats = basic_statistics(flex.double(z_map_uncty.data))
    elif args.params.statistical_maps.z_map_type == 'adjusted+uncertainty':
        z_map = z_map_compl_normalised
        z_map_stats = basic_statistics(flex.double(z_map_compl.data))
    else:
        raise Exception('Invalid Z-map type')

    # ============================================================================>
    # RECORD Z-MAP FOR STATISTICS
    # ============================================================================>
    # Calculate statistics of z-maps
    dataset_map.meta.z_mean = z_map_stats.mean
    dataset_map.meta.z_stdv = z_map_stats.bias_corrected_standard_deviation
    dataset_map.meta.z_skew = z_map_stats.skew
    dataset_map.meta.z_kurt = z_map_stats.kurtosis
    # ============================================================================>
    z_map.meta.type = 'z-map'
    # ============================================================================>

    # ============================================================================>
    #####
    # WRITE ALL MAP DISTRIBUTIONS (THESE DON'T USE MUCH SPACE)
    #####
    # ============================================================================>
    # Sampled Map
    analyse_graphs.map_value_distribution(
        f_name=dataset.file_manager.get_file('s_map_png'),
        plot_vals=dataset_map.get_map_data(sparse=True))
    # Mean-Difference
    analyse_graphs.map_value_distribution(
        f_name=dataset.file_manager.get_file('d_mean_map_png'),
        plot_vals=mean_diff_map.get_map_data(sparse=True))
    # # Naive Z-Map
    # analyse_graphs.map_value_distribution(
    #     f_name=dataset.file_manager.get_file('z_map_naive_png'),
    #     plot_vals=z_map_naive.get_map_data(sparse=True),
    #     plot_normal=True)
    # # Normalised Naive Z-Map
    # analyse_graphs.map_value_distribution(
    #     f_name=dataset.file_manager.get_file('z_map_naive_normalised_png'),
    #     plot_vals=z_map_naive_normalised.get_map_data(sparse=True),
    #     plot_normal=True)
    # Uncertainty Z-Map
    analyse_graphs.map_value_distribution(
        f_name=dataset.file_manager.get_file('z_map_uncertainty_png'),
        plot_vals=z_map_uncty.get_map_data(sparse=True),
        plot_normal=True)
    # Normalised Uncertainty Z-Map
    analyse_graphs.map_value_distribution(
        f_name=dataset.file_manager.get_file('z_map_uncertainty_normalised_png'),
        plot_vals=z_map_uncty_normalised.get_map_data(sparse=True),
        plot_normal=True)
    # Corrected Z-Map
    analyse_graphs.map_value_distribution(
        f_name=dataset.file_manager.get_file('z_map_corrected_png'),
        plot_vals=z_map_compl.get_map_data(sparse=True),
        plot_normal=True)
    # Normalised Corrected Z-Map
    analyse_graphs.map_value_distribution(
        f_name=dataset.file_manager.get_file('z_map_corrected_normalised_png'),
        plot_vals=z_map_compl_normalised.get_map_data(sparse=True),
        plot_normal=True)
    # Plot Q-Q Plot of Corrected Z-Map to see how normal it is
    analyse_graphs.qq_plot_against_normal(
        f_name=dataset.file_manager.get_file('z_map_qq_plot_png'),
        plot_vals=z_map_compl_normalised.get_map_data(sparse=True))

    # ============================================================================>
    #####
    # LOOK FOR CLUSTERS OF LARGE Z-SCORES
    #####
    # ============================================================================>
    # Contour the grid at a particular Z-Value
    # ============================================================================>
    num_clusters, z_clusters = blob_finder.cluster_high_z_values(
        z_map_data=z_map.get_map_data(sparse=False),
        point_mask_idx=dset_total_idxs)
    # ============================================================================>
    # Too many points to cluster -- probably a bad dataset
    # ============================================================================>
    if num_clusters == -1:
        # This dataset is too noisy to analyse - flag!
        log_strs.append('Z-Map too noisy to analyse -- not sure what has gone wrong here...')
        return dataset, dataset_map.meta, log_strs

    # ============================================================================>
    #####
    # FILTER/SELECT CLUSTERS OF Z-SCORES
    #####
    # ============================================================================>
    # Filter the clusters by size and peak height
    # ============================================================================>
    if num_clusters > 0:
        num_clusters, z_clusters = blob_finder.filter_z_clusters_1(z_clusters=z_clusters)
        blob_finder.validate_clusters(z_clusters)
        if num_clusters == 0:
            log_strs.append('===> Minimum cluster peak/size not reached.')
    # ============================================================================>
    # Filter the clusters by distance from protein
    # ============================================================================>
    if num_clusters > 0:
        num_clusters, z_clusters = blob_finder.filter_z_clusters_2(
            z_clusters=z_clusters, dataset=dataset)
        blob_finder.validate_clusters(z_clusters)
        if num_clusters == 0:
            log_strs.append('===> Clusters too far from protein.')
    # ============================================================================>
    # Group Nearby Clusters Together
    # ============================================================================>
    if num_clusters > 0:
        num_clusters, z_clusters = blob_finder.group_clusters(z_clusters=z_clusters)
        blob_finder.validate_clusters(z_clusters)
    # ============================================================================>
    # Filter the clusters by symmetry equivalence
    # ============================================================================>
    if num_clusters > 0:
        num_clusters, z_clusters = blob_finder.filter_z_clusters_3(
            z_clusters=z_clusters, dataset=dataset)
        blob_finder.validate_clusters(z_clusters)

    # ============================================================================>
    #####
    # WRITE MAPS
    #####
    # ============================================================================>
    # write dataset maps in the reference frame
    # ============================================================================>
    if args.output.developer.write_reference_frame_maps:
        dataset_map.to_file(
            filename=dataset.file_manager.get_file('sampled_map'),
            space_group=grid.space_group())
        mean_diff_map.to_file(
            filename=dataset.file_manager.get_file('mean_diff_map'),
            space_group=grid.space_group())
        z_map.to_file(
            filename=dataset.file_manager.get_file('z_map'),
            space_group=grid.space_group())
    # ============================================================================>
    # Write out mask of the high z-values
    # ============================================================================>
    if args.output.developer.write_reference_frame_grid_masks:
        # Write map of where the blobs are (high-Z mask)
        highz_points = []
        [highz_points.extend(list(x[0])) for x in z_clusters]
        highz_points = [map(int, v) for v in highz_points]
        highz_indices = map(grid.indexer(), list(highz_points))
        grid.write_indices_as_map(
            indices=highz_indices,
            f_name=dataset.file_manager.get_file('high_z_mask'),
            origin_shift=True)
    # ============================================================================>
    # Write different Z-Maps? (Probably only needed for testing)
    # ============================================================================>
    if args.output.developer.write_reference_frame_all_z_map_types:
        # z_map_naive.to_file(
        #     filename=dataset.file_manager.get_file('z_map_naive'),
        #     space_group=grid.space_group())
        # z_map_naive_normalised.to_file(
        #     filename=dataset.file_manager.get_file('z_map_naive_normalised'),
        #     space_group=grid.space_group())
        z_map_uncty.to_file(
            filename=dataset.file_manager.get_file('z_map_uncertainty'),
            space_group=grid.space_group())
        z_map_uncty_normalised.to_file(
            filename=dataset.file_manager.get_file('z_map_uncertainty_normalised'),
            space_group=grid.space_group())
        z_map_compl.to_file(
            filename=dataset.file_manager.get_file('z_map_corrected'),
            space_group=grid.space_group())
        z_map_compl_normalised.to_file(
            filename=dataset.file_manager.get_file('z_map_corrected_normalised'),
            space_group=grid.space_group())

    # ============================================================================>
    # Skip to next dataset if no clusters found
    # ============================================================================>
    if num_clusters > 0:
        log_strs.append('===> {!s} Cluster(s) found.'.format(num_clusters))
    else:
        log_strs.append('===> No Clusters found.')
        return (dataset, dataset_map.meta, log_strs)
    assert num_clusters > 0, 'NUMBER OF CLUSTERS AFTER FILTERING == 0!'

    # ============================================================================>
    # Extract the map data in non-sparse format
    # ============================================================================>
    dset_map_data = dataset_map.get_map_data(sparse=False)
    avrg_map_data = map_analyser.average_map().get_map_data(sparse=False)

    # ============================================================================>
    # Process the identified features
    # ============================================================================>
    for event_idx, (event_points, event_values) in enumerate(z_clusters):
        # Number events from 1
        event_num = event_idx + 1
        # Create a unique identifier for this event
        event_key = (dataset.tag, event_num)
        # ============================================================================>
        # Create a point cluster object
        # ============================================================================>
        point_cluster = PointCluster(id=event_key,
                                     points=event_points,
                                     values=event_values)
        # ============================================================================>
        # Estimate the background correction of the detected feature
        # ============================================================================>
        # Extract sites for this cluster and estimate the background correction for the event
        log_strs.append('----------------------------------->>>')
        log_strs.append('Estimating Event {!s} Background Correction'.format(event_num))
        # Generate custom grid mask for this dataset
        event_mask = GridMask(parent=grid,
                              sites_cart=grid.grid2cart(point_cluster.points,
                                                        origin_shift=True),
                              max_dist=2.0,
                              min_dist=0.0)
        log_strs.append('=> Event sites ({!s} points) expanded to {!s} points'.format(
            len(point_cluster.points), len(event_mask.outer_mask_indices())))
        # Select masks to define regions for bdc calculation
        exp_event_idxs = flex.size_t(event_mask.outer_mask_indices())
        reference_idxs = flex.size_t(grid.global_mask().inner_mask_indices())
        # ============================================================================>
        # Generate BDC-estimation curve and estimate BDC
        # ============================================================================>
        event_remains, event_corrs, global_corrs = calculate_varying_bdc_correlations(
            ref_map_data=avrg_map_data,
            query_map_data=dset_map_data,
            feature_idxs=exp_event_idxs,
            reference_idxs=reference_idxs,
            min_remain=1.0 - args.params.background_correction.max_bdc,
            max_remain=1.0 - args.params.background_correction.min_bdc,
            bdc_increment=args.params.background_correction.increment,
            verbose=verbose)
        event_remain_est = calculate_maximum_series_discrepancy(
            labels=event_remains,
            series_1=global_corrs,
            series_2=event_corrs)
        analyse_graphs.write_occupancy_graph(
            f_name=dataset.file_manager.get_file('bdc_est_png').format(event_num),
            x_values=event_remains,
            global_values=global_corrs,
            local_values=event_corrs)
        log_strs.append('=> Event Background Correction estimated as {!s}'.format(
            1 - event_remain_est))
        # Reporting (log is normally silenced)
        blob_finder.log('Min-Max: {} {}'.format(
            1.0 - args.params.background_correction.max_bdc,
            1.0 - args.params.background_correction.min_bdc))
        blob_finder.log('Event number: {}'.format(event_num))
        blob_finder.log('Event Remains: {}'.format(','.join(map(str, event_remains))))
        blob_finder.log('Event Corrs: {}'.format(','.join(map(str, event_corrs))))
        blob_finder.log('Global Corrs: {}'.format(','.join(map(str, global_corrs))))
        # Apply multiplier if provided
        blob_finder.log('Applying multiplier to output 1-BDC: {}'.format(
            args.params.background_correction.output_multiplier))
        event_remain_est = min(
            event_remain_est * args.params.background_correction.output_multiplier,
            1.0 - args.params.background_correction.min_bdc)
        # ============================================================================>
        # Calculate the map correlations at the selected BDC
        # ============================================================================>
        event_map_data = calculate_bdc_subtracted_map(
            ref_map_data=avrg_map_data,
            query_map_data=dset_map_data,
            bdc=1.0 - event_remain_est)
        global_corr = numpy.corrcoef(
            event_map_data.select(reference_idxs),
            avrg_map_data.select(reference_idxs))[0, 1]
        local_corr = numpy.corrcoef(
            event_map_data.select(exp_event_idxs),
            avrg_map_data.select(exp_event_idxs))[0, 1]
        # ============================================================================>
        # Write out EVENT map (in the reference frame) and grid masks
        # ============================================================================>
        if args.output.developer.write_reference_frame_maps:
            event_map = dataset_map.new_from_template(event_map_data, sparse=False)
            event_map.to_file(
                filename=dataset.file_manager.get_file('event_map').format(
                    event_num, event_remain_est),
                space_group=grid.space_group())
        if args.output.developer.write_reference_frame_grid_masks:
            grid.write_indices_as_map(
                indices=event_mask.outer_mask_indices(),
                f_name=dataset.file_manager.get_file('grid_mask').replace('.ccp4', '')
                       + '-event-mask-{}.ccp4'.format(event_num))
        # ============================================================================>
        # Find the nearest atom to the event
        # ============================================================================>
        atm = find_nearest_atoms(
            atoms=list(protein(dataset.model.hierarchy).atoms_with_labels()),
            query=dataset.model.alignment.ref2nat(
                grid.grid2cart(sites_grid=[map(int, point_cluster.centroid)],
                               origin_shift=True)))[0]
        log_strs.append('=> Nearest Residue to event: Chain {}, Residue {} {}'.format(
            atm.chain_id, atm.resname, atm.resid()))
        # ============================================================================>
        # Create an event object
        # ============================================================================>
        event_obj = Event(id=point_cluster.id, cluster=point_cluster)
        event_obj.info.estimated_pseudo_occupancy = event_remain_est
        event_obj.info.estimated_bdc = 1.0 - event_remain_est
        event_obj.info.global_correlation = global_corr
        event_obj.info.local_correlation = local_corr
        # ============================================================================>
        # Append to dataset handler
        # ============================================================================>
        dataset.events.append(event_obj)

    # ============================================================================>
    # Write out pymol script to load all of the maps easily
    # ============================================================================>
    pml = PythonScript()
    pml.set_normalise_maps(False)
    # Load Structures
    name = pml.load_pdb(f_name=dataset.file_manager.get_file('aligned_model'))
    pml.repr_as(obj=name, style='sticks')
    name = pml.load_pdb(f_name=dataset.file_manager.get_file('symmetry_copies'))
    pml.repr_hide(obj=name)
    # Load Sampled Map
    name = pml.load_map(f_name=dataset.file_manager.get_file('sampled_map'))
    mesh = pml.make_mesh(obj=name, contour_level=1.0, colour='blue')
    # Load Z-maps
    name = pml.load_map(f_name=dataset.file_manager.get_file('z_map'))
    mesh = pml.make_mesh(obj=name, mesh_suffix='.plus', contour_level=3.0, colour='green')
    mesh = pml.make_mesh(obj=name, mesh_suffix='.mins', contour_level=-3.0, colour='red')
    # Load Event maps
    for f in sorted(glob.glob(
            dataset.file_manager.get_file('event_map').format('*', '*'))):
        name = pml.load_map(f_name=f)
        mesh = pml.make_mesh(obj=name,
                             contour_level=float(f.split('_')[-2]),
                             colour='hotpink')
    # Load Miscellaneous maps (e.g. masks)
    for f in sorted(glob.glob(
            os.path.join(dataset.file_manager.get_dir('root'), '*mask*.ccp4'))):
        name = pml.load_map(f_name=f)
        mesh = pml.make_mesh(obj=name, contour_level=0.0, colour='grey')
    pml.write_script(f_name=dataset.file_manager.get_file('pymol_script'), overwrite=True)

    return (dataset, dataset_map.meta, log_strs)

def run(args):
    from libtbx.phil import command_line
    from dials.util.command_line import Importer
    from dials.array_family import flex

    print(args)
    importer = Importer(args, check_format=False)
    assert len(importer.datablocks) == 1
    sweeps = importer.datablocks[0].extract_imagesets()
    assert len(sweeps) == 1
    sweep = sweeps[0]

    cmd_line = command_line.argument_interpreter(master_params=master_phil_scope)
    working_phil = cmd_line.process_and_fetch(args=importer.unhandled_arguments)
    working_phil.show()
    params = working_phil.extract()
    assert params.unit_cell is not None
    assert params.space_group is not None
    unit_cell = params.unit_cell
    space_group = params.space_group.group()

    import random
    from dxtbx.model.crystal import crystal_model
    from cctbx import crystal, miller
    from scitbx import matrix

    flex.set_random_seed(params.random_seed)
    random.seed(params.random_seed)

    crystal_symmetry = crystal.symmetry(unit_cell=unit_cell, space_group=space_group)

    # the reciprocal matrix
    B = matrix.sqr(unit_cell.fractionalization_matrix()).transpose()

    n_predicted = flex.double()

    def predict_once(args):
        from dxtbx.model.experiment.experiment_list import Experiment

        U = args[0]
        A = U * B
        direct_matrix = A.inverse()
        cryst_model = crystal_model(
            direct_matrix[0:3],
            direct_matrix[3:6],
            direct_matrix[6:9],
            space_group=space_group,
        )
        experiment = Experiment(
            imageset=sweep,
            beam=sweep.get_beam(),
            detector=sweep.get_detector(),
            goniometer=sweep.get_goniometer(),
            scan=sweep.get_scan(),
            crystal=cryst_model,
        )
        predicted_reflections = flex.reflection_table.from_predictions(experiment)
        miller_indices = predicted_reflections["miller_index"]
        miller_set = miller.set(crystal_symmetry, miller_indices, anomalous_flag=True)
        if params.d_min is not None:
            resolution_sel = miller_set.d_spacings().data() > params.d_min
            predicted_reflections = predicted_reflections.select(resolution_sel)
        return len(predicted_reflections)

    from libtbx import easy_mp

    args = [(random_rotation(),) for i in range(params.n_samples)]
    results = easy_mp.parallel_map(
        func=predict_once,
        iterable=args,
        processes=params.nproc,
        preserve_order=True,
        preserve_exception_message=True,
    )
    n_predicted = flex.double(results)

    print("Basic statistics:")
    from scitbx.math import basic_statistics

    stats = basic_statistics(n_predicted)
    stats.show()

    print("Histogram:")
    hist = flex.histogram(n_predicted, n_slots=20)
    hist.show()

    print("Raw spot counts:")
    print(list(n_predicted))

    if params.plot:
        from matplotlib import pyplot
        from matplotlib.backends.backend_pdf import PdfPages

        pyplot.rc("font", family="serif")
        pyplot.rc("font", serif="Times New Roman")
        red, blue = "#B2182B", "#2166AC"
        fig = pyplot.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.bar(
            hist.slot_centers(),
            hist.slots(),
            width=0.75 * hist.slot_width(),
            color=blue,
            edgecolor=blue,
        )
        ax.set_xlabel("Spot count")
        ax.set_ylabel("Frequency")
        pdf = PdfPages("predicted_count_histogram.pdf")
        pdf.savefig(fig)
        pdf.close()

def scitbx_stats(data):
    from scitbx.math import basic_statistics
    bs = basic_statistics(values=data)
    return bs.mean, bs.bias_corrected_standard_deviation

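# Example use of the helper above with a small flex array (illustrative values only):
from scitbx.array_family import flex

mean, sd = scitbx_stats(flex.double([1.0, 2.0, 3.0, 4.0]))
print(mean, sd)  # 2.5 and the bias-corrected (n-1) standard deviation, ~1.29
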
def add_species(self, pdb_input=None, n_copies=1, form_factor_table='WK1995'):
    if (n_copies <= 0):
        raise Sorry('n_copies has to be greater than or equal to 1')

    # construct entry
    s = species()
    s.n_copies = n_copies
    s.pdb_input = pdb_input

    # find center and radius of sphere enclosing molecule
    atoms = s.pdb_input.atoms()
    x = flex.double(len(atoms))
    y = flex.double(len(atoms))
    z = flex.double(len(atoms))
    for i in xrange(len(atoms)):
        x[i] = atoms[i].xyz[0]
        y[i] = atoms[i].xyz[1]
        z[i] = atoms[i].xyz[2]
    x_stats = basic_statistics(x)
    y_stats = basic_statistics(y)
    z_stats = basic_statistics(z)
    r = flex.double(((x_stats.max - x_stats.min) / 2.0,
                     (y_stats.max - y_stats.min) / 2.0,
                     (z_stats.max - z_stats.min) / 2.0))
    center = (x_stats.mean, y_stats.mean, z_stats.mean)
    s.radius = r.norm()

    # center model at origin
    center = flex.double(center)
    s.xyz = flex.vec3_double(len(atoms))
    for i in xrange(len(atoms)):
        s.xyz[i] = tuple(flex.double(atoms[i].xyz) - center)

    # determine scattering types
    mon_lib_srv = server.server()
    ener_lib = server.ener_lib()
    interpreted_pdb = pdb_interpretation.process(
        mon_lib_srv=mon_lib_srv, ener_lib=ener_lib, pdb_inp=s.pdb_input)
    s.scattering_types = interpreted_pdb.all_chain_proxies.\
        scattering_type_registry.symbols
    s.scattering_type_registry = scattering_type_registry()
    for f in s.scattering_types:
        s.scattering_type_registry.process(f)
    s.scattering_type_registry.assign_from_table(form_factor_table)
    s.n_electrons = s.scattering_type_registry.\
        sum_of_scattering_factors_at_diffraction_angle_0()

    # apply solvent model
    if (self.use_solvent):
        sm = solvent_model()
        sm.interpreted_pdb = interpreted_pdb
        sm.xyz = s.xyz
        sm.fudge_factor = 0.6
        s.scattering_type_registry = sm.add_bulk_solvent(s.scattering_type_registry)
        s.scattering_types = sm.scattering_types
        sm.boundary_layer_scale = 0.6
        s.boundary_layer_scaling_factors = sm.add_boundary_layer_solvent(
            s.scattering_type_registry)
    else:
        s.boundary_layer_scaling_factors = flex.double(len(s.xyz), 0.0)

    # finalize entry
    self.species.append(s)

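# Minimal sketch of the bounding-sphere logic above, isolated from the PDB machinery:
# basic_statistics supplies per-axis min/max/mean, the centre is the per-axis mean and the
# radius is the norm of the per-axis half-extents (as in add_species). The coordinates here
# are purely illustrative.
from scitbx.array_family import flex
from scitbx.math import basic_statistics

sites = flex.vec3_double([(0.0, 0.0, 0.0), (2.0, 4.0, 6.0), (1.0, 1.0, 1.0)])
x, y, z = sites.parts()
x_stats, y_stats, z_stats = basic_statistics(x), basic_statistics(y), basic_statistics(z)
half_extents = flex.double(((x_stats.max - x_stats.min) / 2.0,
                            (y_stats.max - y_stats.min) / 2.0,
                            (z_stats.max - z_stats.min) / 2.0))
center = (x_stats.mean, y_stats.mean, z_stats.mean)
radius = half_extents.norm()
print(center, radius)
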