def show_overall_statistics(m, header):
  """Print summary statistics (min/max/mean, kurtosis, skewness, sigma) of map *m*."""
  stats = maptbx.more_statistics(m)
  report = [
    header,
    " min/max/mean: %6.4f %6.4f %6.4f" % (stats.min(), stats.max(), stats.mean()),
    " kurtosis : %6.4f" % stats.kurtosis(),
    " skewness : %6.4f" % stats.skewness(),
    " sigma : %6.4f" % stats.sigma(),
  ]
  for line in report:
    print(line)
def b_factor_sharpening_by_map_kurtosis_maximization(map_coeffs, show=True,
      b_sharp_best=None, b_only=False, b_min=-100, b_max=100, b_step=5):
  """Sharpen map coefficients with the B-factor that maximizes map kurtosis.

  Scans B-factors in range(b_min, b_max, b_step) unless *b_sharp_best* is
  given, in which case the search is skipped and that value is applied.

  :param map_coeffs: Fourier coefficients of the map (miller array)
  :param show: print per-trial skewness/kurtosis and the selected B
  :param b_sharp_best: pre-selected sharpening B-factor; None triggers search
  :param b_only: if True return only the best B-factor, else sharpened coeffs
  :return: best B-factor (b_only=True) or sharpened map coefficients
  """
  # s^2/4 term of the Gaussian B-factor exponent exp(-B*s^2/4)
  ss = 1. / flex.pow2(map_coeffs.d_spacings().data()) / 4.
  if (b_sharp_best is None):
    # Fixed: original re-assigned b_sharp_best = None here (no-op).
    kurt = -999
    for b_sharp in range(b_min, b_max, b_step):
      k_sharp = 1. / flex.exp(-ss * b_sharp)
      map_coeffs_ = map_coeffs.deep_copy().customized_copy(
        data=map_coeffs.data() * k_sharp)
      fft_map = map_coeffs_.fft_map(resolution_factor=0.25)
      fft_map.apply_sigma_scaling()
      map_data = fft_map.real_map_unpadded()
      o = maptbx.more_statistics(map_data)
      kurt_ = o.kurtosis()
      if (kurt_ > kurt):
        kurt = kurt_
        b_sharp_best = b_sharp
      if (show):
        # Converted from Python-2 print statement to print() function.
        print("b_sharp: %6.1f skewness: %6.4f kurtosis: %6.4f" % (
          b_sharp, o.skewness(), o.kurtosis()))
  if (show):
    print("Best sharpening B-factor:", b_sharp_best)
  k_sharp = 1. / flex.exp(-ss * b_sharp_best)
  if (b_only):
    return b_sharp_best
  else:
    return map_coeffs.customized_copy(data=map_coeffs.data() * k_sharp)
def b_factor_sharpening_by_map_kurtosis_maximization(map_coeffs, show=True,
      b_sharp_best=None, b_only=False):
  """Sharpen map coefficients with the B-factor that maximizes map kurtosis.

  Scans B in range(-100, 100, 5) unless *b_sharp_best* is supplied, in
  which case the search is skipped and that value is applied directly.

  :param map_coeffs: Fourier coefficients of the map (miller array)
  :param show: print per-trial statistics and the selected B-factor
  :param b_sharp_best: pre-selected sharpening B-factor; None triggers search
  :param b_only: if True return only the best B-factor, else sharpened coeffs
  :return: best B-factor (b_only=True) or sharpened map coefficients
  """
  # s^2/4 term of the Gaussian B-factor exponent exp(-B*s^2/4)
  ss = 1./flex.pow2(map_coeffs.d_spacings().data()) / 4.
  if(b_sharp_best is None):
    # Fixed: original re-assigned b_sharp_best = None here (no-op).
    kurt = -999
    for b_sharp in range(-100,100,5):
      k_sharp = 1./flex.exp(-ss * b_sharp)
      map_coeffs_ = map_coeffs.deep_copy().customized_copy(
        data = map_coeffs.data()*k_sharp)
      fft_map = map_coeffs_.fft_map(resolution_factor = 0.25)
      fft_map.apply_sigma_scaling()
      map_data = fft_map.real_map_unpadded()
      o = maptbx.more_statistics(map_data)
      kurt_ = o.kurtosis()
      if(kurt_ > kurt):
        kurt = kurt_
        b_sharp_best = b_sharp
      if(show):
        # Converted from Python-2 print statement to print() function.
        print("b_sharp: %6.1f skewness: %6.4f kurtosis: %6.4f"%(b_sharp,
          o.skewness(), o.kurtosis()))
  if(show):
    print("Best sharpening B-factor:", b_sharp_best)
  k_sharp = 1./flex.exp(-ss * b_sharp_best)
  if(b_only):
    return b_sharp_best
  else:
    return map_coeffs.customized_copy(data = map_coeffs.data()*k_sharp)
def show_overall_statistics(m, header):
  """Print summary statistics (min/max/mean, kurtosis, skewness, sigma) of map *m*.

  Fixed: Python-2 print statements converted to print() calls (the
  surrounding file already uses the Python-3 print function elsewhere).
  """
  s = maptbx.more_statistics(m)
  print(header)
  print(" min/max/mean: %6.4f %6.4f %6.4f" % (s.min(), s.max(), s.mean()))
  print(" kurtosis : %6.4f" % s.kurtosis())
  print(" skewness : %6.4f" % s.skewness())
  print(" sigma : %6.4f" % s.sigma())
def get_map_summary (map, resolution_factor=0.25) :
  """Build a list of (label, formatted value) pairs summarizing an FFT map.

  Note: volume scaling is applied first, then sigma scaling, so the map
  object is left sigma-scaled on return.
  """
  n_grid_points = map.real_map_unpadded().size()
  map.apply_volume_scaling()
  stats_vol = map.statistics()
  info = [
    ("Grid points (with resolution_factor=%g)" % resolution_factor,
     str(n_grid_points)),
    ("Min value (volume-scaled)", "%.2f" % stats_vol.min()),
    ("Max value (volume-scaled)", "%.2f" % stats_vol.max()),
    ("Mean value (volume-scaled)", "%.2f" % stats_vol.mean()),
    ("Sigma (volume-scaled)", "%.2f" % stats_vol.sigma()),
  ]
  map.apply_sigma_scaling()
  stats_sigma = map.statistics()
  info.append(("Min value (sigma-scaled)", "%.2f" % stats_sigma.min()))
  info.append(("Max value (sigma-scaled)", "%.2f" % stats_sigma.max()))
  from cctbx import maptbx
  more_stats = maptbx.more_statistics(map.real_map(False))
  info.append(("Skewness", "%.2f" % more_stats.skewness()))
  return info
def get_map_summary(map, resolution_factor=0.25):
  """Return (label, formatted value) rows describing an FFT map's statistics.

  The map object is mutated: volume scaling is applied, then sigma scaling.
  """
  real_map = map.real_map_unpadded()
  n_grid_points = real_map.size()
  info = []
  add = info.append
  add(("Grid points (with resolution_factor=%g)" % resolution_factor,
       str(n_grid_points)))
  map.apply_volume_scaling()
  stats_vol = map.statistics()
  add(("Min value (volume-scaled)", "%.2f" % stats_vol.min()))
  add(("Max value (volume-scaled)", "%.2f" % stats_vol.max()))
  add(("Mean value (volume-scaled)", "%.2f" % stats_vol.mean()))
  add(("Sigma (volume-scaled)", "%.2f" % stats_vol.sigma()))
  map.apply_sigma_scaling()
  stats_sigma = map.statistics()
  add(("Min value (sigma-scaled)", "%.2f" % stats_sigma.min()))
  add(("Max value (sigma-scaled)", "%.2f" % stats_sigma.max()))
  from cctbx import maptbx
  more_stats = maptbx.more_statistics(map.real_map(False))
  add(("Skewness", "%.2f" % more_stats.skewness()))
  return info
def show_cycle_summary(self, out=None):
  """Record and log statistics for the current density-modification cycle.

  Does nothing when self.params.verbose is false. Accumulates the cycle's
  numbers into self._stats and writes the formatted summary to self.log.

  Fixed: ``print >> self.log`` (Python-2 syntax) converted to
  ``print(..., file=self.log)``.
  """
  if not self.params.verbose: return
  # NOTE(review): `out` is defaulted here but never used below — the
  # summary is written to self.log. Kept for interface compatibility.
  if out is None: out = sys.stdout
  self.more_statistics = maptbx.more_statistics(self.map)
  self._stats.add_cycle(
    cycle=self.i_cycle+1,
    radius=self.radius,
    d_min=self.d_min,
    mask_percent=self.mask_percent,
    mean_solvent_density=self.mean_solvent_density,
    mean_protein_density=self.mean_protein_density,
    f000_over_v=self.f000_over_v,
    truncate_density=self.truncate_density,
    truncate_min=self.truncate_min,
    truncate_min_percent=self.truncate_min_percent,
    truncate_max=self.truncate_max,
    truncate_max_percent=self.truncate_max_percent,
    ncs_cc=self.ncs_cc,
    k_flip=self.k_flip,
    solvent_add=self.solvent_add,
    rms_solvent_density=self.rms_solvent_density,
    rms_protein_density=self.rms_protein_density,
    standard_deviation_local_rms=self.standard_deviation_local_rms,
    # convert mean phase differences from radians to degrees
    mean_delta_phi=flex.mean(self.mean_delta_phi)/pi_180,
    mean_delta_phi_initial=flex.mean(self.mean_delta_phi_initial)/pi_180,
    r1_factor=self.r1_factor,
    r1_factor_fom=self.r1_factor_fom,
    fom=self.mean_fom,
    fom_binned=self.mean_fom_binned,
    skewness=self.more_statistics.skewness())
  summary = self._stats.format_summary()
  print(summary, file=self.log)
  self.log.flush()
  if (not self.as_gui_program) :
    libtbx.call_back(message="summary", data=summary, accumulate=True)
  else :
    libtbx.call_back(message="plot_current_stats",
      data=self._stats.get_fom_for_plot())
def show_cycle_summary(self, out=None):
  """Record and log statistics for the current density-modification cycle.

  No-op unless self.params.verbose. Accumulates the cycle's values into
  self._stats, writes the formatted summary to self.log, and notifies the
  GUI via libtbx.call_back.

  Fixed: ``print >> self.log`` (Python-2 syntax) converted to
  ``print(..., file=self.log)``.
  """
  if not self.params.verbose: return
  # NOTE(review): `out` is defaulted but unused; output goes to self.log.
  if out is None: out = sys.stdout
  self.more_statistics = maptbx.more_statistics(self.map)
  self._stats.add_cycle(
    cycle=self.i_cycle+1,
    radius=self.radius,
    d_min=self.d_min,
    mask_percent=self.mask_percent,
    mean_solvent_density=self.mean_solvent_density,
    mean_protein_density=self.mean_protein_density,
    f000_over_v=self.f000_over_v,
    truncate_density=self.truncate_density,
    truncate_min=self.truncate_min,
    truncate_min_percent=self.truncate_min_percent,
    truncate_max=self.truncate_max,
    truncate_max_percent=self.truncate_max_percent,
    ncs_cc=self.ncs_cc,
    k_flip=self.k_flip,
    solvent_add=self.solvent_add,
    rms_solvent_density=self.rms_solvent_density,
    rms_protein_density=self.rms_protein_density,
    standard_deviation_local_rms=self.standard_deviation_local_rms,
    # radians -> degrees
    mean_delta_phi=flex.mean(self.mean_delta_phi)/pi_180,
    mean_delta_phi_initial=flex.mean(self.mean_delta_phi_initial)/pi_180,
    r1_factor=self.r1_factor,
    r1_factor_fom=self.r1_factor_fom,
    fom=self.mean_fom,
    fom_binned=self.mean_fom_binned,
    skewness=self.more_statistics.skewness())
  summary = self._stats.format_summary()
  print(summary, file=self.log)
  self.log.flush()
  if (not self.as_gui_program) :
    libtbx.call_back(message="summary", data=summary, accumulate=True)
  else :
    libtbx.call_back(message="plot_current_stats",
      data=self._stats.get_fom_for_plot())
def run(args, out=sys.stdout, validated=False):
  """Compare two maps (CCP4 and/or MTZ inputs) and report correlation stats.

  Accepts any two of: CCP4 map files and MTZ files (with mtz_label_N).
  Computes overall map statistics, CC of the input maps, CCpeak over a range
  of cutoffs, the discrepancy (D) function, and per-bin histograms, printing
  each to *out*.

  :param args: command-line arguments (file paths and phil assignments)
  :param out: stream for all printed output
  :param validated: skip validate_params() when True (GUI already validated)
  :return: dict with keys 'map_files', 'map_statistics', 'cc_input_maps',
           'cc_quantile', 'cc_peaks', 'discrepancies', 'map_histograms'
  """
  show_citation(out=out)
  if (len(args) == 0):
    master_phil.show(out=out)
    print('\nUsage: phenix.map_comparison <CCP4> <CCP4>\n',
      ' phenix.map_comparison <CCP4> <MTZ> mtz_label_1=<label>\n',
      ' phenix.map_comparison <MTZ 1> mtz_label_1=<label 1> <MTZ 2> mtz_label_2=<label 2>\n',
      file=out)
    sys.exit()

  # process arguments
  params = None
  input_attributes = ['map_1', 'mtz_1', 'map_2', 'mtz_2']
  try: # automatic parsing
    params = phil.process_command_line_with_files(
      args=args, master_phil=master_phil).work.extract()
  except Exception: # map_file_def only handles one map phil
    # Fallback: manually separate file arguments from phil assignments.
    from libtbx.phil.command_line import argument_interpreter
    arg_int = argument_interpreter(master_phil=master_phil)
    command_line_args = list()
    map_files = list()
    for arg in args:
      if (os.path.isfile(arg)):
        map_files.append(arg)
      else:
        command_line_args.append(arg_int.process(arg))
    params = master_phil.fetch(sources=command_line_args).extract()

    # check if more files are necessary
    n_defined = 0
    for attribute in input_attributes:
      if (getattr(params.input, attribute) is not None):
        n_defined += 1

    # matches files to phil scope, stops once there is sufficient data
    for map_file in map_files:
      if (n_defined < 2):
        current_map = file_reader.any_file(map_file)
        if (current_map.file_type == 'ccp4_map'):
          n_defined += 1
          if (params.input.map_1 is None):
            params.input.map_1 = map_file
          elif (params.input.map_2 is None):
            params.input.map_2 = map_file
        elif (current_map.file_type == 'hkl'):
          n_defined += 1
          if (params.input.mtz_1 is None):
            params.input.mtz_1 = map_file
          elif (params.input.mtz_2 is None):
            params.input.mtz_2 = map_file
      else:
        print('WARNING: only the first two files are used', file=out)
        break

  # validate arguments (GUI sets validated to true, no need to run again)
  assert (params is not None)
  if (not validated):
    validate_params(params)

  # ---------------------------------------------------------------------------
  # check if maps need to be generated from mtz
  n_maps = 0
  maps = list()
  map_names = list()
  for attribute in input_attributes:
    filename = getattr(params.input, attribute)
    if (filename is not None):
      map_names.append(filename)
      current_map = file_reader.any_file(filename)
      maps.append(current_map)
      if (current_map.file_type == 'ccp4_map'):
        n_maps += 1

  # construct maps, if necessary
  crystal_gridding = None
  m1 = None
  m2 = None

  # 1 map, 1 mtz file: grid the FFT map on the CCP4 map's gridding
  if (n_maps == 1):
    for current_map in maps:
      if (current_map.file_type == 'ccp4_map'):
        uc = current_map.file_object.unit_cell()
        sg_info = space_group_info(
          current_map.file_object.space_group_number)
        n_real = current_map.file_object.unit_cell_grid
        crystal_gridding = maptbx.crystal_gridding(
          uc, space_group_info=sg_info, pre_determined_n_real=n_real)
        m1 = current_map.file_object.map_data()
    if (crystal_gridding is not None):
      label = None
      for attribute in [('mtz_1', 'mtz_label_1'), ('mtz_2', 'mtz_label_2')]:
        filename = getattr(params.input, attribute[0])
        label = getattr(params.input, attribute[1])
        if ((filename is not None) and (label is not None)):
          break
      # labels will match currently open mtz file
      for current_map in maps:
        if (current_map.file_type == 'hkl'):
          m2 = miller.fft_map(
            crystal_gridding=crystal_gridding,
            fourier_coefficients=current_map.file_server.get_miller_array(
              label)).apply_sigma_scaling().real_map_unpadded()
    else:
      raise Sorry('Gridding is not defined.')

  # 2 mtz files: grid both on a common gridding at the coarser resolution
  elif (n_maps == 0):
    crystal_symmetry = get_crystal_symmetry(maps[0])
    d_min = min(get_d_min(maps[0]), get_d_min(maps[1]))
    crystal_gridding = maptbx.crystal_gridding(
      crystal_symmetry.unit_cell(), d_min=d_min,
      resolution_factor=params.options.resolution_factor,
      space_group_info=crystal_symmetry.space_group_info())
    m1 = miller.fft_map(
      crystal_gridding=crystal_gridding,
      fourier_coefficients=maps[0].file_server.get_miller_array(
        params.input.mtz_label_1)).apply_sigma_scaling(
        ).real_map_unpadded()
    m2 = miller.fft_map(
      crystal_gridding=crystal_gridding,
      fourier_coefficients=maps[1].file_server.get_miller_array(
        params.input.mtz_label_2)).apply_sigma_scaling(
        ).real_map_unpadded()

  # 2 maps: use the map data directly
  else:
    m1 = maps[0].file_object.map_data()
    m2 = maps[1].file_object.map_data()

  # ---------------------------------------------------------------------------
  # analyze maps
  assert ((m1 is not None) and (m2 is not None))

  # show general statistics
  s1 = maptbx.more_statistics(m1)
  s2 = maptbx.more_statistics(m2)
  show_overall_statistics(out=out, s=s1, header="Map 1 (%s):" % map_names[0])
  show_overall_statistics(out=out, s=s2, header="Map 2 (%s):" % map_names[1])
  cc_input_maps = flex.linear_correlation(x=m1.as_1d(),
    y=m2.as_1d()).coefficient()
  print("CC, input maps: %6.4f" % cc_input_maps, file=out)

  # compute CCpeak on quantile rank-scaled (histogram equalized) maps
  cc_peaks = list()
  m1_he = maptbx.volume_scale(map=m1, n_bins=10000).map_data()
  m2_he = maptbx.volume_scale(map=m2, n_bins=10000).map_data()
  cc_quantile = flex.linear_correlation(x=m1_he.as_1d(),
    y=m2_he.as_1d()).coefficient()
  print("CC, quantile rank-scaled (histogram equalized) maps: %6.4f" %
    cc_quantile, file=out)
  print("Peak correlation:", file=out)
  print(" cutoff CCpeak", file=out)
  # cutoffs: 0.01..0.89 in steps of 0.01, then 0.900..0.999 in steps of 0.001
  cutoffs = [i / 100. for i in range(1, 90)] + \
    [i / 1000 for i in range(900, 1000)]
  for cutoff in cutoffs:
    cc_peak = maptbx.cc_peak(map_1=m1_he, map_2=m2_he, cutoff=cutoff)
    print(" %3.2f %7.4f" % (cutoff, cc_peak), file=out)
    cc_peaks.append((cutoff, cc_peak))

  # compute discrepancy function (D-function)
  discrepancies = list()
  cutoffs = flex.double(cutoffs)
  df = maptbx.discrepancy_function(map_1=m1_he, map_2=m2_he, cutoffs=cutoffs)
  print("Discrepancy function:", file=out)
  print(" cutoff D", file=out)
  for c, d in zip(cutoffs, df):
    print(" %3.2f %7.4f" % (c, d), file=out)
    discrepancies.append((c, d))

  # compute and output histograms
  h1 = maptbx.histogram(map=m1, n_bins=10000)
  h2 = maptbx.histogram(map=m2, n_bins=10000)
  print("Map histograms:", file=out)
  print("Map 1 (%s) Map 2 (%s)"%\
    (params.input.map_1,params.input.map_2), file=out)
  print("(map_value,cdf,frequency) <> (map_value,cdf,frequency)", file=out)
  for a1, c1, v1, a2, c2, v2 in zip(h1.arguments(), h1.c_values(),
      h1.values(), h2.arguments(), h2.c_values(), h2.values()):
    print("(%9.5f %9.5f %9.5f) <> (%9.5f %9.5f %9.5f)"%\
      (a1,c1,v1, a2,c2,v2), file=out)

  # store results
  s1_dict = create_statistics_dict(s=s1)
  s2_dict = create_statistics_dict(s=s2)
  results = dict()
  inputs = list()
  for attribute in input_attributes:
    filename = getattr(params.input, attribute)
    if (filename is not None):
      inputs.append(filename)
  assert (len(inputs) == 2)
  results['map_files'] = inputs
  results['map_statistics'] = (s1_dict, s2_dict)
  results['cc_input_maps'] = cc_input_maps
  results['cc_quantile'] = cc_quantile
  results['cc_peaks'] = cc_peaks
  results['discrepancies'] = discrepancies
  # TODO, verify h1,h2 are not dicts, e.g. .values is py2/3 compat. I assume it is here
  results['map_histograms'] = ((h1.arguments(), h1.c_values(), h1.values()),
    (h2.arguments(), h2.c_values(), h2.values()))
  return results
def skewness(self):
  """Return the skewness of this map's density distribution."""
  stats = maptbx.more_statistics(self.real_map())
  return stats.skewness()
def skewness(self):
  """Skewness of the density values of this map."""
  real = self.real_map()
  return maptbx.more_statistics(real).skewness()
def exercise_statistics():
  """Exercise maptbx.statistics / maptbx.more_statistics on flex grids.

  Checks agreement with flex reductions and scitbx.math.basic_statistics,
  including padded (focus < grid) and origin-shifted arrays, where values
  outside the focus region must be ignored.

  Fixed: Python-2 ``xrange`` replaced with ``range`` (the surrounding file
  otherwise uses Python-3 syntax).
  """
  import scitbx.math
  # basic statistics for every flex type, zero-filled and random
  for flex_type in flex_types():
    a = flex_type(flex.grid((3,5)))
    s = maptbx.statistics(a)
    assert s.min() == 0
    assert s.max() == 0
    assert s.mean() == 0
    assert s.mean_sq() == 0
    assert s.sigma() == 0
    a = flex_type([random.random() for i in range(3*5)])
    a.resize(flex.grid((3,5)))
    s = maptbx.statistics(a)
    assert approx_equal(flex.min(a), s.min())
    assert approx_equal(flex.max(a), s.max())
    assert approx_equal(flex.mean(a), s.mean())
    assert approx_equal(flex.mean_sq(a), s.mean_sq())
    assert approx_equal(
      flex.mean_sq(a)-flex.mean(a)**2, s.sigma()**2)
    # padded copy: values outside the focus must not affect statistics
    b = flex_type(flex.grid((4,6)).set_focus((3,5)))
    for i in range(3):
      for j in range(5):
        b[(i,j)] = a[(i,j)]
    b[(3,5)] = -1
    b[(2,5)] = 2
    b.resize(flex.grid((-2,3), (2,9)).set_focus((1,8)))
    t = maptbx.statistics(b)
    # flex reductions see the padding; maptbx.statistics must not
    assert not_approx_equal(flex.min(b), t.min())
    assert not_approx_equal(flex.max(b), t.max())
    assert not_approx_equal(flex.mean(b), t.mean())
    assert not_approx_equal(flex.mean_sq(b), t.mean_sq())
    assert not_approx_equal(flex.mean_sq(b)-flex.mean(b)**2, t.sigma()**2)
    assert approx_equal(s.min(), t.min())
    assert approx_equal(s.max(), t.max())
    assert approx_equal(s.mean(), t.mean())
    assert approx_equal(s.mean_sq(), t.mean_sq())
    assert approx_equal(s.sigma(), t.sigma())
  # higher-order statistics (skewness, kurtosis), double only
  a = flex.double(flex.grid(5,3))
  s = maptbx.more_statistics(a)
  assert s.min() == 0
  assert s.max() == 0
  assert s.mean() == 0
  assert s.mean_sq() == 0
  assert s.sigma() == 0
  assert s.skewness() == 0
  assert s.kurtosis() == 0
  a = flex.random_double(5*3)
  reference = scitbx.math.basic_statistics(a)
  a.resize(flex.grid(5,3))
  s = maptbx.more_statistics(a)
  assert approx_equal(s.min(), reference.min)
  assert approx_equal(s.max(), reference.max)
  assert approx_equal(s.mean(), reference.mean)
  assert approx_equal(s.sigma(), reference.biased_standard_deviation)
  assert approx_equal(s.skewness(), reference.skew)
  assert approx_equal(s.kurtosis(), reference.kurtosis)
  # padded + origin-shifted copy
  b = flex.double(flex.grid((6,4)).set_focus((5,3)))
  for i in range(5):
    for j in range(3):
      b[(i,j)] = a[(i,j)]
  b[(5,3)] = -1
  b[(5,2)] = 2
  b.resize(flex.grid((3,-2), (9,2)).set_focus((8,1)))
  # NOTE(review): `t` is assigned but the following asserts re-check `s`
  # against `reference` — possibly intended to compare `t`; left unchanged.
  t = maptbx.statistics(b)
  assert approx_equal(s.min(), reference.min)
  assert approx_equal(s.max(), reference.max)
  assert approx_equal(s.mean(), reference.mean)
  assert approx_equal(s.sigma(), reference.biased_standard_deviation)
  assert approx_equal(s.skewness(), reference.skew)
  assert approx_equal(s.kurtosis(), reference.kurtosis)
  m = flex.double(flex.grid((6,4,8)).set_focus((5,3,7)))
def run(args, out=sys.stdout, validated=False):
  """Compare two maps (CCP4 and/or MTZ inputs) and report correlation stats.

  Accepts any two of: CCP4 map files and MTZ files (with mtz_label_N).
  Computes overall map statistics, CC of the input maps, CCpeak over a range
  of cutoffs, the discrepancy (D) function, and per-bin histograms.

  Fixes applied:
  * Python-2 ``print >> out`` statements converted to ``print(..., file=out)``.
  * The high-resolution cutoff tail used ``i/1000`` (integer division under
    Python 2, yielding 0 for every i in 900..999); now ``i/1000.`` so the
    cutoffs are 0.900..0.999 as intended (matching the leading ``i/100.``).

  :param args: command-line arguments (file paths and phil assignments)
  :param out: stream for all printed output
  :param validated: skip validate_params() when True (GUI already validated)
  :return: dict with keys 'map_files', 'map_statistics', 'cc_input_maps',
           'cc_quantile', 'cc_peaks', 'discrepancies', 'map_histograms'
  """
  show_citation(out=out)
  if (len(args) == 0):
    master_phil.show(out=out)
    print('\nUsage: phenix.map_comparison <CCP4> <CCP4>\n',
      ' phenix.map_comparison <CCP4> <MTZ> mtz_label_1=<label>\n',
      ' phenix.map_comparison <MTZ 1> mtz_label_1=<label 1> <MTZ 2> mtz_label_2=<label 2>\n',
      file=out)
    sys.exit()

  # process arguments
  params = None
  input_attributes = ['map_1', 'mtz_1', 'map_2', 'mtz_2']
  try: # automatic parsing
    params = phil.process_command_line_with_files(
      args=args, master_phil=master_phil).work.extract()
  except Exception: # map_file_def only handles one map phil
    from libtbx.phil.command_line import argument_interpreter
    arg_int = argument_interpreter(master_phil=master_phil)
    command_line_args = list()
    map_files = list()
    for arg in args:
      if (os.path.isfile(arg)):
        map_files.append(arg)
      else:
        command_line_args.append(arg_int.process(arg))
    params = master_phil.fetch(sources=command_line_args).extract()

    # check if more files are necessary
    n_defined = 0
    for attribute in input_attributes:
      if (getattr(params.input, attribute) is not None):
        n_defined += 1

    # matches files to phil scope, stops once there is sufficient data
    for map_file in map_files:
      if (n_defined < 2):
        current_map = file_reader.any_file(map_file)
        if (current_map.file_type == 'ccp4_map'):
          n_defined += 1
          if (params.input.map_1 is None):
            params.input.map_1 = map_file
          elif (params.input.map_2 is None):
            params.input.map_2 = map_file
        elif (current_map.file_type == 'hkl'):
          n_defined += 1
          if (params.input.mtz_1 is None):
            params.input.mtz_1 = map_file
          elif (params.input.mtz_2 is None):
            params.input.mtz_2 = map_file
      else:
        print('WARNING: only the first two files are used', file=out)
        break

  # validate arguments (GUI sets validated to true, no need to run again)
  assert (params is not None)
  if (not validated):
    validate_params(params)

  # ---------------------------------------------------------------------------
  # check if maps need to be generated from mtz
  n_maps = 0
  maps = list()
  map_names = list()
  for attribute in input_attributes:
    filename = getattr(params.input, attribute)
    if (filename is not None):
      map_names.append(filename)
      current_map = file_reader.any_file(filename)
      maps.append(current_map)
      if (current_map.file_type == 'ccp4_map'):
        n_maps += 1

  # construct maps, if necessary
  crystal_gridding = None
  m1 = None
  m2 = None

  # 1 map, 1 mtz file: grid the FFT map on the CCP4 map's gridding
  if (n_maps == 1):
    for current_map in maps:
      if (current_map.file_type == 'ccp4_map'):
        uc = current_map.file_object.unit_cell()
        sg_info = space_group_info(current_map.file_object.space_group_number)
        n_real = current_map.file_object.unit_cell_grid
        crystal_gridding = maptbx.crystal_gridding(
          uc, space_group_info=sg_info, pre_determined_n_real=n_real)
        m1 = current_map.file_object.map_data()
    if (crystal_gridding is not None):
      label = None
      for attribute in [('mtz_1', 'mtz_label_1'), ('mtz_2', 'mtz_label_2')]:
        filename = getattr(params.input, attribute[0])
        label = getattr(params.input, attribute[1])
        if ( (filename is not None) and (label is not None) ):
          break
      # labels will match currently open mtz file
      for current_map in maps:
        if (current_map.file_type == 'hkl'):
          m2 = miller.fft_map(
            crystal_gridding=crystal_gridding,
            fourier_coefficients=current_map.file_server.get_miller_array(
              label)).apply_sigma_scaling().real_map_unpadded()
    else:
      raise Sorry('Gridding is not defined.')

  # 2 mtz files: grid both on a common gridding at the coarser resolution
  elif (n_maps == 0):
    crystal_symmetry = get_crystal_symmetry(maps[0])
    d_min = min(get_d_min(maps[0]), get_d_min(maps[1]))
    crystal_gridding = maptbx.crystal_gridding(
      crystal_symmetry.unit_cell(), d_min=d_min,
      resolution_factor=params.options.resolution_factor,
      space_group_info=crystal_symmetry.space_group_info())
    m1 = miller.fft_map(
      crystal_gridding=crystal_gridding,
      fourier_coefficients=maps[0].file_server.get_miller_array(
        params.input.mtz_label_1)).apply_sigma_scaling().real_map_unpadded()
    m2 = miller.fft_map(
      crystal_gridding=crystal_gridding,
      fourier_coefficients=maps[1].file_server.get_miller_array(
        params.input.mtz_label_2)).apply_sigma_scaling().real_map_unpadded()

  # 2 maps: use the map data directly
  else:
    m1 = maps[0].file_object.map_data()
    m2 = maps[1].file_object.map_data()

  # ---------------------------------------------------------------------------
  # analyze maps
  assert ( (m1 is not None) and (m2 is not None) )

  # show general statistics
  s1 = maptbx.more_statistics(m1)
  s2 = maptbx.more_statistics(m2)
  show_overall_statistics(out=out, s=s1, header="Map 1 (%s):"%map_names[0])
  show_overall_statistics(out=out, s=s2, header="Map 2 (%s):"%map_names[1])
  cc_input_maps = flex.linear_correlation(x = m1.as_1d(),
    y = m2.as_1d()).coefficient()
  print("CC, input maps: %6.4f" % cc_input_maps, file=out)

  # compute CCpeak on quantile rank-scaled (histogram equalized) maps
  cc_peaks = list()
  m1_he = maptbx.volume_scale(map = m1, n_bins = 10000).map_data()
  m2_he = maptbx.volume_scale(map = m2, n_bins = 10000).map_data()
  cc_quantile = flex.linear_correlation(x = m1_he.as_1d(),
    y = m2_he.as_1d()).coefficient()
  print("CC, quantile rank-scaled (histogram equalized) maps: %6.4f" %
    cc_quantile, file=out)
  print("Peak correlation:", file=out)
  print(" cutoff CCpeak", file=out)
  # cutoffs: 0.01..0.89 in 0.01 steps, then 0.900..0.999 in 0.001 steps
  cutoffs = [i/100. for i in range(1,90)] + \
    [i/1000. for i in range(900,1000)]
  for cutoff in cutoffs:
    cc_peak = maptbx.cc_peak(map_1=m1_he, map_2=m2_he, cutoff=cutoff)
    print(" %3.2f %7.4f" % (cutoff, cc_peak), file=out)
    cc_peaks.append((cutoff, cc_peak))

  # compute discrepancy function (D-function)
  discrepancies = list()
  cutoffs = flex.double(cutoffs)
  df = maptbx.discrepancy_function(map_1=m1_he, map_2=m2_he, cutoffs=cutoffs)
  print("Discrepancy function:", file=out)
  print(" cutoff D", file=out)
  for c, d in zip(cutoffs, df):
    print(" %3.2f %7.4f" % (c,d), file=out)
    discrepancies.append((c, d))

  # compute and output histograms
  h1 = maptbx.histogram(map=m1, n_bins=10000)
  h2 = maptbx.histogram(map=m2, n_bins=10000)
  print("Map histograms:", file=out)
  print("Map 1 (%s) Map 2 (%s)"%\
    (params.input.map_1,params.input.map_2), file=out)
  print("(map_value,cdf,frequency) <> (map_value,cdf,frequency)", file=out)
  for a1,c1,v1, a2,c2,v2 in zip(h1.arguments(), h1.c_values(), h1.values(),
      h2.arguments(), h2.c_values(), h2.values()):
    print("(%9.5f %9.5f %9.5f) <> (%9.5f %9.5f %9.5f)"%\
      (a1,c1,v1, a2,c2,v2), file=out)

  # store results
  s1_dict = create_statistics_dict(s=s1)
  s2_dict = create_statistics_dict(s=s2)
  results = dict()
  inputs = list()
  for attribute in input_attributes:
    filename = getattr(params.input,attribute)
    if (filename is not None):
      inputs.append(filename)
  assert (len(inputs) == 2)
  results['map_files'] = inputs
  results['map_statistics'] = (s1_dict, s2_dict)
  results['cc_input_maps'] = cc_input_maps
  results['cc_quantile'] = cc_quantile
  results['cc_peaks'] = cc_peaks
  results['discrepancies'] = discrepancies
  results['map_histograms'] = ( (h1.arguments(), h1.c_values(), h1.values()),
    (h2.arguments(), h2.c_values(), h2.values()) )
  return results
def run(args, validated=False):
  """Compare two CCP4 maps and report correlation statistics to stdout.

  Reads two CCP4 map files (map_1, map_2), prints overall statistics,
  input-map CC, CCpeak at a grid of cutoffs, the discrepancy (D) function,
  and per-bin histograms, and returns all results in a dict.

  Fixed: Python-2 print statements converted to print() calls.

  :param args: command-line arguments (file paths and phil assignments)
  :param validated: skip validate_params() when True (GUI already validated)
  :return: dict with keys 'map_files', 'map_statistics', 'cc_input_maps',
           'cc_quantile', 'cc_peaks', 'discrepancies', 'map_histograms'
  """
  show_citation()
  if ( (len(args) == 0) or (len(args) > 2) ):
    print('\nUsage: phenix.map_comparison map_1=<first map> map_2=<second map>\n')
    sys.exit()

  # process arguments
  try: # automatic parsing
    params = phil.process_command_line_with_files(
      args=args, master_phil=master_phil).work.extract()
  except Exception: # map_file_def only handles one map phil
    from libtbx.phil.command_line import argument_interpreter
    arg_int = argument_interpreter(master_phil=master_phil)
    command_line_args = list()
    map_files = list()
    for arg in args:
      if (os.path.isfile(arg)):
        map_files.append(arg)
      else:
        command_line_args.append(arg_int.process(arg))
    params = master_phil.fetch(sources=command_line_args).extract()
    for map_file in map_files:
      if (params.input.map_1 is None):
        params.input.map_1 = map_file
      else:
        params.input.map_2 = map_file

  # validate arguments (GUI sets validated to true, no need to run again)
  if (not validated):
    validate_params(params)

  # ---------------------------------------------------------------------------
  # map 1
  ccp4_map_1 = iotbx.ccp4_map.map_reader(file_name=params.input.map_1)
  # NOTE(review): cs_1/cs_2 are constructed but never used below — possibly
  # intended for a symmetry-compatibility check; left unchanged.
  cs_1 = crystal.symmetry(ccp4_map_1.unit_cell().parameters(),
    ccp4_map_1.space_group_number)
  m1 = ccp4_map_1.map_data()

  # map 2
  ccp4_map_2 = iotbx.ccp4_map.map_reader(file_name=params.input.map_2)
  cs_2 = crystal.symmetry(ccp4_map_2.unit_cell().parameters(),
    ccp4_map_2.space_group_number)
  m2 = ccp4_map_2.map_data()

  # show general statistics
  s1 = maptbx.more_statistics(m1)
  s2 = maptbx.more_statistics(m2)
  show_overall_statistics(s=s1, header="Map 1 (%s):"%params.input.map_1)
  show_overall_statistics(s=s2, header="Map 2 (%s):"%params.input.map_2)
  cc_input_maps = flex.linear_correlation(x = m1.as_1d(),
    y = m2.as_1d()).coefficient()
  print("CC, input maps: %6.4f" % cc_input_maps)

  # compute CCpeak on quantile rank-scaled (histogram equalized) maps
  cc_peaks = list()
  m1_he = maptbx.volume_scale(map = m1, n_bins = 10000).map_data()
  m2_he = maptbx.volume_scale(map = m2, n_bins = 10000).map_data()
  cc_quantile = flex.linear_correlation(x = m1_he.as_1d(),
    y = m2_he.as_1d()).coefficient()
  print("CC, quantile rank-scaled (histogram equalized) maps: %6.4f" %
    cc_quantile)
  print("Peak correlation:")
  print(" cutoff CCpeak")
  for cutoff in [i/100. for i in range(0,100,5)]+[0.99, 1.0]:
    cc_peak = maptbx.cc_peak(map_1=m1_he, map_2=m2_he, cutoff=cutoff)
    print(" %3.2f %7.4f" % (cutoff, cc_peak))
    cc_peaks.append((cutoff, cc_peak))

  # compute discrepancy function (D-function)
  discrepancies = list()
  cutoffs = flex.double([i/20. for i in range(1,20)])
  df = maptbx.discrepancy_function(map_1=m1_he, map_2=m2_he, cutoffs=cutoffs)
  print("Discrepancy function:")
  print(" cutoff D")
  for c, d in zip(cutoffs, df):
    print(" %3.2f %7.4f" % (c,d))
    discrepancies.append((c, d))

  # compute and output histograms
  h1 = maptbx.histogram(map=m1, n_bins=10000)
  h2 = maptbx.histogram(map=m2, n_bins=10000)
  print("Map histograms:")
  print("Map 1 (%s) Map 2 (%s)"%(params.input.map_1,params.input.map_2))
  print("(map_value,cdf,frequency) <> (map_value,cdf,frequency)")
  for a1,c1,v1, a2,c2,v2 in zip(h1.arguments(), h1.c_values(), h1.values(),
      h2.arguments(), h2.c_values(), h2.values()):
    print("(%9.5f %9.5f %9.5f) <> (%9.5f %9.5f %9.5f)"%(a1,c1,v1, a2,c2,v2))

  # store results
  s1_dict = create_statistics_dict(s1)
  s2_dict = create_statistics_dict(s2)
  results = dict()
  results['map_files'] = (params.input.map_1, params.input.map_2)
  results['map_statistics'] = (s1_dict, s2_dict)
  results['cc_input_maps'] = cc_input_maps
  results['cc_quantile'] = cc_quantile
  results['cc_peaks'] = cc_peaks
  results['discrepancies'] = discrepancies
  results['map_histograms'] = ( (h1.arguments(), h1.c_values(), h1.values()),
    (h2.arguments(), h2.c_values(), h2.values()) )
  return results