def matthews_analysis(self):
    """Run a Matthews-Rupp analysis on ``self.f_obs`` and back-fill parameters.

    The analysis is seeded with the residue and base counts currently held in
    ``self.params.asu_contents``.  Whatever ``.show(self.log)`` returns is kept
    as ``self.matthews_result``.  Afterwards:

    * ``n_residues`` and ``n_bases`` are always overwritten from the result;
    * ``n_copies_per_asu`` and ``self.params.solvent_fraction`` are only
      filled in when they are still ``None``.
    """
    from mmtbx.scaling import matthews

    asu = self.params.asu_contents
    analysis = matthews.matthews_rupp(
        crystal_symmetry=self.f_obs,
        n_residues=asu.n_residues,
        n_bases=asu.n_bases,
    )
    # The stored object is the return value of show(), not the bare analysis.
    self.matthews_result = analysis.show(self.log)
    outcome = self.matthews_result

    asu.n_residues = outcome.n_residues
    asu.n_bases = outcome.n_bases
    if asu.n_copies_per_asu is None:
        asu.n_copies_per_asu = outcome.n_copies
    if self.params.solvent_fraction is None:
        self.params.solvent_fraction = outcome.solvent_content
def run(args, out=sys.stdout):
    """Command-line entry point: report the expected Matthews coefficient.

    Crystal symmetry comes from the command line and/or a reflection file;
    the ASU composition comes from explicit counts, a sequence file, or a
    model file (protein chains count as residues, nucleic-acid chains as
    bases).

    Args:
      args: command-line arguments, handed to
        ``iotbx.phil.process_command_line_with_files``.
      out: stream that receives all printed output.

    Returns:
      The object returned by ``matthews.matthews_rupp`` (after ``show``).

    Raises:
      Sorry: if symmetry is incomplete and no data file was given, if the data
        file yields no symmetry object, or if the sequence file yields no
        composition.
    """
    cmdline = iotbx.phil.process_command_line_with_files(
        args=args,
        master_phil_string=master_phil_str,
        pdb_file_def="model",
        reflection_file_def="data",
        seq_file_def="sequence",
        space_group_def="space_group",
        unit_cell_def="unit_cell",
        integer_def="n_residues",
        usage_string="""\
phenix.matthews [data.hkl] [space_group] [unit_cell] [sequence] [n_residues] ...
  Calculate the expected Matthews coefficient given the crystal symmetry and
  crystallized molecule(s).
""",
    )
    params = cmdline.work.extract()

    if (params.space_group is None) or (params.unit_cell is None):
        if params.data is None:
            raise Sorry(
                "You must supply both a space group and a unit cell (or "
                + "a data file containing this information)."
            )
        symm = crystal_symmetry_from_any.extract_from(file_name=params.data)
        if symm is None:
            # Without this guard a missing result would only show up as an
            # AttributeError on the next line.
            raise Sorry(
                "No crystal symmetry could be extracted from the data file %s."
                % params.data
            )
        space_group_from_file = symm.space_group()
        if params.space_group is None:
            if space_group_from_file is not None:
                # params.space_group is handed to crystal.symmetry() below as
                # `space_group_info`, so store the info object rather than the
                # bare space group.
                params.space_group = symm.space_group_info()
        elif space_group_from_file is not None:
            # NOTE(review): this compares the file's space group with whatever
            # type params.space_group holds; if that is a space_group_info the
            # comparison may never be equal - confirm and compare like types.
            if space_group_from_file != params.space_group:
                print >> out, "WARNING: space group mismatch between command line " + "and file:"
                print >> out, " %s (cmdline), %s (file)" % (
                    params.space_group,
                    space_group_from_file,
                )
        if params.unit_cell is None:
            params.unit_cell = symm.unit_cell()
    validate_params(params, check_symmetry=True)

    if params.sequence is not None:
        # Explicit counts and a sequence file are mutually exclusive.
        assert params.n_residues is None and params.n_bases is None
        seq_comp = iotbx.bioinformatics.composition_from_sequence_file(
            file_name=params.sequence, log=out
        )
        if seq_comp is None:
            raise Sorry(
                "No composition information could be obtained from the "
                + "sequence file."
            )
        params.n_residues = seq_comp.n_residues
        params.n_bases = seq_comp.n_bases
    elif params.model is not None:
        assert params.n_residues is None and params.n_bases is None
        from iotbx.file_reader import any_file

        params.n_residues = 0
        params.n_bases = 0
        pdb_in = any_file(params.model)
        hierarchy = pdb_in.file_object.hierarchy
        # Only the first model is counted.
        for chain in hierarchy.models()[0].chains():
            if chain.is_protein():
                params.n_residues += chain.residue_groups_size()
            elif chain.is_na():
                params.n_bases += chain.residue_groups_size()

    print >> out, "Space group: %s" % params.space_group
    print >> out, "Unit cell: %s" % params.unit_cell
    # Either count may still be None here (e.g. only the other one was given);
    # guard explicitly instead of relying on None-vs-int ordering.
    if params.n_residues is not None and params.n_residues > 0:
        print >> out, "Number of residues: %d" % params.n_residues
    if params.n_bases is not None and params.n_bases > 0:
        print >> out, "Number of bases: %d" % params.n_bases

    symm = crystal.symmetry(
        space_group_info=params.space_group, unit_cell=params.unit_cell
    )
    from mmtbx.scaling import matthews

    result = matthews.matthews_rupp(
        crystal_symmetry=symm,
        n_residues=params.n_residues,
        n_bases=params.n_bases,
    )
    result.show(out=out)
    return result
def __init__(self, miller_array, phil_object, out=None, out_plot=None,
             miller_calc=None, original_intensities=None,
             completeness_as_non_anomalous=None, verbose=0):
    """Run Matthews, I/sigma and ML Wilson-scaling analyses on ``miller_array``.

    Side effects on ``phil_object.scaling.input``:

    * ``asu_contents.n_residues`` / ``n_bases`` are replaced by elements 0 and
      1 of the Matthews result (after optionally being derived from the
      sequence file);
    * ``asu_contents.n_copies_per_asu`` is filled from element 2 of the
      Matthews result when it was ``None``;
    * ``parameters.misc_twin_parameters.twin_test_cuts.high_resolution`` is
      filled from the I/sigma statistics when it was ``None`` and sigmas exist.

    Args:
      miller_array: data to analyse; also passed as the crystal symmetry.
      phil_object: extracted parameters (read and modified as listed above).
      out: output stream; defaults to ``sys.stdout``.
      out_plot: not used in the code visible here.
      miller_calc: not used in the code visible here.
      original_intensities: optional array used instead of ``miller_array``
        for the I/sigma statistics; must be an X-ray intensity array.
      completeness_as_non_anomalous: forwarded to the I/sigma statistics.
      verbose: section headers are printed when greater than zero.

    Raises:
      Sorry: if both the residue and the base count end up as zero.
    """
    if out is None:
        out = sys.stdout
    asu = phil_object.scaling.input.asu_contents
    cuts = phil_object.scaling.input.parameters.misc_twin_parameters.twin_test_cuts

    if verbose > 0:
        print >> out
        print >> out
        print >> out, "Matthews coefficient and Solvent content statistics"
    n_copies_solc = 1.0
    self.nres_known = False
    counts_given = (asu.n_residues is not None or asu.n_bases is not None)
    if counts_given:
        # Explicit counts win over any sequence file.
        self.nres_known = True
        if asu.sequence_file is not None:
            print >> out, " warning: ignoring sequence file"
    elif asu.sequence_file is not None:
        print >> out, " determining composition from sequence file %s" % \
            asu.sequence_file
        seq_comp = iotbx.bioinformatics.composition_from_sequence_file(
            file_name=asu.sequence_file,
            log=out)
        if seq_comp is not None:
            asu.n_residues = seq_comp.n_residues
            asu.n_bases = seq_comp.n_bases
            self.nres_known = True

    matthews_results = matthews.matthews_rupp(
        crystal_symmetry=miller_array,
        n_residues=asu.n_residues,
        n_bases=asu.n_bases,
        out=out,
        verbose=1)
    asu.n_residues = matthews_results[0]
    asu.n_bases = matthews_results[1]
    n_copies_solc = matthews_results[2]
    self.matthews_results = matthews_results

    if asu.n_copies_per_asu is None:
        # Nothing supplied by the user: adopt the Matthews estimate.
        asu.n_copies_per_asu = n_copies_solc
        self.guessed_copies = n_copies_solc
    else:
        n_copies_solc = asu.n_copies_per_asu
        self.defined_copies = n_copies_solc
        if verbose > 0:
            print >> out, "Number of copies per asymmetric unit provided"
            print >> out, " Will use user specified value of ", n_copies_solc

    # first report on I over sigma
    self.data_strength = None
    intensities = miller_array
    if original_intensities is not None:
        assert original_intensities.is_xray_intensity_array()
        intensities = original_intensities
    if intensities.sigmas() is not None:
        data_strength = data_statistics.i_sigi_completeness_stats(
            intensities,
            isigi_cut=cuts.isigi_cut,
            completeness_cut=cuts.completeness_cut,
            completeness_as_non_anomalous=completeness_as_non_anomalous)
        data_strength.show(out)
        self.data_strength = data_strength
        if cuts.high_resolution is None:
            cuts.high_resolution = (
                data_strength.resolution_at_least
                if data_strength.resolution_cut > data_strength.resolution_at_least
                else data_strength.resolution_cut)

    ## Isotropic wilson scaling
    if verbose > 0:
        print >> out
        print >> out
        print >> out, "Maximum likelihood isotropic Wilson scaling "

    n_residues = 0 if asu.n_residues is None else asu.n_residues
    n_bases = 0 if asu.n_bases is None else asu.n_bases
    if n_bases + n_residues == 0:
        raise Sorry("No scatterers available")

    # Scale the per-copy counts by the space-group order and the copy number.
    order_z = miller_array.space_group().order_z()
    scaled_residues = n_residues * order_z * n_copies_solc
    scaled_bases = n_bases * order_z * n_copies_solc

    iso_scale_and_b = absolute_scaling.ml_iso_absolute_scaling(
        miller_array=miller_array,
        n_residues=scaled_residues,
        n_bases=scaled_bases)
    iso_scale_and_b.show(out=out, verbose=verbose)
    self.iso_scale_and_b = iso_scale_and_b
    ## Store the b and scale values from isotropic ML scaling
    self.iso_p_scale = iso_scale_and_b.p_scale
    self.iso_b_wilson = iso_scale_and_b.b_wilson

    ## Anisotropic ml wilson scaling
    if verbose > 0:
        print >> out
        print >> out
        print >> out, "Maximum likelihood anisotropic Wilson scaling "
    aniso_scale_and_b = absolute_scaling.ml_aniso_absolute_scaling(
        miller_array=miller_array,
        n_residues=scaled_residues,
        n_bases=scaled_bases)
    aniso_scale_and_b.show(out=out, verbose=1)
    self.aniso_scale_and_b = aniso_scale_and_b

    # Probe for b_cart; if the attribute is missing, report and stop here.
    try:
        b_cart = aniso_scale_and_b.b_cart
    except AttributeError as e:
        print >> out, "*** ERROR ***"
        print >> out, str(e)
        show_exception_info_if_full_testing()
        return
def _ml_normalisation(intensities, aniso):
    """Return a copy of ``intensities`` normalised by ML absolute scaling.

    The residue count is taken from a Matthews-Rupp analysis of the crystal
    symmetry.  With ``aniso`` true, anisotropic scaling is used and its
    ``u_star`` applied directly; otherwise isotropic scaling is used and
    ``u_star`` is derived from the Wilson B.  The estimates go to the log at
    INFO level, the full reports at DEBUG level.

    Args:
      intensities: array whose data and sigmas are both rescaled.
      aniso: selects anisotropic (true) or isotropic (false) scaling.

    Returns:
      ``intensities.customized_copy(...)`` with normalised data and sigmas.
    """
    # estimate number of residues per unit cell
    mr = matthews.matthews_rupp(intensities.crystal_symmetry())
    n_residues = mr.n_residues

    # estimate B-factor and scale factors, logging the estimate as we go
    if aniso:
        normalisation = absolute_scaling.ml_aniso_absolute_scaling(
            intensities, n_residues=n_residues
        )
        u_star = normalisation.u_star
        b_cart = normalisation.b_cart
        logger.info("ML estimate of overall B_cart value:")
        logger.info(
            """\
%5.2f, %5.2f, %5.2f
%12.2f, %5.2f
%19.2f""",
            b_cart[0],
            b_cart[3],
            b_cart[4],
            b_cart[1],
            b_cart[5],
            b_cart[2],
        )
    else:
        normalisation = absolute_scaling.ml_iso_absolute_scaling(
            intensities, n_residues=n_residues
        )
        u_star = adptbx.b_as_u(
            adptbx.u_iso_as_u_star(intensities.unit_cell(), normalisation.b_wilson)
        )
        logger.info("ML estimate of overall B value:")
        logger.info(" %5.2f A**2", normalisation.b_wilson)
    logger.info("ML estimate of -log of scale factor:")
    logger.info(" %5.2f", normalisation.p_scale)

    # full reports are captured and only emitted at debug level
    report = StringIO()
    mr.show(out=report)
    normalisation.show(out=report)
    logger.debug(report.getvalue())

    def _rescale(values):
        # same scale and u_star for data and sigmas
        return scaling.ml_normalise_aniso(
            intensities.indices(),
            values,
            normalisation.p_scale,
            intensities.unit_cell(),
            u_star,
        )

    # apply scales
    return intensities.customized_copy(
        data=_rescale(intensities.data()),
        sigmas=_rescale(intensities.sigmas()),
    )
def __init__(self, miller_array, pre_scaling_protocol, basic_info, out=None):
    """Pre-scale a private copy of ``miller_array``; the result is ``self.x1``.

    Steps, in order:

    1. Matthews analysis to determine the unit-cell contents.
    2. Resolution cut, when ``low_resolution`` and/or ``high_resolution``
       is set on the protocol.
    3. Wilson-statistics based outlier removal.
    4. Anisotropic absolute scaling and correction, when
       ``aniso_correction`` is set on the protocol.

    Args:
      miller_array: input data; deep-copied, the caller's array is untouched.
      pre_scaling_protocol: options object.  Read here: ``low_resolution``,
        ``high_resolution``, ``outlier_level_extreme``,
        ``outlier_level_wilson``, ``aniso_correction``, ``b_add``.
      basic_info: object with ``n_residues`` / ``n_bases``; ``None`` values
        are filled in from the Matthews analysis.
      out: stream for the printed report; passed on unchanged to the
        analysis objects.
    """
    ## Make deep copy of the miller array of interest
    self.x1 = miller_array.deep_copy()
    self.options = pre_scaling_protocol
    self.basic_info = basic_info

    ## Determine unit_cell contents
    print >> out
    print >> out, "Matthews analyses"
    print >> out, "-----------------"
    print >> out
    print >> out, "Inspired by: Kantardjieff and Rupp. Prot. Sci. 12(9): 1865-1871 (2003)."
    matthews_analyses = matthews.matthews_rupp(
        crystal_symmetry=self.x1,
        n_residues=self.basic_info.n_residues,
        n_bases=self.basic_info.n_bases,
        out=out,
        verbose=1)
    n_residues = matthews_analyses[0]
    n_bases = matthews_analyses[1]
    n_copies_solc = matthews_analyses[2]

    if self.basic_info.n_residues is None:
        self.basic_info.n_residues = n_residues
    if self.basic_info.n_bases is None:
        self.basic_info.n_bases = n_bases

    ## apply resolution cut
    print >> out
    print >> out, "Applying resolution cut"
    print >> out, "-----------------------"

    low_res = self.options.low_resolution
    high_res = self.options.high_resolution
    if low_res is None and high_res is None:
        print >> out, "No resolution cut is made"

    low_cut = float(1e6)
    if low_res is not None:
        low_cut = low_res
        print >> out, "Specified low resolution limit: %3.2f" % (float(low_res))

    high_cut = 0
    if high_res is not None:
        high_cut = high_res
        print >> out, "Specified high resolution limit: %3.2f" % (float(high_res))

    # The limits used to be computed and announced but never applied, so the
    # requested cut silently had no effect on the data.
    if low_res is not None or high_res is not None:
        self.x1 = self.x1.resolution_filter(d_max=low_cut, d_min=high_cut)

    ## perform outlier analyses
    ##
    ## Do a simple outlier analyses please
    print >> out
    print >> out, "Wilson statistics based outlier analyses"
    print >> out, "----------------------------------------"
    print >> out
    native_outlier = data_statistics.possible_outliers(
        miller_array=self.x1,
        prob_cut_ex=self.options.outlier_level_extreme,
        prob_cut_wil=self.options.outlier_level_wilson)
    native_outlier.show(out=out)

    self.x1 = native_outlier.remove_outliers(self.x1)

    ## apply anisotropic scaling (final B-value will be set to b_add)!
    if self.options.aniso_correction:
        # NOTE(review): b_final is computed but not forwarded to the
        # correction below in this block - confirm whether it should be.
        b_final = self.options.b_add
        if b_final is None:
            b_final = 0.0

        print >> out
        print >> out, "Anisotropic absolute scaling of data"
        print >> out, "--------------------------------------"
        print >> out

        # Per-copy counts scaled by space-group order and copy number.
        order_z = self.x1.space_group().order_z()
        aniso_correct = absolute_scaling.ml_aniso_absolute_scaling(
            miller_array=self.x1,
            n_residues=n_residues * order_z * n_copies_solc,
            n_bases=n_bases * order_z * n_copies_solc)
        aniso_correct.show(out=out, verbose=1)
        print >> out
        print >> out, " removing anisotropy for native "
        print >> out
        u_star_correct_nat = aniso_correct.u_star
        self.x1 = absolute_scaling.anisotropic_correction(
            self.x1,
            aniso_correct.p_scale,
            u_star_correct_nat)
def _calculate(self, nres):
    """Return ``(solvent_content, n_copies)`` for ``nres`` residues.

    ``matthews_rupp`` is evaluated for ``self.crystal_symmetry`` inside a
    ``no_stdout()`` context.
    """
    with no_stdout():
        analysis = matthews_rupp(self.crystal_symmetry, n_residues=nres)
    solvent_content = analysis.solvent_content
    n_copies = analysis.n_copies
    return solvent_content, n_copies