def _try_as_seq(self):
  """Attempt to load ``self.file_name`` as a sequence file.

  The file is handed to ``iotbx.bioinformatics.any_sequence_format``.  On
  success the parsed objects are stored in ``self._file_object`` and
  ``self._file_type`` is set to ``"seq"``.

  Raises:
    AssertionError: if the file name ends in ``.ccp4``, ``.ncs`` or
      ``.ncs_spec``, if the parser returns no objects, if it reports any
      non-compliant content, or if any parsed sequence contains ``"-"``.
  """
  path = self.file_name
  # XXX hack to avoid choking on CCP4 maps
  assert not path.endswith(".ccp4")
  # XXX hack to avoid choking on NCS files
  assert not path.endswith(".ncs")
  assert not path.endswith(".ncs_spec")
  from iotbx.bioinformatics import any_sequence_format
  parsed, rejected = any_sequence_format(path)
  assert parsed is not None, "No sequence data found in file."
  assert len(rejected) == 0, "Misformatted data in file."
  # Sequences containing "-" are refused (presumably alignment gaps, i.e.
  # the file is an alignment rather than plain sequences -- TODO confirm).
  assert all("-" not in entry.sequence for entry in parsed)
  self._file_object = parsed
  # Legacy line-by-line validation, kept disabled for reference:
  # self._try_as_txt()
  # assert len(self._file_object) != 0
  # for _line in self._file_object.splitlines() :
  #   assert not _line.startswith(" ")
  #   line = re.sub(" ", "", _line)
  #   assert ((len(line) == 0) or
  #           (line[0] == ">") or
  #           (line == "*") or
  #           ((line[-1] == '*') and line[:-1].isalpha()) or
  #           line.isalpha())
  self._file_type = "seq"
def run(args=(), params=None, out=sys.stdout):
  """Collect the input sequences for a MUSCLE run from ``params``.

  Every entry of ``params.muscle.seq_file`` is read.  Files whose names end
  in ``.pdb``, ``.ent``, ``.pdb.gz`` or ``.ent.gz`` are loaded as PDB models
  and each protein chain of the first model contributes one sequence named
  ``<file base name>_<chain id>``.  Any other file is parsed with
  ``iotbx.bioinformatics.any_sequence_format``.

  Args:
    args: not used by the visible part of this function.
    params: extracted parameters; must not be None.
    out: not used by the visible part of this function.

  Raises:
    AssertionError: if ``params`` is None.
    Sorry: if a PDB file has no protein chain in its first model, or if a
      sequence file cannot be parsed.
  """
  assert (params is not None)
  seq_files = params.muscle.seq_file
  output_file = params.muscle.output_file
  if (output_file is None) or (output_file == ""):
    # Fall back to a default alignment file in the current directory.
    output_file = os.path.join(os.getcwd(), "muscle.aln")
  from iotbx import file_reader
  from iotbx.bioinformatics import any_sequence_format, sequence
  pdb_suffixes = (".pdb", ".ent", ".pdb.gz", ".ent.gz")
  seqs = []
  for file_name in seq_files:
    if file_name.endswith(pdb_suffixes):
      pdb_in = file_reader.any_file(file_name, force_type="pdb").file_object
      hierarchy = pdb_in.hierarchy
      # Only the first model is inspected.
      first_model = hierarchy.models()[0]
      found_protein = False
      for chain in first_model.chains():
        if chain.is_protein():
          chain_seq = chain.as_padded_sequence()
          base_name = os.path.basename(file_name)
          seq_name = "%s_%s" % (os.path.splitext(base_name)[0], chain.id)
          seqs.append(sequence(chain_seq, seq_name))
          found_protein = True
      if (not found_protein):
        raise Sorry(("The PDB file %s does not contain any recognizable "+
          "protein chains.") % file_name)
    else:
      try:
        seq_objects, non_compliant = any_sequence_format(
          file_name, assign_name_if_not_defined=True)
        seqs.extend(seq_objects)
      # "except ... as" is valid on Python 2.6+ and Python 3; the older
      # "except Exception, e" form is a syntax error on Python 3.
      except Exception as e:
        raise Sorry(("Error parsing '%s' - not a recognizable sequence "+
          "format. (Original message: %s)") % (file_name, str(e)))
  # NOTE(review): the visible definition ends here; ``seqs`` and
  # ``output_file`` are computed but not used or returned in this view --
  # confirm whether the remainder of the function was lost.
def _try_as_seq(self):
  """Interpret the file as sequence data, or fail with AssertionError.

  Parsing is delegated to ``iotbx.bioinformatics.any_sequence_format``.
  When every check passes, the parsed objects become ``self._file_object``
  and ``self._file_type`` is set to ``"seq"``.

  Raises:
    AssertionError: for file names ending in ``.ccp4``, ``.ncs`` or
      ``.ncs_spec``; when the parser yields ``None`` for the objects; when
      the parser reports non-compliant content; or when a parsed sequence
      contains a ``"-"`` character.
  """
  # XXX hack to avoid choking on CCP4 maps and NCS files
  unsupported_suffixes = (".ccp4", ".ncs", ".ncs_spec")
  assert not self.file_name.endswith(unsupported_suffixes)
  from iotbx.bioinformatics import any_sequence_format
  seq_records, bad_records = any_sequence_format(self.file_name)
  assert seq_records is not None, "No sequence data found in file."
  assert len(bad_records) == 0, "Misformatted data in file."
  for record in seq_records:
    # A "-" in a sequence disqualifies the file as a sequence file.
    assert "-" not in record.sequence
  self._file_object = seq_records
  # Earlier text-based validation, retained as disabled reference code:
  # self._try_as_txt()
  # assert len(self._file_object) != 0
  # for _line in self._file_object.splitlines() :
  #   assert not _line.startswith(" ")
  #   line = re.sub(" ", "", _line)
  #   assert ((len(line) == 0) or
  #           (line[0] == ">") or
  #           (line == "*") or
  #           ((line[-1] == '*') and line[:-1].isalpha()) or
  #           line.isalpha())
  self._file_type = "seq"
def run(args, out=sys.stdout, verbose=True):
  """Command-line driver for ``mmtbx.extend_sidechains``.

  Loads the model and data through
  ``mmtbx.command_line.load_model_and_data``, optionally corrects the model
  sequence against a supplied sequence file, then hands off to
  ``mmtbx.building.extend_sidechains.extend_and_refine``.

  Args:
    args: command-line arguments forwarded to the loader.
    out: stream for log output.
    verbose: when false, the loader's output goes to ``null_out()``; the
      flag is also forwarded to ``extend_and_refine``.

  Returns:
    Whatever ``extend_and_refine`` returns.
  """
  import mmtbx.building.extend_sidechains
  import mmtbx.command_line
  input_out = out
  if (not verbose):
    input_out = null_out()
  # The usage text starts with a backslash-newline so the string itself does
  # not begin with a blank line.
  cmdline = mmtbx.command_line.load_model_and_data(
    args=args,
    master_phil=get_master_phil(),
    process_pdb_file=False,
    out=input_out,
    usage_string="""\
mmtbx.extend_sidechains model.pdb data.mtz [restraints.cif] [options]

Rebuild sidechains with missing non-hydrogen atoms. Includes real-space
refinement (but needs work).""")
  params = cmdline.params
  # Output prefix is the base name of the first input PDB file, minus its
  # extension.
  prefix = os.path.splitext(os.path.basename(params.input.pdb.file_name[0]))[0]
  pdb_hierarchy = cmdline.pdb_hierarchy
  xray_structure = cmdline.xray_structure
  if (cmdline.params.input.sequence is not None):
    from iotbx.bioinformatics import any_sequence_format
    sequences, _non_compliant = any_sequence_format(
      cmdline.params.input.sequence)
    make_sub_header("Correcting model sequence", out=out)
    n_changed = mmtbx.building.extend_sidechains.correct_sequence(
      pdb_hierarchy=pdb_hierarchy,
      sequences=sequences,
      out=out)
    if (n_changed == 0):
      # Written with out.write() rather than the Python-2-only
      # "print >> out" statement so the code runs on Python 2 and 3.
      out.write(" No modifications required.\n")
    else:
      # The hierarchy was edited, so rebuild the x-ray structure from it
      # and refresh the model structure factors.
      xray_structure = pdb_hierarchy.extract_xray_structure(
        crystal_symmetry=xray_structure.crystal_symmetry())
      cmdline.fmodel.update_xray_structure(xray_structure,
        update_f_calc=True)
  return mmtbx.building.extend_sidechains.extend_and_refine(
    pdb_hierarchy=pdb_hierarchy,
    xray_structure=xray_structure,
    fmodel=cmdline.fmodel,
    params=params,
    prefix=prefix,
    cif_objects=[ co for fn, co in cmdline.cif_objects ],
    out=out,
    verbose=verbose,
    output_model=params.output_model,
    output_map_coeffs=params.output_map_coeffs)
def run(args, out=sys.stdout, verbose=True):
  """Rebuild incomplete sidechains for a model given on the command line.

  Steps, in order:
    1. Load model and data with ``mmtbx.command_line.load_model_and_data``.
    2. If ``params.input.sequence`` is set, parse it and call
       ``correct_sequence``; when residues were changed, re-extract the
       x-ray structure and update ``cmdline.fmodel``.
    3. Call ``mmtbx.building.extend_sidechains.extend_and_refine``.

  Args:
    args: command-line arguments forwarded to the loader.
    out: stream for log output.
    verbose: when false, loader output is sent to ``null_out()``; the flag
      is also passed on to ``extend_and_refine``.

  Returns:
    The return value of ``extend_and_refine``.
  """
  import mmtbx.building.extend_sidechains
  import mmtbx.command_line
  input_out = out
  if (not verbose):
    input_out = null_out()
  # Leading backslash-newline keeps the usage text free of an initial blank
  # line.
  cmdline = mmtbx.command_line.load_model_and_data(
    args=args,
    master_phil=get_master_phil(),
    process_pdb_file=False,
    out=input_out,
    usage_string="""\
mmtbx.extend_sidechains model.pdb data.mtz [restraints.cif] [options]

Rebuild sidechains with missing non-hydrogen atoms. Includes real-space
refinement (but needs work).""")
  params = cmdline.params
  # Prefix for output files: first input PDB file name without directory
  # or extension.
  prefix = os.path.splitext(os.path.basename(
    params.input.pdb.file_name[0]))[0]
  pdb_hierarchy = cmdline.pdb_hierarchy
  xray_structure = cmdline.xray_structure
  if (cmdline.params.input.sequence is not None):
    from iotbx.bioinformatics import any_sequence_format
    sequences, _non_compliant = any_sequence_format(
      cmdline.params.input.sequence)
    make_sub_header("Correcting model sequence", out=out)
    n_changed = mmtbx.building.extend_sidechains.correct_sequence(
      pdb_hierarchy=pdb_hierarchy,
      sequences=sequences,
      out=out)
    if (n_changed == 0):
      # out.write() replaces the "print >> out" statement, which only
      # works on Python 2 (it raises TypeError on Python 3).
      out.write(" No modifications required.\n")
    else:
      # Keep the x-ray structure and fmodel in sync with the modified
      # hierarchy.
      xray_structure = pdb_hierarchy.extract_xray_structure(
        crystal_symmetry=xray_structure.crystal_symmetry())
      cmdline.fmodel.update_xray_structure(xray_structure,
        update_f_calc=True)
  return mmtbx.building.extend_sidechains.extend_and_refine(
    pdb_hierarchy=pdb_hierarchy,
    xray_structure=xray_structure,
    fmodel=cmdline.fmodel,
    params=params,
    prefix=prefix,
    cif_objects=[co for fn, co in cmdline.cif_objects],
    out=out,
    verbose=verbose,
    output_model=params.output_model,
    output_map_coeffs=params.output_map_coeffs)
def get_residues_and_ha(
    seq_file=None,
    atom_type=None,
    chain_type=None,
    data=None,
    solvent_fraction=None,
    ncs_copies=None,
    out=sys.stdout):
  """Estimate residue and heavy-atom site counts from a sequence file.

  The sequence file is parsed with ``any_sequence_format`` and the counts
  returned by ``get_aa_and_met`` are summed over all sequences.  If ``data``
  names an existing file, ``ncs_and_number_of_ha`` is run on it and its
  results are used; otherwise the values are derived from the sequence
  alone.

  Args:
    seq_file: path to the sequence file (required, must exist).
    atom_type: forwarded to ``get_number_of_sites`` and, if set, to
      ``ncs_and_number_of_ha``.
    chain_type: forwarded to ``ncs_and_number_of_ha`` if set.
    data: optional path to a data file.
    solvent_fraction: returned unchanged when no data file is used.
    ncs_copies: forwarded to ``ncs_and_number_of_ha`` if set; returned
      unchanged when no data file is used.
    out: not used by this function.

  Returns:
    A 5-tuple ``(residues, sites, n_met + n_cys, solvent_fraction,
    ncs_copies)``.  With a data file, the residue and ``n_met + n_cys``
    counts are multiplied by the NCS copy count reported by
    ``ncs_and_number_of_ha``.

  Raises:
    Sorry: if the sequence file is missing or contains non-compliant data.
  """
  if not (seq_file and os.path.isfile(seq_file)):
    raise Sorry("Please supply number of residues or a sequence file")

  parsed, rejected = any_sequence_format(seq_file)
  if rejected:
    raise Sorry("Sorry, unable to read the sequence file %s" % (seq_file))

  # One (n_aa, n_met, n_cys) triple per sequence in the file.
  per_sequence = [
    get_aa_and_met(sequence=entry.sequence) for entry in parsed]
  n_aa = sum(aa for aa, _met, _cys in per_sequence)
  n_met = sum(met for _aa, met, _cys in per_sequence)
  n_cys = sum(cys for _aa, _met, cys in per_sequence)
  number_of_s = n_met + n_cys

  # Sequence-only estimate for a single copy; the low-resolution value is
  # not used here.
  number_of_sites, _sites_lowres = get_number_of_sites(
    atom_type=atom_type,
    n_met=n_met,
    n_cys=n_cys,
    n_aa=n_aa,
    ncs_copies=1,
    out=null_out())

  # Without a usable data file, return the guess from the sequence alone.
  if not (data and os.path.isfile(data)):
    return n_aa, number_of_sites, number_of_s, solvent_fraction, ncs_copies

  # With a data file, let ncs_and_number_of_ha estimate residues and heavy
  # atoms from the data together with the sequence file.
  from phenix.command_line.ncs_and_number_of_ha import ncs_and_number_of_ha
  args = ["data=%s" % (data)]
  if seq_file:
    args.append("seq_file=%s" % (seq_file))
  if atom_type:
    args.append("atom_type=%s" % (atom_type))
  if chain_type:
    args.append("chain_type=%s" % (chain_type))
  if ncs_copies:
    args.append("ncs_copies=%s" % (ncs_copies))
  args.append("log=None")
  args.append("params_out=None")
  na = ncs_and_number_of_ha(args=args, out=null_out())
  return (
    na.ncs_copies * n_aa,
    na.number_of_sites,
    na.ncs_copies * number_of_s,
    na.solvent_fraction,
    na.ncs_copies)
def get_residues_and_ha(seq_file=None, atom_type=None,
    chain_type=None, data=None, solvent_fraction=None,
    ncs_copies=None, out=sys.stdout):
  """Work out residue count and heavy-atom sites for a sequence file.

  Counts from ``get_aa_and_met`` are accumulated over every sequence that
  ``any_sequence_format`` finds in ``seq_file``.  When ``data`` is an
  existing file the final numbers come from ``ncs_and_number_of_ha``;
  otherwise they are guessed from the sequence alone.

  Returns:
    ``(n_residues, n_sites, n_met + n_cys, solvent_fraction, ncs_copies)``.
    In the data-file branch the residue and ``n_met + n_cys`` counts are
    scaled by ``na.ncs_copies``, and sites, solvent fraction and copy count
    are taken from the ``ncs_and_number_of_ha`` result.  In the other
    branch ``solvent_fraction`` and ``ncs_copies`` are passed through as
    given.

  Raises:
    Sorry: when ``seq_file`` is empty or not a file, or when the parser
      reports non-compliant content.

  The ``out`` argument is accepted but not used.
  """
  have_seq_file = bool(seq_file) and os.path.isfile(seq_file)
  if not have_seq_file:
    raise Sorry("Please supply number of residues or a sequence file")
  seq_objects, bad_entries = any_sequence_format(seq_file)
  if bad_entries:
    raise Sorry("Sorry, unable to read the sequence file %s" %(seq_file))

  # Running totals over all sequences in the file.
  n_aa = n_met = n_cys = 0
  for seq_obj in seq_objects:
    aa_here, met_here, cys_here = get_aa_and_met(sequence=seq_obj.sequence)
    n_aa = n_aa + aa_here
    n_met = n_met + met_here
    n_cys = n_cys + cys_here
  number_of_s = n_met + n_cys

  # Estimate for one copy; only the first returned value is used below.
  site_estimates = get_number_of_sites(
    atom_type=atom_type, n_met=n_met, n_cys=n_cys,
    n_aa=n_aa, ncs_copies=1, out=null_out())
  number_of_sites, number_of_sites_lowres = site_estimates

  use_data_file = bool(data) and os.path.isfile(data)
  if not use_data_file:
    # No data file: fall back to the sequence-only guess.
    result = (n_aa, number_of_sites, number_of_s, solvent_fraction,
      ncs_copies)
    return result

  # A data file was given: ncs_and_number_of_ha combines it with the
  # sequence file to estimate residues and heavy atoms.
  from phenix.command_line.ncs_and_number_of_ha import ncs_and_number_of_ha
  args = ["data=%s" %(data)]
  if seq_file:
    args += ["seq_file=%s" %(seq_file)]
  if atom_type:
    args += ["atom_type=%s" %(atom_type)]
  if chain_type:
    args += ["chain_type=%s" %(chain_type)]
  if ncs_copies:
    args += ["ncs_copies=%s" %(ncs_copies)]
  args += ["log=None", "params_out=None"]
  na = ncs_and_number_of_ha(args=args, out=null_out())
  total_residues = na.ncs_copies*n_aa
  total_s = na.ncs_copies*number_of_s
  return (total_residues, na.number_of_sites, total_s,
    na.solvent_fraction, na.ncs_copies)