Exemplo n.º 1
0
    def _try_as_seq(self):
        # XXX hack to avoid choking on CCP4 maps
        assert (not self.file_name.endswith(".ccp4"))
        # XXX hack to avoid choking on NCS files:
        assert (not self.file_name.endswith(".ncs"))
        assert (not self.file_name.endswith(".ncs_spec"))

        from iotbx.bioinformatics import any_sequence_format
        objects, non_compliant = any_sequence_format(self.file_name)
        assert (objects is not None), "No sequence data found in file."
        assert (len(non_compliant) == 0), "Misformatted data in file."
        for seq_obj in objects:
            assert (not "-" in seq_obj.sequence)
        self._file_object = objects
        #    self._try_as_txt()
        #    assert len(self._file_object) != 0
        #    for _line in self._file_object.splitlines() :
        #      assert not _line.startswith(" ")
        #      line = re.sub(" ", "", _line)
        #      assert ((len(line) == 0) or
        #              (line[0] == ">") or
        #              (line == "*") or
        #              ((line[-1] == '*') and line[:-1].isalpha()) or
        #              line.isalpha())
        self._file_type = "seq"
Exemplo n.º 2
0
def run(args=(), params=None, out=sys.stdout):
  """Gather the sequences named in ``params.muscle.seq_file``.

  Files ending in .pdb/.ent (optionally followed by .gz) are read as PDB
  models: every protein chain of the first model contributes one padded
  sequence named ``<file base name>_<chain id>``.  All other files are
  handed to ``any_sequence_format``.

  :param args: not used by the code shown here
  :param params: parameter object; ``params.muscle.seq_file`` and
    ``params.muscle.output_file`` are read.  Must not be None.
  :param out: not used by the code shown here
  :raises Sorry: if a PDB file has no protein chain in its first model, or
    if a sequence file cannot be parsed
  """
  assert (params is not None)
  seq_files = params.muscle.seq_file
  output_file = params.muscle.output_file
  if (output_file is None) or (output_file == ""):
    # Fall back to a file in the current working directory.
    output_file = os.path.join(os.getcwd(), "muscle.aln")
  from iotbx import file_reader
  from iotbx.bioinformatics import any_sequence_format, sequence
  pdb_suffixes = (".pdb", ".ent", ".pdb.gz", ".ent.gz")
  seqs = []
  for file_name in seq_files:
    if file_name.endswith(pdb_suffixes):
      pdb_in = file_reader.any_file(file_name, force_type="pdb").file_object
      first_model = pdb_in.hierarchy.models()[0]
      # The name prefix depends only on the file, so compute it once.
      name_prefix = os.path.splitext(os.path.basename(file_name))[0]
      found_protein = False
      for chain in first_model.chains():
        if chain.is_protein():
          seq_name = "%s_%s" % (name_prefix, chain.id)
          seqs.append(sequence(chain.as_padded_sequence(), seq_name))
          found_protein = True
      if (not found_protein):
        raise Sorry(("The PDB file %s does not contain any recognizable "+
          "protein chains.") % file_name)
    else:
      try:
        # The non-compliant entries returned alongside are not inspected.
        seq_objects, non_compliant = any_sequence_format(file_name,
          assign_name_if_not_defined=True)
        seqs.extend(seq_objects)
      except Exception as e:
        # "as" form: valid on Python 2.6+ and 3 (the comma form is 2-only).
        raise Sorry(("Error parsing '%s' - not a recognizable sequence "+
          "format.  (Original message: %s)") % (file_name, str(e)))
Exemplo n.º 3
0
  def _try_as_seq (self) :
    # XXX hack to avoid choking on CCP4 maps
    assert (not self.file_name.endswith(".ccp4"))
    # XXX hack to avoid choking on NCS files:
    assert (not self.file_name.endswith(".ncs"))
    assert (not self.file_name.endswith(".ncs_spec"))

    from iotbx.bioinformatics import any_sequence_format
    objects, non_compliant = any_sequence_format(self.file_name)
    assert (objects is not None), "No sequence data found in file."
    assert (len(non_compliant) == 0), "Misformatted data in file."
    for seq_obj in objects :
      assert (not "-" in seq_obj.sequence)
    self._file_object = objects
#    self._try_as_txt()
#    assert len(self._file_object) != 0
#    for _line in self._file_object.splitlines() :
#      assert not _line.startswith(" ")
#      line = re.sub(" ", "", _line)
#      assert ((len(line) == 0) or
#              (line[0] == ">") or
#              (line == "*") or
#              ((line[-1] == '*') and line[:-1].isalpha()) or
#              line.isalpha())
    self._file_type = "seq"
Exemplo n.º 4
0
def run(args=(), params=None, out=sys.stdout):
  """Gather the sequences named in ``params.muscle.seq_file``.

  Files ending in .pdb/.ent (optionally followed by .gz) are read as PDB
  models: every protein chain of the first model contributes one padded
  sequence named ``<file base name>_<chain id>``.  All other files are
  handed to ``any_sequence_format``.

  :param args: not used by the code shown here
  :param params: parameter object; ``params.muscle.seq_file`` and
    ``params.muscle.output_file`` are read.  Must not be None.
  :param out: not used by the code shown here
  :raises Sorry: if a PDB file has no protein chain in its first model, or
    if a sequence file cannot be parsed
  """
  assert (params is not None)
  seq_files = params.muscle.seq_file
  output_file = params.muscle.output_file
  if (output_file is None) or (output_file == ""):
    # Fall back to a file in the current working directory.
    output_file = os.path.join(os.getcwd(), "muscle.aln")
  from iotbx import file_reader
  from iotbx.bioinformatics import any_sequence_format, sequence
  pdb_suffixes = (".pdb", ".ent", ".pdb.gz", ".ent.gz")
  seqs = []
  for file_name in seq_files:
    if file_name.endswith(pdb_suffixes):
      pdb_in = file_reader.any_file(file_name, force_type="pdb").file_object
      first_model = pdb_in.hierarchy.models()[0]
      # The name prefix depends only on the file, so compute it once.
      name_prefix = os.path.splitext(os.path.basename(file_name))[0]
      found_protein = False
      for chain in first_model.chains():
        if chain.is_protein():
          seq_name = "%s_%s" % (name_prefix, chain.id)
          seqs.append(sequence(chain.as_padded_sequence(), seq_name))
          found_protein = True
      if (not found_protein):
        raise Sorry(("The PDB file %s does not contain any recognizable "+
          "protein chains.") % file_name)
    else:
      try:
        # The non-compliant entries returned alongside are not inspected.
        seq_objects, non_compliant = any_sequence_format(file_name,
          assign_name_if_not_defined=True)
        seqs.extend(seq_objects)
      except Exception as e:
        # "as" form: valid on Python 2.6+ and 3 (the comma form is 2-only).
        raise Sorry(("Error parsing '%s' - not a recognizable sequence "+
          "format.  (Original message: %s)") % (file_name, str(e)))
Exemplo n.º 5
0
def run(args, out=sys.stdout, verbose=True):
  """Command-line driver: load a model and data, then extend sidechains.

  The inputs are loaded with ``mmtbx.command_line.load_model_and_data``.
  If ``params.input.sequence`` is set, the model sequence is first passed
  through ``correct_sequence``; when that changes anything, the X-ray
  structure is re-extracted from the hierarchy and pushed into the fmodel.

  :param args: command-line arguments forwarded to the loader
  :param out: stream for log output
  :param verbose: when false, the loader's output goes to ``null_out()``;
    also forwarded to ``extend_and_refine``
  :returns: whatever ``extend_and_refine`` returns
  """
  import mmtbx.building.extend_sidechains
  import mmtbx.command_line
  input_out = out
  if (not verbose):
    input_out = null_out()
  cmdline = mmtbx.command_line.load_model_and_data(
    args=args,
    master_phil=get_master_phil(),
    process_pdb_file=False,
    out=input_out,
    usage_string="""\
mmtbx.extend_sidechains model.pdb data.mtz [restraints.cif] [options]

Rebuild sidechains with missing non-hydrogen atoms.  Includes real-space
refinement (but needs work).""")
  params = cmdline.params
  prefix = os.path.splitext(os.path.basename(params.input.pdb.file_name[0]))[0]
  pdb_hierarchy = cmdline.pdb_hierarchy
  xray_structure = cmdline.xray_structure
  if (params.input.sequence is not None):
    from iotbx.bioinformatics import any_sequence_format
    # The second return value (non-compliant entries) is not used here.
    sequences, _non_compliant = any_sequence_format(params.input.sequence)
    make_sub_header("Correcting model sequence", out=out)
    n_changed = mmtbx.building.extend_sidechains.correct_sequence(
      pdb_hierarchy=pdb_hierarchy,
      sequences=sequences,
      out=out)
    if (n_changed == 0):
      # Plain write() instead of the Python-2-only "print >> out" statement.
      out.write("  No modifications required.\n")
    else:
      # The hierarchy changed, so the structure and f_calc must be refreshed.
      xray_structure = pdb_hierarchy.extract_xray_structure(
        crystal_symmetry=xray_structure.crystal_symmetry())
      cmdline.fmodel.update_xray_structure(xray_structure,
        update_f_calc=True)
  return mmtbx.building.extend_sidechains.extend_and_refine(
    pdb_hierarchy=pdb_hierarchy,
    xray_structure=xray_structure,
    fmodel=cmdline.fmodel,
    params=params,
    prefix=prefix,
    cif_objects=[co for _fn, co in cmdline.cif_objects],
    out=out,
    verbose=verbose,
    output_model=params.output_model,
    output_map_coeffs=params.output_map_coeffs)
Exemplo n.º 6
0
def run(args, out=sys.stdout, verbose=True):
    """Command-line driver: load a model and data, then extend sidechains.

    The inputs are loaded with ``mmtbx.command_line.load_model_and_data``.
    If ``params.input.sequence`` is set, the model sequence is first passed
    through ``correct_sequence``; when that changes anything, the X-ray
    structure is re-extracted from the hierarchy and pushed into the fmodel.

    :param args: command-line arguments forwarded to the loader
    :param out: stream for log output
    :param verbose: when false, the loader's output goes to ``null_out()``;
        also forwarded to ``extend_and_refine``
    :returns: whatever ``extend_and_refine`` returns
    """
    import mmtbx.building.extend_sidechains
    import mmtbx.command_line
    input_out = out
    if (not verbose):
        input_out = null_out()
    cmdline = mmtbx.command_line.load_model_and_data(
        args=args,
        master_phil=get_master_phil(),
        process_pdb_file=False,
        out=input_out,
        usage_string="""\
mmtbx.extend_sidechains model.pdb data.mtz [restraints.cif] [options]

Rebuild sidechains with missing non-hydrogen atoms.  Includes real-space
refinement (but needs work).""")
    params = cmdline.params
    prefix = os.path.splitext(os.path.basename(
        params.input.pdb.file_name[0]))[0]
    pdb_hierarchy = cmdline.pdb_hierarchy
    xray_structure = cmdline.xray_structure
    if (params.input.sequence is not None):
        from iotbx.bioinformatics import any_sequence_format
        # The second return value (non-compliant entries) is not used here.
        sequences, _non_compliant = any_sequence_format(
            params.input.sequence)
        make_sub_header("Correcting model sequence", out=out)
        n_changed = mmtbx.building.extend_sidechains.correct_sequence(
            pdb_hierarchy=pdb_hierarchy, sequences=sequences, out=out)
        if (n_changed == 0):
            # Plain write() instead of the Python-2-only "print >> out".
            out.write("  No modifications required.\n")
        else:
            # The hierarchy changed, so structure and f_calc are refreshed.
            xray_structure = pdb_hierarchy.extract_xray_structure(
                crystal_symmetry=xray_structure.crystal_symmetry())
            cmdline.fmodel.update_xray_structure(xray_structure,
                                                 update_f_calc=True)
    return mmtbx.building.extend_sidechains.extend_and_refine(
        pdb_hierarchy=pdb_hierarchy,
        xray_structure=xray_structure,
        fmodel=cmdline.fmodel,
        params=params,
        prefix=prefix,
        cif_objects=[co for _fn, co in cmdline.cif_objects],
        out=out,
        verbose=verbose,
        output_model=params.output_model,
        output_map_coeffs=params.output_map_coeffs)
Exemplo n.º 7
0
def get_residues_and_ha(
    seq_file=None,
    atom_type=None,
    chain_type=None,
    data=None,
    solvent_fraction=None,
    ncs_copies=None,
    out=sys.stdout,
):
    """Estimate residue and site counts from a sequence file.

    The three counts returned by ``get_aa_and_met`` are summed over every
    sequence in ``seq_file`` and passed to ``get_number_of_sites`` for a
    single copy.  If ``data`` names an existing file, the estimate is
    instead delegated to ``ncs_and_number_of_ha`` and the summed counts are
    scaled by the NCS copy number it reports.

    Returns a 5-tuple: (residue count, number of sites, sum of the second
    and third counts, solvent fraction, NCS copies).  Without a data file
    the last two are the caller's values passed straight through.  ``out``
    is accepted but not used.

    Raises Sorry if ``seq_file`` is missing or not a file, or if the parser
    reports non-compliant entries.
    """
    if not (seq_file and os.path.isfile(seq_file)):
        raise Sorry("Please supply number of residues or a sequence file")
    sequences, rejected = any_sequence_format(seq_file)
    if rejected:
        raise Sorry("Sorry, unable to read the sequence file %s" % (seq_file))

    # Accumulate the per-sequence counts (presumably residues, Met and Cys,
    # going by the keyword names used below).
    total_aa = total_met = total_cys = 0
    for entry in sequences:
        aa_count, met_count, cys_count = get_aa_and_met(sequence=entry.sequence)
        total_aa += aa_count
        total_met += met_count
        total_cys += cys_count
    total_s = total_met + total_cys
    site_estimate, _site_estimate_lowres = get_number_of_sites(
        atom_type=atom_type,
        n_met=total_met,
        n_cys=total_cys,
        n_aa=total_aa,
        ncs_copies=1,
        out=null_out(),
    )

    # Without a usable data file, return the single-copy guess.
    if not (data and os.path.isfile(data)):
        return total_aa, site_estimate, total_s, solvent_fraction, ncs_copies

    # A data file was given: let ncs_and_number_of_ha work out the copy
    # number, site count and solvent fraction from it plus the sequence.
    from phenix.command_line.ncs_and_number_of_ha import ncs_and_number_of_ha

    optional_settings = (
        ("seq_file=%s", seq_file),
        ("atom_type=%s", atom_type),
        ("chain_type=%s", chain_type),
        ("ncs_copies=%s", ncs_copies),
    )
    cmd_args = ["data=%s" % (data)]
    cmd_args.extend(
        template % (value) for template, value in optional_settings if value
    )
    cmd_args += ["log=None", "params_out=None"]
    estimate = ncs_and_number_of_ha(args=cmd_args, out=null_out())
    return (
        estimate.ncs_copies * total_aa,
        estimate.number_of_sites,
        estimate.ncs_copies * total_s,
        estimate.solvent_fraction,
        estimate.ncs_copies,
    )
Exemplo n.º 8
0
def get_residues_and_ha(seq_file=None, atom_type=None,
      chain_type=None, data=None, solvent_fraction=None,
      ncs_copies=None, out=sys.stdout):
  """Estimate residue and site counts from a sequence file.

  The three counts returned by ``get_aa_and_met`` are summed over every
  sequence in ``seq_file`` and passed to ``get_number_of_sites`` for a
  single copy.  If ``data`` names an existing file, the estimate is instead
  delegated to ``ncs_and_number_of_ha`` and the summed counts are scaled by
  the NCS copy number it reports.

  Returns a 5-tuple: (residue count, number of sites, sum of the second and
  third counts, solvent fraction, NCS copies).  Without a data file the last
  two are the caller's values passed straight through.  ``out`` is accepted
  but not used.

  Raises Sorry if ``seq_file`` is missing or not a file, or if the parser
  reports non-compliant entries.
  """
  if not (seq_file and os.path.isfile(seq_file)):
    raise Sorry("Please supply number of residues or a sequence file")
  sequences, rejected = any_sequence_format(seq_file)
  if rejected:
    raise Sorry("Sorry, unable to read the sequence file %s" %(seq_file))

  # Accumulate the per-sequence counts (presumably residues, Met and Cys,
  # going by the keyword names used below).
  total_aa = total_met = total_cys = 0
  for entry in sequences:
    aa_count, met_count, cys_count = get_aa_and_met(sequence=entry.sequence)
    total_aa += aa_count
    total_met += met_count
    total_cys += cys_count
  total_s = total_met + total_cys
  site_estimate, _site_estimate_lowres = get_number_of_sites(
      atom_type=atom_type, n_met=total_met, n_cys=total_cys,
      n_aa=total_aa, ncs_copies=1, out=null_out())

  # Without a usable data file, return the single-copy guess.
  if not (data and os.path.isfile(data)):
    return total_aa, site_estimate, total_s, solvent_fraction, ncs_copies

  # A data file was given: let ncs_and_number_of_ha work out the copy
  # number, site count and solvent fraction from it plus the sequence.
  from phenix.command_line.ncs_and_number_of_ha import ncs_and_number_of_ha
  optional_settings = (
    ("seq_file=%s", seq_file),
    ("atom_type=%s", atom_type),
    ("chain_type=%s", chain_type),
    ("ncs_copies=%s", ncs_copies),
  )
  cmd_args = ["data=%s" %(data)]
  cmd_args.extend(
    template %(value) for template, value in optional_settings if value)
  cmd_args += ["log=None", "params_out=None"]
  estimate = ncs_and_number_of_ha(args=cmd_args, out=null_out())
  return (estimate.ncs_copies*total_aa, estimate.number_of_sites,
    estimate.ncs_copies*total_s, estimate.solvent_fraction,
    estimate.ncs_copies)