Пример #1
0
    def hits(self):
        """
        Generate a homology search hit for every entry in ``self.root.hits``.

        Each hit identifier is split on "_". The first field becomes the
        hit identifier and the second, when present, the chain; an
        identifier without "_" gets an empty chain. Only the first
        alignment of each hit is used: its query and match sequences are
        wrapped in a two-row clustal alignment whose rows are named
        "target" and "<pdb>_<chain>".

        Yields:
            One ``bioinformatics.homology_search_hit`` per entry, in the
            order of ``self.root.hits``.
        """

        from iotbx import bioinformatics

        for entry in self.root.hits:
            fields = entry.identifier.split("_")
            assert 0 < len(fields)

            # Anything after the second field is ignored.
            if len(fields) != 1:
                pdb, chain = fields[:2]
            else:
                pdb, chain = fields[0], ""

            top = entry.alignments[0]
            pairwise = bioinformatics.clustal_alignment(
                names=["target", "%s_%s" % (pdb, chain)],
                alignments=[top.query.seq, top.match.seq],
                program="WU-BLAST",
            )
            yield bioinformatics.homology_search_hit(
                identifier=pdb,
                chain=chain,
                annotation=entry.description,
                alignment=pairwise,
            )
Пример #2
0
  def hits(self):
    """
    Generate homology search hits from a pre-computed multiple alignment.

    Row 0 of ``self.alignment`` is the reference and is labelled "target"
    in every pairwise alignment produced. Rows 1 up to (but excluding)
    ``self.max_count`` are each turned into one hit. A row name is split
    on "_": the first field is the hit identifier and the second, when
    present, the chain (empty string otherwise).

    When the alignment object has no ``descriptions`` attribute, the
    aligned row itself is passed as the hit annotation.

    Yields:
      One ``bioinformatics.homology_search_hit`` per selected row.
    """

    names = self.alignment.names[1:self.max_count]
    rows = self.alignment.alignments[1:self.max_count]

    try:
      descriptions = self.alignment.descriptions[1:self.max_count]
    except AttributeError:
      # No descriptions available: fall back to the aligned rows.
      descriptions = rows

    reference = self.alignment.alignments[0]

    for name, row, description in zip(names, rows, descriptions):
      fields = name.split("_")
      assert 0 < len(fields)

      # Anything after the second field is ignored.
      if len(fields) != 1:
        pdb, chain = fields[:2]
      else:
        pdb, chain = fields[0], ""

      pairwise = bioinformatics.clustal_alignment(
        names=["target", "%s_%s" % (pdb, chain)],
        alignments=[reference, row],
        program="<unknown>",
      )
      yield bioinformatics.homology_search_hit(
        identifier=pdb,
        chain=chain,
        annotation=description,
        alignment=pairwise,
      )
Пример #3
0
def get_muscle_alignment_ordered(sequences, out = None):
  """
  Align ``sequences`` via ``get_muscle_alignment`` and return the aligned
  rows arranged in the order of the input.

  Every distinct sequence object (distinct as a dictionary key) is
  submitted once under a placeholder name "Chain_<i>", where ``i`` is the
  1-based position of its first occurrence. The aligned rows are then
  looked up by placeholder name for each input sequence, so repeated
  inputs share one aligned row.

  Args:
    sequences: iterable of objects exposing ``name`` and ``sequence``;
      it is iterated more than once.
    out: forwarded unchanged to ``get_muscle_alignment``.

  Returns:
    A ``bioinformatics.clustal_alignment`` whose names are the input
    ``name`` attributes and whose program is taken from the alignment
    returned by ``get_muscle_alignment``.
  """

  from iotbx import bioinformatics

  labels = {}
  for position, seq in enumerate(sequences, start=1):
    # Keep the label assigned at the first occurrence of a sequence.
    labels.setdefault(seq, "Chain_%d" % position)

  fasta_records = (
    str(bioinformatics.sequence(name=label, sequence=seq.sequence))
    for seq, label in labels.items()
  )
  alignment = get_muscle_alignment(
    fasta_sequences="\n".join(fasta_records),
    out=out,
  )

  aligned_by_label = dict(zip(alignment.names, alignment.alignments))
  assert all(label in aligned_by_label for label in labels.values())

  ordered_names = [seq.name for seq in sequences]
  ordered_rows = [aligned_by_label[labels[seq]] for seq in sequences]

  return bioinformatics.clustal_alignment(
    names=ordered_names,
    alignments=ordered_rows,
    program=alignment.program,
  )
Пример #4
0
    def hits(self):
        """
        Generate homology search hits from the last search iteration.

        Nothing is yielded when ``self.root.iterations`` is empty.
        Otherwise every hit of ``self.root.iterations[-1]`` is converted.
        Its accession is split on "_": the first field is the hit
        identifier and the second, when present, the chain (empty string
        otherwise). Only the first HSP of each hit is used: its query and
        hit sequences form a two-row clustal alignment whose rows are
        named "target" and "<pdb>_<chain>".

        Yields:
            One ``bioinformatics.homology_search_hit`` per hit.
        """

        # A StopIteration raised inside a generator body is converted to
        # RuntimeError (PEP 479, enforced since Python 3.7); a plain return
        # is the correct way to finish the generator early.
        if not self.root.iterations:
            return

        from iotbx import bioinformatics

        for h in self.root.iterations[-1].hits:
            pieces = h.accession.split("_")
            assert 0 < len(pieces)

            if len(pieces) == 1:
                pdb = pieces[0]
                chain = ""

            else:
                # Anything after the second field is ignored.
                (pdb, chain) = pieces[:2]

            alignment = bioinformatics.clustal_alignment(
                names=["target", "%s_%s" % (pdb, chain)],
                alignments=[h.hsps[0].query.seq, h.hsps[0].hit.seq],
                program="NCBI-BLAST")
            yield bioinformatics.homology_search_hit(identifier=pdb,
                                                     chain=chain,
                                                     annotation=h.annotation,
                                                     alignment=alignment)
Пример #5
0
  def hits(self):
    """
    Generate homology search hits from the last search iteration.

    Nothing is yielded when ``self.root.iterations`` is empty. Otherwise
    every hit of ``self.root.iterations[-1]`` is converted. Its accession
    is split on "_": the first field is the hit identifier and the second,
    when present, the chain (empty string otherwise). Only the first HSP
    of each hit is used: its query and hit sequences form a two-row
    clustal alignment whose rows are named "target" and "<pdb>_<chain>".

    Yields:
      One ``bioinformatics.homology_search_hit`` per hit.
    """

    # A StopIteration raised inside a generator body is converted to
    # RuntimeError (PEP 479, enforced since Python 3.7); a plain return is
    # the correct way to finish the generator early.
    if not self.root.iterations:
      return

    from iotbx import bioinformatics

    for h in self.root.iterations[-1].hits:
      pieces = h.accession.split( "_" )
      assert 0 < len( pieces )

      if len( pieces ) == 1:
        pdb = pieces[0]
        chain = ""

      else:
        # Anything after the second field is ignored.
        ( pdb, chain ) = pieces[:2]

      alignment = bioinformatics.clustal_alignment(
        names = [ "target", "%s_%s" % ( pdb, chain ) ],
        alignments = [ h.hsps[0].query.seq, h.hsps[0].hit.seq ],
        program = "NCBI-BLAST"
        )
      yield bioinformatics.homology_search_hit(
        identifier = pdb,
        chain = chain,
        annotation = h.annotation,
        alignment = alignment
        )
Пример #6
0
  def hits(self):
    """
    Generate a homology search hit for every entry in ``self.root.hits``.

    Each hit identifier is split on "_". The first field becomes the hit
    identifier and the second, when present, the chain; an identifier
    without "_" gets an empty chain. Only the first alignment of each hit
    is used: its query and match sequences are wrapped in a two-row
    clustal alignment whose rows are named "target" and "<pdb>_<chain>".

    Yields:
      One ``bioinformatics.homology_search_hit`` per entry, in the order
      of ``self.root.hits``.
    """

    from iotbx import bioinformatics

    for record in self.root.hits:
      parts = record.identifier.split("_")
      assert 0 < len(parts)

      # Anything after the second field is ignored.
      if len(parts) != 1:
        pdb, chain = parts[:2]
      else:
        pdb, chain = parts[0], ""

      best = record.alignments[0]
      pair = bioinformatics.clustal_alignment(
        names=["target", "%s_%s" % (pdb, chain)],
        alignments=[best.query.seq, best.match.seq],
        program="WU-BLAST",
      )
      yield bioinformatics.homology_search_hit(
        identifier=pdb,
        chain=chain,
        annotation=record.description,
        alignment=pair,
      )
Пример #7
0
def get_muscle_alignment_ordered(sequences, out = None):
  """
  Align ``sequences`` via ``get_muscle_alignment`` and return the aligned
  rows arranged in the order of the input.

  Every distinct sequence object (distinct as a dictionary key) is
  submitted once under a placeholder name "Chain_<i>", where ``i`` is the
  1-based position of its first occurrence. The aligned rows are then
  looked up by placeholder name for each sequence in the result.

  If ``get_muscle_alignment`` reports an error containing
  "Invalid character", every sequence for which ``validate_sequence``
  returns a non-empty result is dropped. The output is then built from
  the remaining distinct sequences only, in first-occurrence order.

  Args:
    sequences: iterable of objects exposing ``name`` and ``sequence``;
      it is iterated more than once.
    out: forwarded unchanged to ``get_muscle_alignment``.

  Returns:
    A ``bioinformatics.clustal_alignment`` whose program is taken from
    the alignment returned by ``get_muscle_alignment``.

  Raises:
    Sorry: for any non-empty reported error that does not contain
      "Invalid character".
  """

  from iotbx import bioinformatics
  from iotbx.pdb.amino_acid_codes import validate_sequence

  name_for = {}

  for ( i, seq ) in enumerate( sequences, start = 1 ):
    # Keep the name assigned at the first occurrence of a sequence.
    name_for.setdefault( seq, "Chain_%d" % i )

  alignment, errors = get_muscle_alignment(
    fasta_sequences = "\n".join(
      str( bioinformatics.sequence( name = name, sequence = seq.sequence ) )
      for ( seq, name ) in name_for.items()
      ),
    out = out,
    )

  # check for errors and handle:
  #   invalid characters in sequences
  for error in errors:
    error = error.strip()
    if ('Invalid character' in error):
      # Iterate over a snapshot of the keys: entries are removed inside the
      # loop, and resizing a dict while iterating over it raises
      # RuntimeError on Python 3.
      for seq in list( name_for ):
        invalid = validate_sequence(
          seq.sequence, protein=True, strict_protein=False,
          nucleic_acid=True, strict_nucleic_acid=False)
        if (len(invalid) > 0):
          del name_for[ seq ]
      # A concrete list rather than a live dict view, so later changes to
      # name_for cannot affect the sequences being returned.
      sequences = list( name_for )
    elif (len(error) > 0):
      raise Sorry(error)

  lookup = dict( zip( alignment.names, alignment.alignments ) )
  assert all( n in lookup for n in name_for.values() )

  return bioinformatics.clustal_alignment(
    names = [ seq.name for seq in sequences ],
    alignments = [ lookup[ name_for[ seq ] ] for seq in sequences ],
    program = alignment.program
    )