示例#1
0
    def initialize(self):
        """Set up the empty per-file state for this constraint file.

        Creates an empty ``constraints`` list, builds the CYANA library
        helper for ``self.version`` and sets ``constraintElements`` to 2
        (presumably the number of items per constraint -- confirm against
        the generic file class).
        """
        self.constraints = []

        # Library helper is built for the version already set on this file.
        library = CyanaLibrary(version=self.version)
        self.cyanaLib = library

        self.constraintElements = 2
示例#2
0
  def initialize(self):
    """Set up the empty per-file state for this constraint file.

    Creates an empty ``constraints`` list and ``orientations`` dict,
    builds the CYANA library helper for ``self.version``, sets
    ``constraintElements`` to 2 and starts with ``singleAtom`` enabled.
    """
    self.constraints = []

    # Library helper is built for the version already set on this file.
    library = CyanaLibrary(version=self.version)
    self.cyanaLib = library

    self.constraintElements = 2

    # Two file layouts exist: older files define only one atom per entry,
    # newer ones define two. The single-atom layout is the starting
    # assumption.
    self.singleAtom = True

    self.orientations = {}
示例#3
0
    def initialize(self, parent, saveFrame=None):
        """Set up the empty state and optionally parse a save frame.

        Parameters:
            parent: owning object; its ``version`` attribute is copied to
                ``self.version``.
            saveFrame: optional save frame. When truthy,
                ``self.parseSaveFrame()`` is called once all other
                attributes have been set.
        """
        self.constraints = []
        self.constraintElements = 4

        # The library helper is created without a version argument here,
        # before self.version is assigned below.
        self.cyanaLibUsed = 0
        self.cyanaLib = CyanaLibrary()

        self.saveFrame = saveFrame

        self.parent = parent
        self.version = parent.version

        # Nothing more to do when no save frame was supplied.
        if not self.saveFrame:
            return

        self.parseSaveFrame()
示例#4
0
    def initialize(self):
        """Set up the empty per-file state for this sequence file.

        Creates an empty ``sequences`` list and builds the CYANA library
        helper for ``self.version``.
        """
        self.sequences = []

        # Library helper is built for the version already set on this file.
        library = CyanaLibrary(version=self.version)
        self.cyanaLib = library
示例#5
0
class DyanaSequenceFile(DyanaGenericFile):
    """Reader/writer for DYANA/CYANA sequence files.

    The parsed content lives in ``self.sequences``, a list of
    ``DyanaSequence`` objects whose ``elements`` lists hold one
    ``DyanaSequenceElement`` per residue.
    """

    # Information on file level
    def initialize(self):
        """Reset the sequence list and build the CYANA library helper."""

        self.sequences = []

        self.cyanaLib = CyanaLibrary(version=self.version)

    def _readSequenceColumns(self):
        """Return the whitespace-separated tokens of the file, in order.

        Blank lines and lines matching the 'hash' pattern are skipped.  On
        every other line tokens are collected up to, but not including,
        the first token that matches the 'hash' pattern.
        """

        sequenceCols = []

        fin = open(self.name, 'rU')

        # try/finally guarantees the handle is released even when a
        # pattern lookup or the read itself raises.
        try:
            for line in fin:
                cols = line.split()

                if len(cols) == 0 or self.patt['hash'].search(line):
                    continue

                for col in cols:
                    if self.patt['hash'].search(col):
                        break
                    sequenceCols.append(col)
        finally:
            fin.close()

        return sequenceCols

    def read(self, verbose=0):
        """Parse the sequence file named ``self.name`` into ``self.sequences``.

        A new ``DyanaSequence`` is appended for the file; further ones are
        started whenever a linker residue follows real residues.  Linker
        residues themselves are not stored.

        Parameters:
            verbose: when equal to 1, a progress line is printed.

        Returns:
            True when the file was accepted.  False when the number of
            collected warnings/errors exceeds ``min(10% of tokens, 5)``;
            in that case every sequence created by this call is emptied.
        """

        if verbose == 1:
            print("Reading %s sequence file %s" % (self.format, self.name))

        #
        # Parse the file
        #

        sequenceCols = self._readSequenceColumns()

        #
        # Loop through the sequence info from the file
        #
        # Will now remove the following (and start a new chain):
        #
        #
        # PL: Linker from a protein amino acid residue to a linker residue
        # LL: Linker residue with a virtual bond length of 1 A
        # LL2: Linker residue with a virtual bond length of 2 A
        # LL5: Linker residue with a virtual bond length of 5 A
        # LP: Linker from a linker residue to a protein amino acid residue
        #

        seqCode = 0
        seqColNum = 0

        lineErrors = []

        # Remember where this call's sequences start so that all of them
        # (not just the last one) can be cleared if the file is rejected.
        firstNewSequence = len(self.sequences)

        self.sequences.append(DyanaSequence())
        chainResiduesAdded = False

        while seqColNum < len(sequenceCols):

            seqCol = sequenceCols[seqColNum]

            if not self.patt['onlydigit'].search(seqCol):

                isLinker = False

                # Removing linker residues - not for CCPN...
                if seqCol in self.linkerResidueCodes:
                    if chainResiduesAdded:
                        # Create a new chain!
                        chainCode = self.chainCodesString[
                            len(self.sequences) - 1]
                        self.sequences.append(
                            DyanaSequence(chainCode=chainCode))
                        print(
                            "  Warning: started new sequence '%s' based on linker residues."
                            % chainCode)
                        chainResiduesAdded = False

                    isLinker = True

                #
                # Check whether recognized...
                #
                # The label is also tried without its first character
                # (presumably to accept the 'c' cis-peptide prefix that
                # write() emits -- confirm against the CYANA library).
                #

                if (not self.cyanaLib.findResLabel(seqCol)
                        and not self.cyanaLib.findResLabel(seqCol[1:])):
                    lineErrors.append(
                        "  Warning: %s sequence element not recognized by standard CYANA library."
                        % seqCol)

                #
                # Check if next element is a number
                #

                nextIsNumber = (
                    seqColNum + 1 < len(sequenceCols)
                    and self.patt['onlydigit'].search(
                        sequenceCols[seqColNum + 1]))

                if nextIsNumber:
                    # Explicit residue number: consume it.
                    seqColNum += 1
                    seqCode = returnInt(sequenceCols[seqColNum])

                else:
                    # No number given: continue counting from the last one.
                    seqCode += 1

                #
                # Set the data, if valid
                #

                if not isLinker:

                    element = DyanaSequenceElement(seqCode, seqCol)
                    self.sequences[-1].elements.append(element)
                    element.setFormatCode(element.code3Letter)

                    chainResiduesAdded = True

            else:

                # A number that does not directly follow a residue name.
                lineErrors.append(
                    "  Error: %s sequence element %s doesn't fit format." %
                    (self.format, seqCol))

            seqColNum += 1

        #
        # Validity check
        #

        numLineErrors = len(lineErrors)

        # Assume that if have a certain amount of errors, this file is not the right format...
        if numLineErrors > min((seqColNum * 0.1), 5):
            # Empty every sequence this call created; chains split off at
            # linker residues must not survive a rejected read.
            for sequence in self.sequences[firstNewSequence:]:
                sequence.elements = []
            return False

        for lineError in lineErrors:
            print(lineError)

        return True

    def readFromCoordinates(self, coordinateFile, verbose=0):
        """Build the sequence(s) from the first model of a coordinate file.

        A new ``DyanaSequence`` is started whenever the coordinate's
        ``refChainId`` changes, and a new element is added whenever the
        ``seqCode`` or ``insertionCode`` changes.

        Parameters:
            coordinateFile: object exposing ``name`` and a
                ``modelCoordinates`` dict keyed by model number.
            verbose: when equal to 1, a progress line is printed.
        """

        if verbose == 1:
            print("Extracting %s sequence from coordinate file %s" %
                  (self.format, coordinateFile.name))

        seqCode = ""
        seqInsertCode = defaultSeqInsertCode

        chainCode = None

        # sorted() works whether keys() returns a list or a view.
        modelNums = sorted(coordinateFile.modelCoordinates.keys())

        # No models means there is nothing to extract.
        if not modelNums:
            return

        for coordinate in coordinateFile.modelCoordinates[modelNums[0]]:

            if chainCode != coordinate.refChainId:

                self.sequences.append(
                    DyanaSequence(chainCode=coordinate.refChainId))
                chainCode = coordinate.refChainId

            if (seqCode != coordinate.seqCode
                    or seqInsertCode != coordinate.insertionCode):

                #
                # New residue/item
                #

                seqCode = coordinate.seqCode
                seqInsertCode = coordinate.insertionCode

                fullSeqCode = str(seqCode) + seqInsertCode

                self.sequences[-1].elements.append(
                    DyanaSequenceElement(fullSeqCode, coordinate.resName))
                self.sequences[-1].elements[-1].setFormatCode(
                    coordinate.resName)

    def write(self, verbose=0):
        """Write all sequences to ``self.name``, one residue per line.

        Each line holds the residue's format code (prefixed with 'c' when
        the residue has a cis peptide bond) and its sequence code.  All
        sequences go into the same file; a warning is printed when there
        is more than one.

        Parameters:
            verbose: when equal to 1, a progress line is printed.
        """

        if verbose == 1:
            print("Writing %s sequence file %s" % (self.format, self.name))

        if len(self.sequences) > 1:
            print("Warning: multiple sequences - writing to same file.")

        #
        # TODO TODO: have to fill in sequence gaps with linker residues!?>!
        #
        fout = open(self.name, 'w')

        # try/finally guarantees the handle is closed (and buffered output
        # flushed) even when formatting a residue raises.
        try:
            for sequence in self.sequences:

                #
                # Write three letter codes (one per line)
                #

                for residue in sequence.elements:

                    if residue.hasCisPeptideBond:
                        resText = 'c' + residue.formatCode
                    else:
                        resText = residue.formatCode

                    fout.write("%-4s %4d" % (resText, residue.seqCode))
                    fout.write(self.newline)
        finally:
            fout.close()
示例#6
0
class DyanaSequenceFile(DyanaGenericFile):
  """Reader/writer for DYANA/CYANA sequence files.

  The parsed content lives in ``self.sequences``, a list of
  ``DyanaSequence`` objects whose ``elements`` lists hold one
  ``DyanaSequenceElement`` per residue.
  """

  # Information on file level
  def initialize(self):
    """Reset the sequence list and build the CYANA library helper."""

    self.sequences = []

    self.cyanaLib = CyanaLibrary(version = self.version)

  def read(self,verbose = 0):
    """Parse the sequence file named ``self.name`` into a new sequence.

    One ``DyanaSequence`` is appended to ``self.sequences`` and filled
    with an element per residue name found in the file.  A residue name
    may be followed by an explicit number; otherwise numbering continues
    from the previous residue.

    Parameters:
      verbose: when equal to 1, a progress line is printed.

    Returns:
      True when the file was accepted.  False when the number of
      collected warnings/errors exceeds ``min(10% of tokens, 5)``; in
      that case the elements of the new sequence are discarded.
    """

    if verbose == 1:
      print("Reading %s sequence file %s" % (self.format,self.name))

    self.sequences.append(DyanaSequence())

    sequenceCols = []

    fin = open(self.name, 'rU')

    # try/finally guarantees the handle is released even when a pattern
    # lookup or the read itself raises.
    try:
      for line in fin:
        cols = line.split()

        # Skip blank lines and lines matching the 'hash' pattern.
        if len(cols) == 0 or self.patt['hash'].search(line):
          continue

        sequenceCols.extend(cols)
    finally:
      fin.close()

    #
    # Parse info
    #

    seqCode = 0
    seqColNum = 0

    lineErrors = []

    while seqColNum < len(sequenceCols):

      seqCol = sequenceCols[seqColNum]

      if not self.patt['onlydigit'].search(seqCol):

        if not self.cyanaLib.findResLabel(seqCol):
          lineErrors.append("  Warning: %s sequence element not recognized by standard CYANA library." % seqCol)

        #
        # Check if next element is a number
        #

        nextIsNumber = (seqColNum + 1 < len(sequenceCols) and
                        self.patt['onlydigit'].search(sequenceCols[seqColNum + 1]))

        if nextIsNumber:
          # Explicit residue number: consume it.
          seqColNum += 1
          seqCode = returnInt(sequenceCols[seqColNum])

        else:
          # No number given: continue counting from the last one.
          seqCode += 1

        #
        # Set the data, if valid
        #

        element = DyanaSequenceElement(seqCode,seqCol)
        self.sequences[-1].elements.append(element)
        element.setFormatCode(seqCol)

      else:

        # A number that does not directly follow a residue name.
        lineErrors.append("  Error: %s sequence element %s doesn't fit format." % (self.format,seqCol))

      seqColNum += 1

    #
    # Validity check
    #

    numLineErrors = len(lineErrors)

    # Assume that if have a certain amount of errors, this file is not the right format...
    if numLineErrors > min((seqColNum * 0.1),5):
      self.sequences[-1].elements = []
      return False

    for lineError in lineErrors:
      print(lineError)

    return True

  def readFromCoordinates(self,coordinateFile, verbose = 0):
    """Build one sequence from the first model of a coordinate file.

    A ``DyanaSequence`` is appended to ``self.sequences`` and receives a
    new element whenever the coordinate's ``seqCode`` or
    ``insertionCode`` changes.

    Parameters:
      coordinateFile: object exposing ``name`` and a ``modelCoordinates``
        dict keyed by model number.
      verbose: when equal to 1, a progress line is printed.
    """

    if verbose == 1:
      print("Extracting %s sequence from coordinate file %s" % (self.format,coordinateFile.name))

    self.sequences.append(DyanaSequence())

    seqCode = ""
    seqInsertCode = defaultSeqInsertCode

    # sorted() works whether keys() returns a list or a view.
    modelNums = sorted(coordinateFile.modelCoordinates.keys())

    # No models means there is nothing to extract; the empty sequence
    # appended above is left in place.
    if not modelNums:
      return

    for coordinate in coordinateFile.modelCoordinates[modelNums[0]]:

      if seqCode != coordinate.seqCode or seqInsertCode != coordinate.insertionCode:

        #
        # New residue/item
        #

        seqCode = coordinate.seqCode
        seqInsertCode = coordinate.insertionCode

        fullSeqCode = str(seqCode) + seqInsertCode

        self.sequences[-1].elements.append(DyanaSequenceElement(fullSeqCode,coordinate.resName))
        self.sequences[-1].elements[-1].setFormatCode(coordinate.resName)

  def write(self,verbose = 0):
    """Write all sequences to ``self.name``, one residue per line.

    Each line holds the residue's original three-letter code (prefixed
    with 'c' when the residue has a cis peptide bond) and its sequence
    code.  All sequences go into the same file; a warning is printed
    when there is more than one.

    Parameters:
      verbose: when equal to 1, a progress line is printed.
    """

    if verbose == 1:
      print("Writing %s sequence file %s" % (self.format,self.name))

    if len(self.sequences) > 1:
      print("Warning: multiple sequences - writing to same file.")

    #
    # TODO TODO: have to fill in sequence gaps with linker residues!?>!
    #
    fout = open(self.name,'w')

    # try/finally guarantees the handle is closed (and buffered output
    # flushed) even when formatting a residue raises.
    try:
      for sequence in self.sequences:

        #
        # Write three letter codes (one per line)
        #

        for residue in sequence.elements:

          if residue.hasCisPeptideBond:
            resText = 'c' + residue.origCode3Letter
          else:
            resText = residue.origCode3Letter

          fout.write("%-4s %4d" % (resText,residue.seqCode))
          fout.write(self.newline)
    finally:
      fout.close()