Пример #1
0
def copy_from_convention(from_convention, new_convention, protein_only=True):
    """Make the names of from_convention also available as new_convention.

    For every selected residue definition, and for every atom definition it
    yields when iterated, the nameDict entry stored under from_convention is
    stored under new_convention as well. postProcess() is called on each
    atom definition and afterwards on the residue definition.

    With protein_only (the default) the residue definitions returned by
    NTdb.residuesWithProperties("protein") are used; otherwise those
    returned by NTdb.allResidueDefs().
    """
    selected = NTdb.residuesWithProperties("protein")
    if not protein_only:
        selected = NTdb.allResidueDefs()

    for residue in selected:
        nTdebug("Copying %s nomenclature convention to %s for %s", from_convention, new_convention, residue)
        residue_names = residue.nameDict
        residue_names[new_convention] = residue_names[from_convention]
        for atom in residue:
            atom_names = atom.nameDict
            atom_names[new_convention] = atom_names[from_convention]
            atom.postProcess()
        residue.postProcess()
Пример #2
0
 def testSelectByItems(self):
     """Exercise selectByItems() on the list of all atom definitions.

     Calls selectByItems('type', 'C_VIN') and requires at least 11 hits.
     """
     attribute, wanted = 'type', 'C_VIN'
     hits = NTdb.allAtomDefs().selectByItems(attribute, wanted)
     nTdebug("%s in db: %s" % (wanted, str(hits)))
     # The number of hits is allowed to grow, but it must not shrink.
     self.assertTrue(len(hits) >= 11)
Пример #3
0
    def testSelectByItems(self):
        """Exercise selectByItems() on the list of all atom definitions.

        Calls selectByItems('type', 'C_VIN') and requires at least 11 hits.
        """
        attribute, wanted = 'type', 'C_VIN'
        allAtomDefs = NTdb.allAtomDefs()
        hits = allAtomDefs.selectByItems(attribute, wanted)
        nTdebug("%s in db: %s" % (wanted, str(hits)))
        # The number of hits is allowed to grow, but it must not shrink.
        self.assertTrue(len(hits) >= 11)
Пример #4
0
def correct_xplor_stap(protein_only=True):
    """Apply the STAP corrections to atom definitions copied from XPLOR.

    The database-wide helpers (remove_non_stap_residues, correct_his_stap)
    run first, followed by the residue-specific ones for CYS, SER and ILE.
    After that correct_termini_stap() and remove_pseudo_atoms() are called
    for each selected residue definition.

    With protein_only (the default) the residue definitions returned by
    NTdb.residuesWithProperties('protein') are used; otherwise those
    returned by NTdb.allResidueDefs().
    """
    remove_non_stap_residues(NTdb)
    correct_his_stap(NTdb)
    for residue_name in ('CYS', 'SER'):
        correct_hg_stap(NTdb[residue_name])
    correct_ile_d_stap(NTdb['ILE'])

    selected = NTdb.residuesWithProperties('protein')
    if not protein_only:
        selected = NTdb.allResidueDefs()

    for residue in selected:
        correct_termini_stap(residue)
        remove_pseudo_atoms(residue)
Пример #5
0
def correct_xplor_stap(protein_only=True):
    """Apply the STAP corrections to atom definitions copied from XPLOR.

    The database-wide helpers (remove_non_stap_residues, correct_his_stap)
    run first, followed by the residue-specific ones for CYS, SER and ILE.
    After that correct_termini_stap() and remove_pseudo_atoms() are called
    for each selected residue definition.

    With protein_only (the default) the residue definitions returned by
    NTdb.residuesWithProperties('protein') are used; otherwise those
    returned by NTdb.allResidueDefs().
    """
    # Corrections that act on the database as a whole.
    remove_non_stap_residues(NTdb)
    correct_his_stap(NTdb)

    # Corrections that act on individual residue definitions.
    cysteine = NTdb['CYS']
    correct_hg_stap(cysteine)
    serine = NTdb['SER']
    correct_hg_stap(serine)
    isoleucine = NTdb['ILE']
    correct_ile_d_stap(isoleucine)

    targets = NTdb.residuesWithProperties('protein')
    if not protein_only:
        targets = NTdb.allResidueDefs()

    for target in targets:
        correct_termini_stap(target)
        remove_pseudo_atoms(target)
Пример #6
0
def copy_from_convention(from_convention, new_convention, protein_only=True):
    """Make the names of from_convention also available as new_convention.

    For every selected residue definition, and for every atom definition it
    yields when iterated, the nameDict entry stored under from_convention is
    stored under new_convention as well. postProcess() is called on each
    atom definition and afterwards on the residue definition.

    With protein_only (the default) the residue definitions returned by
    NTdb.residuesWithProperties('protein') are used; otherwise those
    returned by NTdb.allResidueDefs().
    """
    targets = NTdb.residuesWithProperties('protein')
    if not protein_only:
        targets = NTdb.allResidueDefs()

    for target in targets:
        nTdebug("Copying %s nomenclature convention to %s for %s",
                from_convention, new_convention, target)
        source_residue_name = target.nameDict[from_convention]
        target.nameDict[new_convention] = source_residue_name
        for atom in target:
            source_atom_name = atom.nameDict[from_convention]
            atom.nameDict[new_convention] = source_atom_name
            atom.postProcess()
        target.postProcess()
Пример #7
0
    def matchResidue2Cing(self, res):
        """
        Find the CING database residue definition that belongs to res.

        Residues whose name starts with ARG, ASP, GLU, HIS or LYS are
        'ill-defined' across PDB flavours; for these the protonation variant
        is derived from the presence of a few crucial atom names and looked
        up using CYANA (==DIANA) naming. CYANA non-residues, and waters when
        self.skipWaters is set, are flagged to be skipped. Every other
        residue is looked up by name in self.convention.

        When nothing was found and self.allowNonStandardResidue is set, the
        name is tried in the default convention of getResidueDefByName and,
        failing that, a new residue definition is appended to NTdb.

        On return res has the attributes:
            db      the residue definition found or created (false if none)
            skip    True when the residue is to be ignored

        Returns res.db; None when appending a new residue definition failed.
        """
        res.db = None
        res.skip = False

        resName3 = res.resName[0:3]
        # CYANA name of the variant for the ambiguously named residue types;
        # remains None for every other residue.
        cyanaName = None

        if resName3 == 'ARG':
            # '1HH' is the second naming set (CYANA 1.x, AMBER).
            if 'HH1' in res or '1HH' in res:
                cyanaName = 'ARG'
            else:
                # Default protonated; this also assures most common for X-ray without protons
                cyanaName = 'ARG+'
        elif resName3 == 'ASP':
            if 'HD2' in res:
                cyanaName = 'ASP'
            else:
                # Default deprot; this also assures most common for X-ray without protons
                cyanaName = 'ASP-'
        elif resName3 == 'GLU':
            if 'HE2' in res:
                cyanaName = 'GLU'
            else:
                # Default deprot; this also assures most common for X-ray without protons
                cyanaName = 'GLU-'
        elif resName3 == 'HIS':
            if 'HE2' in res:
                if 'HD1' in res:
                    # Both HD1 and HE2 present.
                    cyanaName = 'HIS+'
                else:
                    # Only HE2 present.
                    cyanaName = 'HIST'
            else:
                # Default HD1
                cyanaName = 'HIS'
        elif resName3 == 'LYS':
            # '1HZ'/'3HZ' is the second naming set (CYANA 1.x).
            if ('HZ1' in res and 'HZ3' not in res) or ('1HZ' in res and '3HZ' not in res):
                cyanaName = 'LYS'
            else:
                # Default prot; this also assures most common for X-ray without protons
                cyanaName = 'LYS+'
        elif res.resName in CYANA_NON_RESIDUES or (res.resName == 'HOH' and self.skipWaters):
            res.skip = True
        else:
            res.db = NTdb.getResidueDefByName(res.resName, convention=self.convention)

        if cyanaName is not None:
            res.db = NTdb.getResidueDefByName(cyanaName, convention=CYANA)

        # Only continue the search if not found and non-standard residues are allowed.
        if res.db:
            return res.db
        if not self.allowNonStandardResidue:
            res.skip = True
            return res.db

        # Try to match the residue using INTERNAL convention.
        res.db = NTdb.getResidueDefByName(res.resName)
        if res.db:
            return res.db

        # Still unknown: insert a new residue definition.
        res.db = NTdb.appendResidueDef(name=res.resName, shortName='_', comment='From parsing PDB file')
        if not res.db:
            nTcodeerror("Adding a non-standard residue should have been possible.")
            return None
        res.db.nameDict[self.convention] = res.resName

        # Just a check, disable for speed.
        if not NTdb.getResidueDefByName(res.resName):
            nTcodeerror("Added residue but failed to find it again in pdbParser#_matchResidue2Cing")

        return res.db
Пример #8
0
from cing.core.classes import DihedralRestraint, DihedralRestraintList
from cing.core.classes import RDCRestraint, RDCRestraintList

#---------------------------------------------------------------------------------------------
# functional imports: Order matters!
#---------------------------------------------------------------------------------------------

# Try a Yasara import
# GV: We could change this by defining yasaradir in the CING setup
#
# Optional Yasara support: when the yasara module can be imported and the
# directory it names exists, its 'pym' and 'plg' sub directories are appended
# to sys.path. In every other case yasaradir ends up as None.
#
# NOTE(review): the bare 'except:' below also catches the SystemExit raised by
# exit(1) (assuming exit is the usual site builtin), so a missing Yasara
# directory is reported but does not terminate the program. Confirm whether
# that is the intended behaviour before narrowing the except clause.
try:
    from yasara import yasaradir  #@UnresolvedImport # JFD: why not add the functionality from the plugin ?
    if os.path.exists(yasaradir):
        sys.path.append(os.path.join(yasaradir, 'pym'))
        sys.path.append(os.path.join(yasaradir, 'plg'))
    else:
        nTcodeerror(
            'Yasara directory "%s" as defined in yasara.py module not found',
            yasaradir)
        exit(1)
except:
    yasaradir = None
#end try

from cing.core.molecule import *
from cing.core.importPlugin import importPlugin  # This imports all plugins
from cing.core.sml import obj2SML  # This also initializes the SMLhandler methods
from cing.core.sml import sML2obj  # This also initializes the SMLhandler methods

from cing.core.database import NTdb  #@Reimport
NTdb._restoreFromSML()  # This initializes the database
Пример #9
0
    # parse this xplor pdbfile
    pdbfile = PDBFile( fName )

    # print a MODEL record
    modelCount += 1
    mdl         = MODEL()
    mdl.serial  = modelCount
    fprintf( pdbFile, '%s\n', mdl )

    atomCount = 0
    lastRecord = None
    for record in pdbfile:
        if record._name.strip() in ["ATOM","HETATM"]:

            # see if we can find a definition for this residue, atom name in the database
            atm = NTdb.getAtomDefByName( record.resName, record.name, XPLOR )

            # we found a match
            if (atm != None):
                # check if there is an convention equivalent; skip otherwise
                if (atm.translate(convention) != None and atm.residueDef.translate(convention) != None):
                    atomCount     += 1
                    record.serial  = atomCount
                    record.resName = atm.residueDef.translate( convention )
                    record.name    = atm.translate( convention )
                    if not 'chainID' in record:
                        record.chainID = 'A'
                    #end if
                    fprintf( pdbFile, "%s\n", record )
                    lastRecord = record
                else:
Пример #10
0
def plotHistogramOverall():
    """Plot the residue-pair count matrices and the distribution of the counts.

    For each of the secondary structure types ' ', 'S' and 'H', and finally
    for the overall set (ssType None), an n x n matrix (n = 20) is filled
    with sum(hist1) for every (resTypePrev, resType) pair of common20AAList.
    The histograms are taken from hPlot.histd1BySs0AndResTypes, or from
    hPlot.histd1ByResTypes for the overall set.

    Two figures are saved per ssType in the current directory:
        plotHistogram_<ssType>_d1d2.png     image of the count matrix
        plotHistOfHist_<ssType>_d1d2.png    histogram of the matrix values

    Returns the matrix of the last set plotted (the overall one).
    """
    graphicsFormat = "png"
    alpha = 0.8 # was 0.8; looks awful with alpha = 1
    n = 20
    extent = (0, n) + (0, n)
    cmapList = [green_inv, blue_inv, yellow_inv]
    colorList = ['green', 'blue', 'yellow']
    i = 1 # decides on color picked.

    # First the individual ssTypes, then the overall set.
    for doOverall in [False, True]:
        if doOverall:
            ssTypeList = [None]
        else:
            ssTypeList = [' ', 'S', 'H']

        for ssType in ssTypeList:
            m = zeros((n * n), dtype=int).reshape(n, n)
            tickList = [NTdb.getResidueDefByName(resType).shortName for resType in common20AAList]
            for r, resTypePrev in enumerate(common20AAList):
                for c, resType in enumerate(common20AAList):
                    if doOverall:
                        hist1 = getDeepByKeys(hPlot.histd1ByResTypes, resType, resTypePrev)
                    else:
                        hist1 = getDeepByKeys(hPlot.histd1BySs0AndResTypes, ssType, resType, resTypePrev)
                    # Identity test: '== None' on a numpy array compares
                    # element by element instead of testing for a missing entry.
                    if hist1 is None:
                        nTdebug('skipping for hist1 is empty for [%s] [%s]' % (resType, resTypePrev))
                        continue
                    m[r, c] = sum(hist1)

            clf()

            xlabel('resType')
            ylabel('resTypePrev')
            xlim((0, n))
            ylim((0, n))
            offset = 0.5
            xticks(arange(offset, n), tickList)
            yticks(arange(offset, n), tickList)
            grid(True)
            strTitle = "ssType: [%s]" % ssType
            title(strTitle)
            plot([0, n], [0, n], 'b-', linewidth=1)
            minCount = 300.
            maxCount = 1000.
            if False:
                minCount = 0.
                maxCount = 1.
            if ssType:
                # A single ssType gets a third of the overall color range.
                minCount /= 3.
                maxCount /= 3.
            maxHist = amax(m)
            minHist = amin(m)
            sumHist = sum(m)
            nTmessage('ssType: %s' % ssType)
            nTmessage('maxHist: %s' % maxHist) # 9165 of total of ~ 1 M.
            nTmessage('minHist: %s' % minHist) # 210
            nTmessage('sumHist: %s' % sumHist)
            # Cells at or below minCount are masked; they get the 'bad' color.
            his = masked_where(m <= minCount, m, copy=1)

            palette = cmapList[i]
            palette.set_under(color='red', alpha=1.0) # alpha is 0.0
            # Important to make it a hard alpha; last plotted will rule.
            palette.set_over(color=colorList[i], alpha=1.0) # alpha is 1.0
            palette.set_bad(color='red', alpha=1.0)

            norm = Normalize(vmin=minCount, vmax=maxCount, clip=True) # clip is False
            imshow(his,
                    interpolation='nearest',
                    origin='lower',
                    extent=extent,
                    alpha=alpha,
                    cmap=palette,
                    norm=norm)

            fn = "plotHistogram_%s_d1d2.%s" % (ssType, graphicsFormat)
            savefig(fn)

            # Second figure: how the pair counts themselves are distributed.
            clf()
            a = m.reshape(n * n)
            hist(a, 20)
            xlabel('pair count')
            ylabel('number of occurrences')
            title(strTitle)
            fn = "plotHistOfHist_%s_d1d2.%s" % (ssType, graphicsFormat)
            savefig(fn)

        # end loop over ssType
    # end over ssType overall
    return m
Пример #11
0
from cing.core.constants import * #@UnusedWildImport
from cing.core.database import NTdb

# Add the CYANA2 names to every residue and atom definition. They are copied
# from the CYANA names, except for the atom named 'HN' which becomes 'H'.
for res in NTdb:
    res.nameDict[CYANA2] = res.nameDict[CYANA]
    for atm in res:
        if atm.name == 'HN':
            atm.nameDict[CYANA2] = 'H'
        else:
            atm.nameDict[CYANA2] = atm.nameDict[CYANA]

# Export the updated definitions; the file is closed even when exportDef() raises.
stream = open('dbTable-new.py', 'w')
try:
    NTdb.exportDef(stream=stream)
finally:
    stream.close()
Пример #12
0
    def __init__( self, seqFile, protFile, convention)   :
        """Parse an Xeasy sequence file and prot file.

        seqFile:    sequence file; field 1 is the residue name and the
                    optional field 2 the residue number. Without a number
                    the residues are numbered consecutively.
        protFile:   prot file; only lines with exactly 5 fields are used:
                    atom index, shift, error, atom name, residue number.
        convention: convention in which residue and atom names are validated
                    against NTdb.

        The results are stored on self:
            seq         residue number -> residue name (as in seqFile)
            resCount    number of residues stored in seq
            prot        atom index -> NTdict describing the assignment
            protCount   number of entries stored in prot
            error       1 if a problem was found in either file, else 0
        """
        NTdict.__init__( self )

        # Initialise the error flag before any parsing, so that problems
        # found in the sequence file are not reset when the prot file is read.
        self.error = 0

        # parse the seqFile
        self.seq = {}
        resNum = 1
        self.resCount = 0
        for f in AwkLike( seqFile, commentString='#' ):
            if f.isEmpty() or f.isComment( '#'):
                continue
            seqName = f.dollar[1]
            if seqName in CYANA_NON_RESIDUES:
                # skip the CYANA non-residue entries
                continue
            if not NTdb.isValidResidueName( seqName, convention ):
                nTerror( 'Xeasy: residue "%s" invalid for convention "%s" in "%s:%d"',
                         seqName, convention, seqFile, f.NR
                       )
                self.error = 1
                continue
            if f.NF > 1:
                parsedNum = f.int(2)
                if parsedNum is None:
                    # An unparsable residue number can neither be used as key
                    # nor be incremented; flag the problem and skip the line.
                    nTerror( 'Xeasy: invalid residue number "%s" in "%s:%d"',
                             f.dollar[2], seqFile, f.NR
                           )
                    self.error = 1
                    continue
                resNum = parsedNum
            #end if
            self.seq[ resNum ] = seqName # store original 'convention' name
            resNum += 1
            self.resCount += 1
        #end for
        self.seqFile = seqFile
        self.convention = convention

        # parse the prot file
        self.prot = {}
        self.protCount = 0
        for f in  AwkLike( protFile, commentString='#' ):
            if f.NF != 5:
                continue
            # Xeasy/Cyana atom index
            index = f.int( 1 )
            atomName  = f.dollar[4]
            resNum    = f.int( 5 )
            if resNum not in self.seq:
                # %s rather than %d: resNum is None when it could not be parsed.
                nTwarning( 'Xeasy: undefined residue number %s in "%s:%d" (%s)' % (
                         resNum, protFile, f.NR, f.dollar[0]))
                self.error = 1
                continue
            resName   = self.seq[resNum]
            if not NTdb.isValidAtomName( resName, atomName, convention):
                nTwarning('Xeasy parsing "%s:%d": invalid atom "%s" for residue %s%d' %(
                           protFile, f.NR,  atomName, resName, resNum))
                self.error = 1
                continue
            self.prot[ index ] = NTdict(index     = index,
                                        shift     = f.float( 2 ),
                                        error     = f.float( 3 ),
                                        atomName  = atomName,
                                        resNum    = resNum,
                                        resName   = resName,
                                        atom      = None
                                       )
            self.protCount += 1
        #end for

        self.protFile = protFile
        nTmessage('Xeasy.__init__: parsed %d residues, %d atoms from %s, %s',
                      self.resCount, self.protCount, self.seqFile,self.protFile)
Пример #13
0
def main():
    """Build the d1 histogram database from the gzipped CSV file and pickle it.

    Steps:
      1. gunzip the CSV file and group at most lineCountMax rows by entry id
         (first column).
      2. Per entry remove the rows whose value in BFACTOR_COLUMN exceeds
         DEFAULT_MAX_BFACTOR, plus the DEFAULT_BFACTOR_PERCENTAGE_FILTER
         percent of rows at the tail of the list sorted on BFACTOR_COLUMN.
      3. Collect the d1 values of sibling residues, keyed on secondary
         structure type and/or residue types.
      4. Histogram the collected values, derive the cTuple of every
         (ssType, resType, resTypePrev, resTypeNext) combination and dump
         everything as a dictionary to dbase_file_abs_name with cPickle.

    Returns None; also returns early (after nTerror) when a residue type is
    unknown to NTdb or a d1 value is out of range.
    """
    cvs_file_abs_name_gz = os.path.join(cingDirData, 'PluginCode', 'Whatif',
                                        cvs_file_abs_name + '.gz')
    gunzip(cvs_file_abs_name_gz)

    valueBySs0AndResTypes = {}  # keys are SSi,   RTi, RTi-1
    valueBySs1AndResTypes = {}  # keys are SSi-1, RTi, RTi-1
    valueByResTypes = {}
    valueBySs0 = {}  # keys are SSi
    valueBySs1 = {}  # keys are SSi-1
    histd1CtupleBySsAndResTypes = {}
    value = []  # NB is an array without being keyed.

    histd1BySs0AndResTypes = {}  # keys are SSi,   RTi, RTi-1
    histd1BySs1AndResTypes = {}  # keys are SSi-1, RTi, RTi-1
    histd1ByResTypes = {}
    histd1BySs0 = {}
    histd1BySs1 = {}

    # Read the rows, grouped by entry. The file is closed explicitly because
    # it is removed further down.
    linesByEntry = {}
    lineCount = 0
    csvFile = open(cvs_file_abs_name, "rb")
    try:
        reader = csv.reader(csvFile, quoting=csv.QUOTE_NONE)
        for row in reader:
            lineCount += 1
            if lineCount > lineCountMax:
                break
            linesByEntry.setdefault(row[0], []).append(row)
    finally:
        csvFile.close()

    skippedResTypes = []
    entryIdList = sorted(linesByEntry.keys())

    # Do some pre filtering.
    for entryId2 in entryIdList:
        lineList = linesByEntry[entryId2]
        # Remember the original position of each row as an extra column.
        for idx, line in enumerate(lineList):
            line.append(idx)
        lineListSorted = NTsort(lineList, BFACTOR_COLUMN, inplace=False)
        # Now throw away the worst rows, a fixed percentage of the residues.
        n = len(lineListSorted)
        bad_count = int(round((n * DEFAULT_BFACTOR_PERCENTAGE_FILTER) / 100.))
        to_remove_count = n - bad_count  # index at which the bad tail starts
        badIdxSet = set(lineItem[IDX_COLUMN]
                        for lineItem in lineListSorted[to_remove_count:n])
        # Delete from the back, so the indices still to be visited stay valid.
        for i in reversed(range(n)):
            max_bfactor = float(lineList[i][BFACTOR_COLUMN])
            if max_bfactor > DEFAULT_MAX_BFACTOR or i in badIdxSet:
                del lineList[i]
        removed_count = n - len(lineList)
        if removed_count < bad_count:
            nTwarning("Failed to remove at least %d residues" % bad_count)

    for entryId2 in entryIdList:
        prevChainId = None
        prevResType = None
        prevResNum = None
        prevSsType = None
        for _r, row in enumerate(linesByEntry[entryId2]):
            #1zzk,A,GLN ,  17,E, 205.2, 193.6
            #1zzk,A,VAL ,  18,E, 193.6, 223.2
            #1zzk,A,THR ,  19,E, 223.2, 190.1
            (entryId, chainId, resType, resNum, ssType, d1, _d2, _max_bfactor,
             _idx) = row
            resNum = int(resNum)
            ssType = to3StateDssp(ssType)[0]
            resType = resType.strip()
            db = NTdb.getResidueDefByName(resType)
            if not db:
                nTerror("resType not in db: %s" % resType)
                return
            resType = db.nameDict['IUPAC']
            d1 = floatParse(d1.strip())
            if isNaN(d1):
                continue
            if not inRange(d1):
                nTerror("d1 not in range for row: %s" % str(row))
                return

            if resType not in common20AAList:
                if resType not in skippedResTypes:
                    skippedResTypes.append(resType)
                continue

            if isSibling(chainId, resNum, prevChainId, prevResNum):
                appendDeepByKeys(valueBySs0AndResTypes, d1, ssType, resType,
                                 prevResType)
                appendDeepByKeys(valueBySs1AndResTypes, d1, prevSsType,
                                 resType, prevResType)
                appendDeepByKeys(valueByResTypes, d1, resType, prevResType)
                appendDeepByKeys(valueBySs0, d1, ssType)
                appendDeepByKeys(valueBySs1, d1, prevSsType)
                value.append(d1)
            prevResType = resType
            prevResNum = resNum
            prevChainId = chainId
            prevSsType = ssType

    os.unlink(cvs_file_abs_name)
    nTmessage("Skipped skippedResTypes: %r" % skippedResTypes)
    nTmessage("Got count of values: %r" % len(value))
    # fill FOUR types of hist.
    # TODO: filter differently for pro/gly
    keyListSorted1 = sorted(valueBySs0AndResTypes.keys())
    for isI in (True, False):
        # isI: keyed on the ssType of residue i; otherwise on that of i-1.
        if isI:
            valueBySs = valueBySs0
            valueBySsAndResTypes = valueBySs0AndResTypes
            histd1BySs = histd1BySs0
            histd1BySsAndResTypes = histd1BySs0AndResTypes
        else:
            valueBySs = valueBySs1
            valueBySsAndResTypes = valueBySs1AndResTypes
            histd1BySs = histd1BySs1
            histd1BySsAndResTypes = histd1BySs1AndResTypes
        for ssType in keyListSorted1:
            d1List = valueBySs[ssType]
            if not d1List:
                nTerror("Expected d1List from valueBySs[%s]" % (ssType))
                continue
            hist1d, _bins, _patches = hist(d1List, bins=binCount, range=xRange)
            nTmessage("Count %6d in valueBySs[%s]" % (sum(hist1d), ssType))
            setDeepByKeys(histd1BySs, hist1d, ssType)

            keyListSorted2 = sorted(valueBySsAndResTypes[ssType].keys())
            for resType in keyListSorted2:
                keyListSorted3 = sorted(
                    valueBySsAndResTypes[ssType][resType].keys())
                for prevResType in keyListSorted3:
                    d1List = valueBySsAndResTypes[ssType][resType][prevResType]
                    if not d1List:
                        nTerror(
                            "Expected d1List from valueBySsAndResTypes[%s][%s][%s]"
                            % (ssType, resType, prevResType))
                        continue
                    hist1d, _bins, _patches = hist(d1List,
                                                   bins=binCount,
                                                   range=xRange)
                    setDeepByKeys(histd1BySsAndResTypes, hist1d, ssType,
                                  resType, prevResType)
    # Delete the reference -not- the object.
    valueBySs = None
    valueBySsAndResTypes = None
    histd1BySs = None
    histd1BySsAndResTypes = None

    # Now that all 1D histograms are in, combine them per residue triplet.
    for ssType in keyListSorted1:
        # Take the residue types recorded for this ssType, rather than reusing
        # the key list left over from the last iteration of the loop above
        # (which may lack, or contain extra, residue types on smaller sets).
        keyListSorted2 = sorted(valueBySs0AndResTypes[ssType].keys())
        for resType in keyListSorted2:
            keyListSorted3 = sorted(
                valueBySs0AndResTypes[ssType][resType].keys())
            for resTypePrev in keyListSorted3:
                for resTypeNext in keyListSorted3:
                    hist1 = getDeepByKeys(histd1BySs0AndResTypes, ssType,
                                          resType, resTypePrev)  # x-axis
                    # This was bug! It needs to be hashed on the ssType of resType -not- on resTypeNext
                    hist2 = getDeepByKeys(histd1BySs1AndResTypes, ssType,
                                          resTypeNext, resType)
                    # Identity tests: '== None' on a numpy array compares
                    # element by element instead of testing for a missing entry.
                    if hist1 is None:
                        nTdebug(
                            'skipping for hist1 is empty for [%s] [%s] [%s]' %
                            (ssType, resTypePrev, resType))
                        continue
                    if hist2 is None:
                        nTdebug(
                            'skipping for hist2 is empty for [%s] [%s] [%s]' %
                            (ssType, resType, resTypeNext))
                        continue
                    m1 = mat(hist1, dtype='float')
                    m2 = mat(hist2, dtype='float')
                    m2 = m2.transpose()  # pylint: disable=E1101
                    hist2d = multiply(m1, m2)

                    cTuple = getEnsembleAverageAndSigmaHis(hist2d)
                    (_c_av, c_sd, _hisMin, _hisMax) = cTuple  #@UnusedVariable
                    # append the hash keys as a way of id.
                    cTuple += (str([ssType, resType, resTypePrev,
                                    resTypeNext]),)
                    if c_sd is None:
                        nTdebug(
                            'Failed to get c_sd when testing not all residues are present in smaller sets.'
                        )
                        continue
                    if c_sd == 0.:
                        nTdebug(
                            'Got zero c_sd, ignoring histogram. This should only occur in smaller sets. Not setting values.'
                        )
                        continue
                    setDeepByKeys(histd1CtupleBySsAndResTypes, cTuple, ssType,
                                  resType, resTypePrev, resTypeNext)

    keyListSorted1 = sorted(valueByResTypes.keys())
    for resType in keyListSorted1:
        keyListSorted2 = sorted(valueByResTypes[resType].keys())
        for prevResType in keyListSorted2:
            d1List = valueByResTypes[resType][prevResType]
            if not d1List:
                nTerror("Expected d1List from valueByResTypes[%s][%s]" %
                        (resType, prevResType))
                continue
            hist1d, _bins, _patches = hist(d1List, bins=binCount, range=xRange)
            setDeepByKeys(histd1ByResTypes, hist1d, resType, prevResType)

    histd1, _bins, _patches = hist(value, bins=binCount, range=xRange)
    nTmessage("Count %6d in value" % sum(histd1))

    dbase = {}
    dbase['histd1BySs0AndResTypes'] = histd1BySs0AndResTypes  # 92 kb uncompressed in the case of ~1000 lines only
    dbase['histd1BySs1AndResTypes'] = histd1BySs1AndResTypes
    dbase['histd1CtupleBySsAndResTypes'] = histd1CtupleBySsAndResTypes
    dbase['histd1ByResTypes'] = histd1ByResTypes  # 56 kb
    dbase['histd1BySs0'] = histd1BySs0  # 4 kb
    dbase['histd1BySs1'] = histd1BySs1
    dbase['histd1'] = histd1  #  4 kb

    if os.path.exists(dbase_file_abs_name):
        os.unlink(dbase_file_abs_name)
    # Close the output file even when pickling fails.
    output = open(dbase_file_abs_name, 'wb')
    try:
        cPickle.dump(dbase, output, 2)
    finally:
        output.close()
Пример #14
0
from cing.core.database import NTdb
from cing.core.database import saveToSML

cing.verbosity = cing.verbosityDebug

if __name__ == '__main__':
    # Safety catch: as long as this condition is true the script only warns
    # and exits with status 1, without touching the database.
    if 1:  # DEFAULT: 1 disable only when needed.
        nTwarning("Don't execute this script %s by accident. It damages CING." % getCallerFileName())
        sys.exit(1)

    convention = 'INTERNAL_1'

    # Pairs of (atom name in the residue definition, new XPLOR name entry)
    # for the N-terminal and C-terminal atoms.
    terminalNamePairs = (
        ('H1', 'HT1'),
        ('H2', 'HT2'),
        ('H3', 'HT3'),
        ('OXT', 'OT2'),
        ('O', 'O,OT1'),
    )

    for residueDef in NTdb.residuesWithProperties('protein'):
        nTdebug("Xplor N-terminal and C-terminal atom name translations changed for %s", residueDef)
        for atomName, xplorName in terminalNamePairs:
            if atomName not in residueDef:
                continue
            residueDef[atomName].nameDict['XPLOR'] = xplorName

    # save the new versions
    databaseDir = os.path.join(cingPythonCingDir, 'Database', convention)
    rootPath = os.path.realpath(databaseDir)
    saveToSML(NTdb, rootPath, convention)
Пример #15
0
from cing.core.classes import RDCRestraint, RDCRestraintList

# ---------------------------------------------------------------------------------------------
# functional imports: Order matters!
# ---------------------------------------------------------------------------------------------

# Try a Yasara import
# GV: We could change this by defining yasaradir in the CING setup
#
# Optional Yasara support: when the yasara module can be imported and the
# directory it names exists, its "pym" and "plg" sub directories are appended
# to sys.path. In every other case yasaradir ends up as None.
#
# NOTE(review): the bare 'except:' below also catches the SystemExit raised by
# exit(1) (assuming exit is the usual site builtin), so a missing Yasara
# directory is reported but does not terminate the program. Confirm whether
# that is the intended behaviour before narrowing the except clause.
try:
    from yasara import yasaradir  # @UnresolvedImport # JFD: why not add the functionality from the plugin ?

    if os.path.exists(yasaradir):
        sys.path.append(os.path.join(yasaradir, "pym"))
        sys.path.append(os.path.join(yasaradir, "plg"))
    else:
        nTcodeerror('Yasara directory "%s" as defined in yasara.py module not found', yasaradir)
        exit(1)
except:
    yasaradir = None
# end try

from cing.core.molecule import *
from cing.core.importPlugin import importPlugin  # This imports all plugins
from cing.core.sml import obj2SML  # This also initializes the SMLhandler methods
from cing.core.sml import sML2obj  # This also initializes the SMLhandler methods


from cing.core.database import NTdb  # @Reimport

NTdb._restoreFromSML()  # This initializes the database
Пример #16
0
from cing.core.constants import *  #@UnusedWildImport
from cing.core.database import NTdb

# Derive the CYANA2 names from the CYANA ones: every residue and atom keeps
# its CYANA name, except the atom named 'HN', which becomes 'H' in CYANA2.
for res in NTdb:
    res.nameDict[CYANA2] = res.nameDict[CYANA]
    for atm in res:
        if atm.name == 'HN':
            atm.nameDict[CYANA2] = 'H'
        else:
            atm.nameDict[CYANA2] = atm.nameDict[CYANA]

# Export the updated definitions; try/finally makes sure the file is closed
# even when the export raises.
stream = open('dbTable-new.py', 'w')
try:
    NTdb.exportDef(stream=stream)
finally:
    stream.close()
Пример #17
0
Script to update the XPLOR N-terminal and C-terminal atom name conventions.

'''
from cing import cingPythonCingDir
from cing.Libs.NTutils import * #@UnusedWildImport
from cing.core.database import NTdb
from cing.core.database import saveToSML

cing.verbosity = cing.verbosityDebug

if __name__ == '__main__':
    # Guard against accidental runs: with the default setting the script
    # only prints a warning and exits with status 1.
    if 1: # DEFAULT: 1 disable only when needed.
        nTwarning("Don't execute this script %s by accident. It damages CING." % getCallerFileName())
        sys.exit(1)
    # end if

    convention = 'INTERNAL_1'

    # Each pair is (atom name looked up in the residue definition, value
    # stored as that atom's XPLOR name).
    xplorTerminalPairs = [
        ('H1', 'HT1'),
        ('H2', 'HT2'),
        ('H3', 'HT3'),
        ('OXT', 'OT2'),
        ('O', 'O,OT1'),
    ]

    proteinResidueDefs = NTdb.residuesWithProperties('protein')
    for rdef in proteinResidueDefs:
        nTdebug("Xplor N-terminal and C-terminal atom name translations changed for %s",rdef)
        for name1, namex in xplorTerminalPairs:
            if name1 in rdef:
                atomDef = rdef[name1]
                atomDef.nameDict['XPLOR'] = namex
            #end if
        #end for
    #end for

    # save the new versions
    rootPath = os.path.join(cingPythonCingDir, 'Database', convention)
    rootPath = os.path.realpath(rootPath)
    saveToSML(NTdb, rootPath, convention)
Пример #18
0
    def matchResidue2Cing(self, res):
        """
        Resolve res to a residue definition in the CING database (NTdb).

        Residue types that are named ambiguously by different PDB file formats
        (ARG, ASP, GLU, HIS, LYS; recognised by the first three letters of
        res.resName) are identified from the presence of characteristic atom
        names and looked up using CYANA (==DIANA) naming.
        Names in CYANA_NON_RESIDUES, and HOH when self.skipWaters is set, are
        flagged to be skipped. Every other name is looked up in self.convention.

        When no definition is found and self.allowNonStandardResidue is set,
        the name is tried without an explicit convention and, failing that, a
        new residue definition is appended to NTdb.

        Sets res.db and res.skip.
        Returns res.db (None when nothing matched); None when a new residue
        definition could not be added.
        """
        res.db = None
        res.skip = False

        prefix = res.resName[0:3]
        cyanaName = None  # CYANA residue name to look up, if one of the special cases applies

        if prefix == 'ARG':
            # 'HH1', or '1HH' (second set for CYANA 1.x, AMBER), selects ARG.
            if 'HH1' in res or '1HH' in res:
                cyanaName = 'ARG'
            else:
                # Default protonated; this also assures most common for X-ray without protons
                cyanaName = 'ARG+'
        elif prefix == 'ASP':
            if 'HD2' in res:
                cyanaName = 'ASP'
            else:
                # Default deprot; this also assures most common for X-ray without protons
                cyanaName = 'ASP-'
        elif prefix == 'GLU':
            if 'HE2' in res:
                cyanaName = 'GLU'
            else:
                # Default deprot; this also assures most common for X-ray without protons
                cyanaName = 'GLU-'
        elif prefix == 'HIS':
            hasHd1 = 'HD1' in res
            hasHe2 = 'HE2' in res
            if not hasHe2:
                # Default HD1
                cyanaName = 'HIS'
            elif hasHd1:
                cyanaName = 'HIS+'
            else:
                cyanaName = 'HIST'
        elif prefix == 'LYS':
            firstSet = 'HZ1' in res and 'HZ3' not in res
            # Second set for CYANA 1.x
            if firstSet or ('1HZ' in res and '3HZ' not in res):
                cyanaName = 'LYS'
            else:
                # Default prot; this also assures most common for X-ray without protons
                cyanaName = 'LYS+'
        elif res.resName in CYANA_NON_RESIDUES:
            res.skip = True
        elif res.resName == 'HOH' and self.skipWaters:
            res.skip = True
        else:
            res.db = NTdb.getResidueDefByName(res.resName,
                                              convention=self.convention)
        #end if

        if cyanaName is not None:
            res.db = NTdb.getResidueDefByName(cyanaName, convention=CYANA)

        # Only continue the search if not found and non-standard residues are allowed.
        if res.db:
            return res.db

        if not self.allowNonStandardResidue:
            res.skip = True
            return res.db

        # Try to match the residue using INTERNAL convention.
        res.db = NTdb.getResidueDefByName(res.resName)
        if res.db:
            return res.db

        # Still unknown: register it as a new residue definition.
        newDef = NTdb.appendResidueDef(name=res.resName,
                                       shortName='_',
                                       comment='From parsing PDB file')
        res.db = newDef
        if not newDef:
            nTcodeerror(
                "Adding a non-standard residue should have been possible.")
            return None
        res.db.nameDict[self.convention] = res.resName

        # Just a check, disable for speed.
        if not NTdb.getResidueDefByName(res.resName):
            nTcodeerror(
                "Added residue but failed to find it again in pdbParser#_matchResidue2Cing"
            )

        return res.db
Пример #19
0
def plotHistogramOverall():
    """Plot residue-pair count maps and the distribution of those counts.

    Two passes are made: first one set of plots per secondary structure type
    (' ', 'S' and 'H'), then one overall set (ssType None). In every pass an
    n x n matrix is filled with the summed d1 histogram of each
    (resTypePrev, resType) pair taken from common20AAList, and two figures
    are saved under a relative file name:

        plotHistogram_<ssType>_d1d2.png    colour map of the pair counts
        plotHistOfHist_<ssType>_d1d2.png   histogram of the pair counts

    Returns the count matrix of the last pass (the overall one).
    """
    graphicsFormat = "png"
    alpha = 0.8  # was 0.8; looks awful with alpha = 1
    n = 20  # matrix size; NOTE(review): assumes common20AAList has at most n entries
    extent = (0, n) + (0, n)
    cmapList = [green_inv, blue_inv, yellow_inv]
    colorList = ['green', 'blue', 'yellow']
    i = 1  # decides on color picked.
    offset = 0.5  # shifts the tick labels to the middle of each cell

    # The tick labels are the same for every plot, so look them up only once.
    tickList = [
        NTdb.getResidueDefByName(resType).shortName
        for resType in common20AAList
    ]

    # If set it will do a single ssType otherwise the overall.
    for doOverall in [False, True]:
        if doOverall:
            ssTypeList = [None]
        else:
            ssTypeList = [' ', 'S', 'H']

        for ssType in ssTypeList:
            # Rows are indexed by the previous residue type, columns by the residue type.
            m = zeros((n * n), dtype=int).reshape(n, n)
            for r, resTypePrev in enumerate(common20AAList):
                for c, resType in enumerate(common20AAList):
                    if doOverall:
                        hist1 = getDeepByKeys(hPlot.histd1ByResTypes, resType,
                                              resTypePrev)
                    else:
                        hist1 = getDeepByKeys(hPlot.histd1BySs0AndResTypes,
                                              ssType, resType, resTypePrev)
                    # Identity test on purpose: when hist1 is a numpy array,
                    # '== None' compares element by element and cannot be
                    # used as a condition.
                    if hist1 is None:
                        nTdebug('skipping for hist1 is empty for [%s] [%s]' %
                                (resType, resTypePrev))
                        continue
                    m[r, c] = sum(hist1)

            # First figure: colour map of the pair counts.
            clf()
            xlabel('resType')
            ylabel('resTypePrev')
            xlim((0, n))
            ylim((0, n))
            xticks(arange(offset, n), tickList)
            yticks(arange(offset, n), tickList)
            grid(True)
            strTitle = "ssType: [%s]" % ssType
            title(strTitle)
            plot([0, n], [0, n], 'b-', linewidth=1)

            # Colour scale limits; divided by three for the per-ssType plots.
            minCount = 300.
            maxCount = 1000.
            if ssType:
                minCount /= 3.
                maxCount /= 3.
            maxHist = amax(m)
            minHist = amin(m)
            sumHist = sum(m)
            nTmessage('ssType: %s' % ssType)
            nTmessage('maxHist: %s' % maxHist)
            nTmessage('minHist: %s' % minHist)
            nTmessage('sumHist: %s' % sumHist)

            # Cells at or below minCount are masked and drawn in the 'bad' colour.
            his = masked_where(m <= minCount, m, copy=1)

            palette = cmapList[i]
            palette.set_under(color='red', alpha=1.0)
            # Important to make it a hard alpha; last plotted will rule.
            palette.set_over(color=colorList[i], alpha=1.0)
            palette.set_bad(color='red', alpha=1.0)

            norm = Normalize(vmin=minCount, vmax=maxCount, clip=True)
            imshow(
                his,
                interpolation='nearest',
                origin='lower',
                extent=extent,
                alpha=alpha,
                cmap=palette,
                norm=norm)

            fn = "plotHistogram_%s_d1d2.%s" % (ssType, graphicsFormat)
            savefig(fn)

            # Second figure: how the pair counts themselves are distributed.
            clf()
            a = m.reshape(n * n)
            hist(a, 20)
            xlabel('pair count')
            ylabel('number of occurrences')
            title(strTitle)
            fn = "plotHistOfHist_%s_d1d2.%s" % (ssType, graphicsFormat)
            savefig(fn)

        # end loop over ssType
    # end over ssType overall
    return m
Пример #20
0
def main():
    """Build the pickled d1 histogram database from the gzipped CSV table.

    Steps:
      1. gunzip the table and read at most lineCountMax rows from
         cvs_file_abs_name, grouped per entry id (first column).
         NOTE(review): assumes gunzip() leaves the unzipped file at
         cvs_file_abs_name -- confirm.
      2. Per entry, drop the residues whose B-factor column exceeds
         DEFAULT_MAX_BFACTOR and the DEFAULT_BFACTOR_PERCENTAGE_FILTER percent
         at the end of the B-factor sorted list.
      3. Collect the d1 values of sibling residues, keyed by secondary
         structure type and residue types.
      4. Histogram the values and pickle the histograms to
         dbase_file_abs_name.

    Returns None. Returns early, after logging an error, on a residue type
    that is not in NTdb or a d1 value that is out of range.
    """
    cvs_file_abs_name_gz = os.path.join(cingDirData, 'PluginCode', 'Whatif', cvs_file_abs_name + '.gz')
    gunzip(cvs_file_abs_name_gz)

    valueBySs0AndResTypes = {} # keys are SSi,   RTi, RTi-1
    valueBySs1AndResTypes = {} # keys are SSi-1, RTi, RTi-1
    valueByResTypes = {}
    valueBySs0 = {} # keys are SSi
    valueBySs1 = {} # keys are SSi-1
    histd1CtupleBySsAndResTypes = {}
    value = [] # NB is an array without being keyed.

    histd1BySs0AndResTypes = {} # keys are SSi,   RTi, RTi-1
    histd1BySs1AndResTypes = {} # keys are SSi-1, RTi, RTi-1
    histd1ByResTypes = {}
    histd1BySs0 = {}
    histd1BySs1 = {}

    # Group the rows per entry. try/finally closes the file before it is
    # unlinked further down, also when reading fails.
    linesByEntry = {}
    lineCount = 0
    csvFile = open(cvs_file_abs_name, "rb")
    try:
        reader = csv.reader(csvFile, quoting=csv.QUOTE_NONE)
        for row in reader:
            lineCount += 1
            if lineCount > lineCountMax:
                break
            linesByEntry.setdefault(row[0], []).append(row)
    finally:
        csvFile.close()

    skippedResTypes = []
    entryIdList = sorted(linesByEntry.keys())

    # Do some pre filtering.
    for entryId2 in entryIdList:
        lineList = linesByEntry[ entryId2 ]
        # Remember the original position of every row in an extra column.
        for idx, line in enumerate(lineList):
            line.append(idx)
        lineListSorted = NTsort(lineList, BFACTOR_COLUMN, inplace=False)
        # Now throw away the worst 10 % of residues.
        n = len(lineListSorted)
        bad_count = int(round((n * DEFAULT_BFACTOR_PERCENTAGE_FILTER) / 100.))
        to_remove_count = n - bad_count
        badIdxSet = set([lineItem[IDX_COLUMN] for lineItem in lineListSorted[to_remove_count:n]])
        # Walk backwards: deleting position i then never shifts the rows
        # still to be visited, so i keeps matching the stored index.
        for i in reversed(range(n)):
            lineItem = lineList[i]
            max_bfactor = float(lineItem[BFACTOR_COLUMN])
            if max_bfactor > DEFAULT_MAX_BFACTOR or i in badIdxSet:
                del lineList[i]
        removed_count = n - len(lineList)
        if removed_count < bad_count:
            nTwarning("Failed to remove at least %d residues" % bad_count)

    for entryId2 in entryIdList:
        prevChainId = None
        prevResType = None
        prevResNum = None
        prevSsType = None
        for row in linesByEntry[ entryId2 ]:
    #1zzk,A,GLN ,  17,E, 205.2, 193.6
    #1zzk,A,VAL ,  18,E, 193.6, 223.2
    #1zzk,A,THR ,  19,E, 223.2, 190.1
            (_entryId, chainId, resType, resNum, ssType, d1, _d2, _max_bfactor, _idx) = row
            resNum = int(resNum)
            ssType = to3StateDssp(ssType)[0]
            resType = resType.strip()
            db = NTdb.getResidueDefByName( resType )
            if not db:
                nTerror("resType not in db: %s" % resType)
                return
            resType = db.nameDict['IUPAC']
            d1 = floatParse(d1.strip())
            if isNaN(d1):
                continue
            if not inRange(d1):
                nTerror("d1 not in range for row: %s" % str(row))
                return

            if resType not in common20AAList:
                if resType not in skippedResTypes:
                    skippedResTypes.append( resType )
                continue

            if isSibling(chainId, resNum, prevChainId, prevResNum):
                appendDeepByKeys(valueBySs0AndResTypes, d1, ssType,     resType, prevResType)
                appendDeepByKeys(valueBySs1AndResTypes, d1, prevSsType, resType, prevResType)
                appendDeepByKeys(valueByResTypes, d1, resType, prevResType)
                appendDeepByKeys(valueBySs0, d1, ssType)
                appendDeepByKeys(valueBySs1, d1, prevSsType)
                value.append( d1 )
            prevResType = resType
            prevResNum = resNum
            prevChainId = chainId
            prevSsType = ssType

    os.unlink(cvs_file_abs_name)
    nTmessage("Skipped skippedResTypes: %r" % skippedResTypes )
    nTmessage("Got count of values: %r" % len(value) )
    # fill FOUR types of hist.
    # TODO: filter differently for pro/gly
    keyListSorted1 = sorted(valueBySs0AndResTypes.keys())
    for isI in (True, False):
        if isI:
            valueBySs = valueBySs0
            valueBySsAndResTypes = valueBySs0AndResTypes
            histd1BySs = histd1BySs0
            histd1BySsAndResTypes = histd1BySs0AndResTypes
        else:
            valueBySs = valueBySs1
            valueBySsAndResTypes = valueBySs1AndResTypes
            histd1BySs = histd1BySs1
            histd1BySsAndResTypes = histd1BySs1AndResTypes
        for ssType in keyListSorted1:
            # .get(): a secondary structure type absent from this table takes
            # the error path below instead of raising KeyError.
            d1List = valueBySs.get(ssType)
            if not d1List:
                nTerror("Expected d1List from valueBySs[%s]" % (ssType))
                continue
            hist1d, _bins, _patches = hist(d1List, bins=binCount, range=xRange)
            nTmessage("Count %6d in valueBySs[%s]" % (sum(hist1d), ssType))
            setDeepByKeys(histd1BySs, hist1d, ssType)

            for resType in sorted(valueBySsAndResTypes[ssType].keys()):
                for prevResType in sorted(valueBySsAndResTypes[ssType][resType].keys()):
                    d1List = valueBySsAndResTypes[ssType][resType][prevResType]
                    if not d1List:
                        nTerror("Expected d1List from valueBySsAndResTypes[%s][%s][%s]" % (ssType, resType, prevResType))
                        continue
                    hist1d, _bins, _patches = hist(d1List, bins=binCount, range=xRange)
                    setDeepByKeys(histd1BySsAndResTypes, hist1d, ssType, resType, prevResType)
    # Delete the reference -not- the object.
    valueBySs = None
    valueBySsAndResTypes = None
    histd1BySs = None
    histd1BySsAndResTypes = None

    for ssType in keyListSorted1:
        # Take the residue types of this very ssType; reusing the list left
        # over from the loop above can name types this ssType does not have.
        for resType in sorted(valueBySs0AndResTypes[ssType].keys()):
            keyListSorted3 = sorted(valueBySs0AndResTypes[ssType][resType].keys())
            for resTypePrev in keyListSorted3:
                for resTypeNext in keyListSorted3:
                    hist1 = getDeepByKeys(histd1BySs0AndResTypes, ssType, resType, resTypePrev) # x-axis
                    # This was bug! It needs to be hashed on the ssType of resType -not- on resTypeNext
                    hist2 = getDeepByKeys(histd1BySs1AndResTypes, ssType, resTypeNext, resType)
                    # Identity tests: the histograms may be numpy arrays, for
                    # which '== None' compares element by element.
                    if hist1 is None:
                        nTdebug('skipping for hist1 is empty for [%s] [%s] [%s]' % (ssType, resTypePrev, resType))
                        continue
                    if hist2 is None:
                        nTdebug('skipping for hist2 is empty for [%s] [%s] [%s]' % (ssType, resType, resTypeNext))
                        continue
                    m1 = mat(hist1,dtype='float')
                    m2 = mat(hist2,dtype='float')
                    m2 = m2.transpose() # pylint: disable=E1101
                    hist2d = multiply(m1,m2)

                    cTuple = getEnsembleAverageAndSigmaHis( hist2d )
                    (_c_av, c_sd, _hisMin, _hisMax) = cTuple #@UnusedVariable
                    cTuple += tuple([str([ssType, resType, resTypePrev, resTypeNext])]) # append the hash keys as a way of id.
                    if c_sd is None:
                        nTdebug('Failed to get c_sd when testing not all residues are present in smaller sets.')
                        continue
                    if c_sd == 0.:
                        nTdebug('Got zero c_sd, ignoring histogram. This should only occur in smaller sets. Not setting values.')
                        continue
                    setDeepByKeys( histd1CtupleBySsAndResTypes, cTuple, ssType, resType, resTypePrev, resTypeNext)

    for resType in sorted(valueByResTypes.keys()):
        for prevResType in sorted(valueByResTypes[resType].keys()):
            d1List = valueByResTypes[resType][prevResType]
            if not d1List:
                nTerror("Expected d1List from valueByResTypes[%s][%s]" % (resType, prevResType))
                continue
            hist1d, _bins, _patches = hist(d1List, bins=binCount, range=xRange)
            setDeepByKeys(histd1ByResTypes, hist1d, resType, prevResType)

    histd1, _bins, _patches = hist(value, bins=binCount, range=xRange)
    nTmessage("Count %6d in value" % sum(histd1))

    dbase = {}
    dbase[ 'histd1BySs0AndResTypes' ] = histd1BySs0AndResTypes # 92 kb uncompressed in the case of ~1000 lines only
    dbase[ 'histd1BySs1AndResTypes' ] = histd1BySs1AndResTypes
    dbase[ 'histd1CtupleBySsAndResTypes' ] = histd1CtupleBySsAndResTypes
    dbase[ 'histd1ByResTypes' ] = histd1ByResTypes # 56 kb
    dbase[ 'histd1BySs0' ] = histd1BySs0 # 4 kb
    dbase[ 'histd1BySs1' ] = histd1BySs1
    dbase[ 'histd1' ] = histd1 #  4 kb

    if os.path.exists(dbase_file_abs_name):
        os.unlink(dbase_file_abs_name)
    output = open(dbase_file_abs_name, 'wb')
    try:
        cPickle.dump(dbase, output, 2)
    finally:
        output.close()
Пример #21
0
    def __init__(self, seqFile, protFile, convention):
        """Parse an Xeasy sequence file and prot file.

        seqFile:    one residue per line: the residue name, optionally
                    followed by its residue number; without a number the
                    numbering continues from the previous residue.
        protFile:   lines of five fields: atom index, shift, error, atom name,
                    residue number; lines with another field count are ignored.
        convention: naming convention used to validate residue and atom names
                    against NTdb.

        Attributes set:
            seq        residue number -> residue name as found in seqFile
            resCount   number of residues stored in seq
            prot       atom index -> NTdict(index, shift, error, atomName,
                       resNum, resName, atom=None)
            protCount  number of entries stored in prot
            error      1 when a problem was found in either file, else 0
            seqFile, protFile, convention   the arguments as given
        """
        NTdict.__init__(self)

        # Clear the flag once, before any parsing, so that problems found in
        # the sequence file are still visible after the prot file is parsed.
        self.error = 0

        # parse the seqFile
        self.seq = {}
        resNum = 1
        self.resCount = 0
        for f in AwkLike(seqFile, commentString='#'):
            if f.isEmpty() or f.isComment('#'):
                continue
            resName = f.dollar[1]
            if resName in CYANA_NON_RESIDUES:
                # skip the CYANA non-residue stuff
                continue
            if not NTdb.isValidResidueName(resName, convention):
                nTerror(
                    'Xeasy: residue "%s" invalid for convention "%s" in "%s:%d"',
                    resName, convention, seqFile, f.NR)
                self.error = 1
                continue
            if f.NF > 1:
                explicitResNum = f.int(2)
                if explicitResNum is None:
                    # No usable residue number: flag the problem and skip the
                    # line instead of storing the residue under key None.
                    self.error = 1
                    continue
                resNum = explicitResNum
            #end if
            self.seq[resNum] = resName  # store original 'convention' name
            resNum += 1
            self.resCount += 1
        #end for
        self.seqFile = seqFile
        self.convention = convention

        # parse the prot file
        self.prot = {}
        self.protCount = 0
        for f in AwkLike(protFile, commentString='#'):
            if f.NF != 5:
                continue
            # Xeasy/Cyana atom index
            index = f.int(1)
            atomName = f.dollar[4]
            resNum = f.int(5)
            if resNum not in self.seq:
                nTwarning(
                    'Xeasy: undefined residue number %d in "%s:%d" (%s)' %
                    (resNum, protFile, f.NR, f.dollar[0]))
                self.error = 1
                continue
            resName = self.seq[resNum]
            if not NTdb.isValidAtomName(resName, atomName, convention):
                nTwarning(
                    'Xeasy parsing "%s:%d": invalid atom "%s" for residue %s%d'
                    % (protFile, f.NR, atomName, resName, resNum))
                self.error = 1
                continue
            self.prot[index] = NTdict(index=index,
                                      shift=f.float(2),
                                      error=f.float(3),
                                      atomName=atomName,
                                      resNum=resNum,
                                      resName=resName,
                                      atom=None)
            self.protCount += 1
        #end for

        self.protFile = protFile
        nTmessage('Xeasy.__init__: parsed %d residues, %d atoms from %s, %s',
                  self.resCount, self.protCount, self.seqFile, self.protFile)
Пример #22
0
    # parse this xplor pdbfile
    pdbfile = PDBFile( fName )

    # print a MODEL record
    modelCount += 1
    mdl         = MODEL()
    mdl.serial  = modelCount
    fprintf( pdbFile, '%s\n', mdl )

    atomCount = 0
    lastRecord = None
    for record in pdbfile:
        if record._name.strip() in ["ATOM","HETATM"]:

            # see if we can find a definition for this residue, atom name in the database
            atm = NTdb.getAtomDefByName( record.resName, record.name, XPLOR )

            # we found a match
            if (atm != None):
                # check if there is an convention equivalent; skip otherwise
                if (atm.translate(convention) != None and atm.residueDef.translate(convention) != None):
                    atomCount     += 1
                    record.serial  = atomCount
                    record.resName = atm.residueDef.translate( convention )
                    record.name    = atm.translate( convention )
                    if not 'chainID' in record:
                        record.chainID = 'A'
                    #end if
                    fprintf( pdbFile, "%s\n", record )
                    lastRecord = record
                else: