def copy_from_convention(from_convention, new_convention, protein_only=True):
    """Copy nomenclature convention from from_convention to new_convention.

    Only copy standard protein residues if protein_only (defaults to True).
    """
    residue_definitions = NTdb.residuesWithProperties("protein")
    if not protein_only:
        residue_definitions = NTdb.allResidueDefs()
    for res_def in residue_definitions:
        nTdebug("Copying %s nomenclature convention to %s for %s",
                from_convention, new_convention, res_def)
        res_def.nameDict[new_convention] = res_def.nameDict[from_convention]
        for atom_def in res_def:
            atom_def.nameDict[new_convention] = atom_def.nameDict[from_convention]
            atom_def.postProcess()
        res_def.postProcess()
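# Hedged usage sketch (not part of the original module): copy_from_convention is assumed
# to be defined or imported next to NTdb as above, and the convention constants to come
# from cing.core.constants.
from cing.core.constants import CYANA, CYANA2

copy_from_convention(CYANA, CYANA2)                       # standard protein residues only
copy_from_convention(CYANA, CYANA2, protein_only=False)   # every residue definition in NTdb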
def testSelectByItems(self):
    # E.g. if adl is the AtomDef NTlist
    byItems = ('type', 'C_VIN')
    vadl = NTdb.allAtomDefs().selectByItems(*byItems)  # vadl = adl.
    nTdebug("%s in db: %s" % (byItems[1], str(vadl)))
    self.assertTrue(len(vadl) >= 11)  # allow growth but not shrinkage.
def correct_xplor_stap(protein_only=True):
    """Correct atom definitions copied from XPLOR for STAP.

    Only correct standard protein residues if protein_only (defaults to True).
    """
    remove_non_stap_residues(NTdb)
    correct_his_stap(NTdb)
    correct_hg_stap(NTdb['CYS'])
    correct_hg_stap(NTdb['SER'])
    correct_ile_d_stap(NTdb['ILE'])
    residue_definitions = NTdb.residuesWithProperties('protein')
    if not protein_only:
        residue_definitions = NTdb.allResidueDefs()
    for res_def in residue_definitions:
        correct_termini_stap(res_def)
        remove_pseudo_atoms(res_def)
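# Hedged sketch of the intended call order (assumption: the new convention is keyed as
# 'STAP'; the docstring above only states that the definitions were copied from XPLOR first).
copy_from_convention('XPLOR', 'STAP', protein_only=True)
correct_xplor_stap(protein_only=True)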
def matchResidue2Cing(self, res):
    """
    Match res to the CING database using the previously defined convention;
    account for 'ill-defined' residues by examining crucial atom names.
    Use CYANA (==DIANA) naming for conversion to INTERNAL (i.e. these names
    will not likely change).

    Return an NTdb resDef object, or None on error.

    res is an NTtree object with the following attributes set after this routine:
        db
        skip
        resName
    and attributes for every atom it includes: HA2, CD1, ...
    """
    # nTdebug("Now in _matchResidue2Cing: %s" % res)
    res.db = None
    res.skip = False
    # Residue names that are ambiguously defined by different PDB file formats
    if res.resName[0:3] == 'ARG':
        if 'HH1' in res:
            res.db = NTdb.getResidueDefByName('ARG', convention=CYANA)
        elif '1HH' in res:
            # Second set for CYANA 1.x, AMBER
            res.db = NTdb.getResidueDefByName('ARG', convention=CYANA)
        else:
            # Default protonated; this also assures most common for X-ray without protons
            res.db = NTdb.getResidueDefByName('ARG+', convention=CYANA)
        #end if
    elif res.resName[0:3] == 'ASP':
        if 'HD2' in res:
            #print 'ASPH'
            res.db = NTdb.getResidueDefByName('ASP', convention=CYANA)
        else:
            # Default deprot; this also assures most common for X-ray without protons
            #print 'ASP'
            res.db = NTdb.getResidueDefByName('ASP-', convention=CYANA)
        #end if
    elif res.resName[0:3] == 'GLU':
        if 'HE2' in res:
            #print 'GLUH'
            res.db = NTdb.getResidueDefByName('GLU', convention=CYANA)
        else:
            # Default deprot; this also assures most common for X-ray without protons
            #print 'GLU'
            res.db = NTdb.getResidueDefByName('GLU-', convention=CYANA)
        #end if
    elif res.resName[0:3] == 'HIS':
        if 'HD1' in res and 'HE2' in res:
            #print 'HISH'
            res.db = NTdb.getResidueDefByName('HIS+', convention=CYANA)
        elif 'HD1' not in res and 'HE2' in res:
            #print 'HISE'
            res.db = NTdb.getResidueDefByName('HIST', convention=CYANA)
        else:
            # Default HD1
            #print 'HIS'
            res.db = NTdb.getResidueDefByName('HIS', convention=CYANA)
        #end if
    elif res.resName[0:3] == 'LYS':
        if 'HZ1' in res and 'HZ3' not in res:
            res.db = NTdb.getResidueDefByName('LYS', convention=CYANA)
        elif '1HZ' in res and '3HZ' not in res:
            # Second set for CYANA 1.x
            res.db = NTdb.getResidueDefByName('LYS', convention=CYANA)
        else:
            # Default prot; this also assures most common for X-ray without protons
            res.db = NTdb.getResidueDefByName('LYS+', convention=CYANA)
        #end if
    elif res.resName in CYANA_NON_RESIDUES:
        res.skip = True
    elif res.resName == 'HOH' and self.skipWaters:
        res.skip = True
    else:
        res.db = NTdb.getResidueDefByName(res.resName, convention=self.convention)
    #end if

    # Only continue the search if not found and non-standard residues are allowed.
    if res.db:
        return res.db
    if not self.allowNonStandardResidue:
        res.skip = True
        return res.db

    # Try to match the residue using the INTERNAL convention.
    res.db = NTdb.getResidueDefByName(res.resName)
    if res.db:
        return res.db

    # Insert a new residue.
    res.db = NTdb.appendResidueDef(name=res.resName, shortName='_',
                                   comment='From parsing PDB file')
    if not res.db:
        nTcodeerror("Adding a non-standard residue should have been possible.")
        return None
    res.db.nameDict[self.convention] = res.resName

    # Just a check, disable for speed.
    _x = NTdb.getResidueDefByName(res.resName)
    if not _x:
        nTcodeerror("Added residue but failed to find it again in pdbParser#_matchResidue2Cing")
    return res.db
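# Summary of the protonation-state disambiguation above (derived from the branches in
# matchResidue2Cing, added here only as a reading aid):
#   ARG: 'HH1' or '1HH' present                 -> 'ARG',  otherwise -> 'ARG+'
#   ASP: 'HD2' present                          -> 'ASP',  otherwise -> 'ASP-'
#   GLU: 'HE2' present                          -> 'GLU',  otherwise -> 'GLU-'
#   HIS: 'HD1' and 'HE2' -> 'HIS+'; 'HE2' only  -> 'HIST', otherwise -> 'HIS'
#   LYS: 'HZ1'/'1HZ' without 'HZ3'/'3HZ'        -> 'LYS',  otherwise -> 'LYS+'
# All lookups use CYANA naming via NTdb.getResidueDefByName(name, convention=CYANA).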
from cing.core.classes import DihedralRestraint, DihedralRestraintList
from cing.core.classes import RDCRestraint, RDCRestraintList
#---------------------------------------------------------------------------------------------
# functional imports: Order matters!
#---------------------------------------------------------------------------------------------
# Try a Yasara import
# GV: We could change this by defining yasaradir in the CING setup
try:
    from yasara import yasaradir #@UnresolvedImport
    # JFD: why not add the functionality from the plugin ?
    if os.path.exists(yasaradir):
        sys.path.append(os.path.join(yasaradir, 'pym'))
        sys.path.append(os.path.join(yasaradir, 'plg'))
    else:
        nTcodeerror('Yasara directory "%s" as defined in yasara.py module not found',
                    yasaradir)
        exit(1)
except:
    yasaradir = None
#end try

from cing.core.molecule import *
from cing.core.importPlugin import importPlugin  # This imports all plugins
from cing.core.sml import obj2SML  # This also initializes the SMLhandler methods
from cing.core.sml import sML2obj  # This also initializes the SMLhandler methods
from cing.core.database import NTdb #@Reimport

NTdb._restoreFromSML()  # This initializes the database
# parse this xplor pdbfile
pdbfile = PDBFile(fName)

# print a MODEL record
modelCount += 1
mdl = MODEL()
mdl.serial = modelCount
fprintf(pdbFile, '%s\n', mdl)

atomCount = 0
lastRecord = None
for record in pdbfile:
    if record._name.strip() in ["ATOM", "HETATM"]:
        # see if we can find a definition for this residue, atom name in the database
        atm = NTdb.getAtomDefByName(record.resName, record.name, XPLOR)
        # we found a match
        if atm != None:
            # check if there is a convention equivalent; skip otherwise
            if atm.translate(convention) != None and atm.residueDef.translate(convention) != None:
                atomCount += 1
                record.serial = atomCount
                record.resName = atm.residueDef.translate(convention)
                record.name = atm.translate(convention)
                if not 'chainID' in record:
                    record.chainID = 'A'
                #end if
                fprintf(pdbFile, "%s\n", record)
                lastRecord = record
            #end if
        else:
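# Hedged single-atom sketch of the translation idiom used in the loop above; the residue
# and atom names are only illustrative, and IUPAC, like XPLOR, is assumed to be one of the
# convention constants in cing.core.constants.
atm = NTdb.getAtomDefByName('ALA', 'HN', XPLOR)
if atm != None and atm.translate(IUPAC) != None and atm.residueDef.translate(IUPAC) != None:
    resNameIupac = atm.residueDef.translate(IUPAC)   # expected 'ALA'
    atomNameIupac = atm.translate(IUPAC)             # the amide proton is 'H' in IUPAC naming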
def plotHistogramOverall():
    graphicsFormat = "png"
    alpha = 0.8  # was 0.8; looks awful with alpha = 1
    n = 20
    # d = 3 # number of ss types.
    extent = (0, n) + (0, n)
    cmapList = [green_inv, blue_inv, yellow_inv]
    colorList = ['green', 'blue', 'yellow']
    i = 1  # decides on color picked.
    # If set it will do a single ssType otherwise the overall.
    for doOverall in [False, True]:
    # for doOverall in [ True ]:
        if doOverall:
            ssTypeList = [None]
        else:
            ssTypeList = [' ', 'S', 'H']
        for ssType in ssTypeList:
            m = zeros((n * n), dtype=int).reshape(n, n)
            # mBySs = zeros((n,n,d), dtype=int).reshape(n,n,d)
            tickList = [NTdb.getResidueDefByName(resType).shortName
                        for resType in common20AAList]
            # tickListRev = tickList[:]
            # tickListRev.reverse()
            for r, resTypePrev in enumerate(common20AAList):
                for c, resType in enumerate(common20AAList):
                    if doOverall:
                        hist1 = getDeepByKeys(hPlot.histd1ByResTypes, resType, resTypePrev)
                    else:
                        hist1 = getDeepByKeys(hPlot.histd1BySs0AndResTypes, ssType, resType, resTypePrev)
                    if hist1 == None:
                        nTdebug('skipping for hist1 is empty for [%s] [%s]' % (resType, resTypePrev))
                        continue
                    m[r, c] = sum(hist1)
            clf()
            # axes([.1, .1, .8, .8 ] )
            xlabel('resType')
            ylabel('resTypePrev')
            xlim((0, n))
            ylim((0, n))
            offset = 0.5
            xticks(arange(offset, n), tickList)
            yticks(arange(offset, n), tickList)
            # print 'just before call to set_ticks_position'
            # axis.xaxis.set_ticks_position('top')
            # axis.xaxis.set_label_position('top')
            # axis.yaxis.set_ticks_position('both')
            # axis.yaxis.set_label_position('left')
            grid(True)
            strTitle = "ssType: [%s]" % ssType
            title(strTitle)
            plot([0, n], [0, n], 'b-', linewidth=1)
            minCount = 300.
            maxCount = 1000.
            if False:
                minCount = 0.
                maxCount = 1.
            if ssType:
                minCount /= 3.
                maxCount /= 3.
            maxHist = amax(m)
            minHist = amin(m)
            sumHist = sum(m)
            nTmessage('ssType: %s' % ssType)
            nTmessage('maxHist: %s' % maxHist)  # 9165 of total of ~ 1 M.
            nTmessage('minHist: %s' % minHist)  # 210
            nTmessage('sumHist: %s' % sumHist)  # 210
            # nTmessage('tickList: %s' % tickList) # 210
            # his *= 100./maxHist
            his = masked_where(m <= minCount, m, copy=1)
            palette = cmapList[i]
            palette.set_under(color='red', alpha=1.0)  # alpha is 0.0
            palette.set_over(color=colorList[i], alpha=1.0)  # alpha is 1.0 Important to make it a hard alpha; last plotted will rule.
            palette.set_bad(color='red', alpha=1.0)
            norm = Normalize(vmin=minCount, vmax=maxCount, clip=True)  # clip is False
            imshow(his,
                   interpolation='nearest',
                   # interpolation='bicubic',
                   origin='lower',
                   extent=extent,
                   alpha=alpha,
                   cmap=palette,
                   norm=norm)
            # mr = m[::-1] # reverses the rows, nice!
            # nTmessage('mr: %s' % mr)
            fn = "plotHistogram_%s_d1d2.%s" % (ssType, graphicsFormat)
            savefig(fn)
            clf()
            a = m.reshape(n * n)
            hist(a, 20)
            xlabel('pair count')
            ylabel('number of occurrences')
            title(strTitle)
            fn = "plotHistOfHist_%s_d1d2.%s" % (ssType, graphicsFormat)
            savefig(fn)
        # end loop over ssType
    # end over ssType overall
    return m
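# Self-contained sketch (plain numpy/matplotlib, not CING code) of the masking idiom used
# above: cells at or below a minimum count are masked out and drawn in the colormap's
# 'bad' color, while the remaining counts are normalized into the colormap range.
import copy
import numpy
from numpy.ma import masked_where
from matplotlib import cm, pyplot
from matplotlib.colors import Normalize

counts = numpy.arange(400).reshape(20, 20)   # fake pair-count matrix
palette = copy.copy(cm.Greens)               # private copy so the shared colormap stays untouched
palette.set_bad(color='red', alpha=1.0)      # color for the masked (too sparse) cells
masked = masked_where(counts <= 100, counts, copy=1)
pyplot.imshow(masked, interpolation='nearest', origin='lower',
              cmap=palette, norm=Normalize(vmin=100, vmax=300, clip=True))
pyplot.savefig('maskedColormapSketch.png')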
from cing.core.constants import * #@UnusedWildImport
from cing.core.database import NTdb

for res in NTdb:
    res.nameDict[CYANA2] = res.nameDict[CYANA]
    for atm in res:
        if atm.name == 'HN':
            atm.nameDict[CYANA2] = 'H'
        else:
            atm.nameDict[CYANA2] = atm.nameDict[CYANA]

stream = open('dbTable-new.py', 'w')
NTdb.exportDef(stream=stream)
stream.close()
def __init__(self, seqFile, protFile, convention):
    NTdict.__init__(self)
    #print '>', seqFile, protFile

    # parse the seqFile
    self.seq = {}
    resNum = 1
    self.resCount = 0
    for f in AwkLike(seqFile, commentString='#'):
        #print '>>', f.dollar[0]
        if not f.isEmpty() and not f.isComment('#'):
            if f.dollar[1] in CYANA_NON_RESIDUES:
                # skip the bloody CYANA non-residue stuff
                pass
            elif not NTdb.isValidResidueName(f.dollar[1], convention):
                nTerror('Xeasy: residue "%s" invalid for convention "%s" in "%s:%d"',
                        f.dollar[1], convention, seqFile, f.NR)
                self.error = 1
            else:
                if f.NF > 1:
                    resNum = f.int(2)
                    if resNum == None:
                        self.error = 1
                    #end if
                #end if
                self.seq[resNum] = f.dollar[1]  # store original 'convention' name
                resNum += 1
                self.resCount += 1
            #end if
        #end if
    #end for
    self.seqFile = seqFile
    self.convention = convention

    # parse the prot file
    self.prot = {}
    self.protCount = 0
    self.error = 0
    for f in AwkLike(protFile, commentString='#'):
        if f.NF == 5:
            # Xeasy/Cyana atom index
            index = f.int(1)
            atomName = f.dollar[4]
            resNum = f.int(5)
            if resNum not in self.seq:
                nTwarning('Xeasy: undefined residue number %d in "%s:%d" (%s)' %
                          (resNum, protFile, f.NR, f.dollar[0]))
                self.error = 1
            else:
                resName = self.seq[resNum]
                if not NTdb.isValidAtomName(resName, atomName, convention):
                    nTwarning('Xeasy parsing "%s:%d": invalid atom "%s" for residue %s%d' %
                              (protFile, f.NR, atomName, resName, resNum))
                    self.error = 1
                else:
                    p = NTdict(index=index,
                               shift=f.float(2),
                               error=f.float(3),
                               atomName=atomName,
                               resNum=resNum,
                               resName=resName,
                               atom=None)
                    self.prot[index] = p
                    self.protCount += 1
                #end if
            #end if
        #end if
    #end for
    self.protFile = protFile
    nTmessage('Xeasy.__init__: parsed %d residues, %d atoms from %s, %s',
              self.resCount, self.protCount, self.seqFile, self.protFile)
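# Illustrative input lines for the two files parsed above (column layout inferred from the
# field usage in this routine, not copied from real data):
#   seqFile,  one residue per line, optional start number:    "ALA 1"  /  "GLY"
#   protFile, 5 columns (index, shift, error, atom, resNum):  "  1  4.280  0.020  HA  2"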
def main():
    'See above.'
    cvs_file_abs_name_gz = os.path.join(cingDirData, 'PluginCode', 'Whatif', cvs_file_abs_name + '.gz')
    gunzip(cvs_file_abs_name_gz)
    reader = csv.reader(open(cvs_file_abs_name, "rb"), quoting=csv.QUOTE_NONE)
    valueBySs0AndResTypes = {}  # keys are SSi, RTi, RTi-1
    valueBySs1AndResTypes = {}  # keys are SSi-1, RTi, RTi-1
    valueByResTypes = {}
    valueBySs0 = {}  # keys are SSi
    valueBySs1 = {}  # keys are SSi-1
    histd1CtupleBySsAndResTypes = {}
    value = []  # NB is an array without being keyed.
    histd1BySs0AndResTypes = {}  # keys are SSi, RTi, RTi-1
    histd1BySs1AndResTypes = {}  # keys are SSi-1, RTi, RTi-1
    histd1ByResTypes = {}
    histd1BySs0 = {}
    histd1BySs1 = {}

    linesByEntry = {}
    lineCount = 0
    for row in reader:
        lineCount += 1
        if lineCount > lineCountMax:
            break
        entryId = row[0]
        if not linesByEntry.has_key(entryId):
            linesByEntry[entryId] = []
        linesByEntry[entryId].append(row)

    skippedResTypes = []
    entryIdList = linesByEntry.keys()
    entryIdList.sort()

    # Do some pre filtering.
    for entryId2 in entryIdList:
        lineList = linesByEntry[entryId2]
        for idx, line in enumerate(lineList):
            line.append(idx)
        lineListSorted = NTsort(lineList, BFACTOR_COLUMN, inplace=False)
        # Now throw away the worst 10 % of residues.
        n = len(lineListSorted)
        bad_count = int(round((n * DEFAULT_BFACTOR_PERCENTAGE_FILTER) / 100.))
        to_remove_count = n - bad_count
        # nTmessage("Removing at least %d from %d residues" % (bad_count,n))
        badIdxList = [lineItem[IDX_COLUMN] for lineItem in lineListSorted[to_remove_count:n]]
        iList = range(n)
        iList.reverse()
        for i in iList:
            lineItem = lineList[i]
            max_bfactor = float(lineItem[BFACTOR_COLUMN])
            if max_bfactor > DEFAULT_MAX_BFACTOR:
                # nTdebug('Skipping because max bfactor in dihedral %.3f is above %.3f %s' % (max_bfactor, DEFAULT_MAX_BFACTOR, lineItem))
                del lineList[i]  # TODO: check if indexing is still right or we shoot in the foot.
                continue
            if i in badIdxList:
                # nTdebug('Skipping because bfactor worst %.3f %s' % (max_bfactor, lineItem))
                del lineList[i]
                continue
        removed_count = n - len(lineList)
        # nTdebug("Reduced list by %d" % removed_count)
        if removed_count < bad_count:
            nTwarning("Failed to remove at least %d residues" % bad_count)

    for entryId2 in entryIdList:
        prevChainId = None
        prevResType = None
        prevResNum = None
        prevSsType = None
        for _r, row in enumerate(linesByEntry[entryId2]):
            #1zzk,A,GLN ,  17,E, 205.2, 193.6
            #1zzk,A,VAL ,  18,E, 193.6, 223.2
            #1zzk,A,THR ,  19,E, 223.2, 190.1
            (entryId, chainId, resType, resNum, ssType, d1, _d2, _max_bfactor, _idx) = row
            resNum = int(resNum)
            ssType = to3StateDssp(ssType)[0]
            resType = resType.strip()
            db = NTdb.getResidueDefByName(resType)
            if not db:
                nTerror("resType not in db: %s" % resType)
                return
            resType = db.nameDict['IUPAC']
            d1 = d1.strip()
            d1 = floatParse(d1)
            if isNaN(d1):
                # nTdebug("d1 %s is a NaN on row: %s" % (d1,row))
                continue
            if not inRange(d1):
                nTerror("d1 not in range for row: %s" % str(row))
                return
            if not (resType in common20AAList):
                # nTmessage("Skipping uncommon residue: %s" % resType)
                if not (resType in skippedResTypes):
                    skippedResTypes.append(resType)
                continue
            if isSibling(chainId, resNum, prevChainId, prevResNum):
                appendDeepByKeys(valueBySs0AndResTypes, d1, ssType, resType, prevResType)
                appendDeepByKeys(valueBySs1AndResTypes, d1, prevSsType, resType, prevResType)
                appendDeepByKeys(valueByResTypes, d1, resType, prevResType)
                appendDeepByKeys(valueBySs0, d1, ssType)
                appendDeepByKeys(valueBySs1, d1, prevSsType)
                value.append(d1)
            prevResType = resType
            prevResNum = resNum
            prevChainId = chainId
            prevSsType = ssType

    os.unlink(cvs_file_abs_name)
    nTmessage("Skipped skippedResTypes: %r" % skippedResTypes)
    nTmessage("Got count of values: %r" % len(value))

    # fill FOUR types of hist.
    # TODO: filter differently for pro/gly
    keyListSorted1 = valueBySs0AndResTypes.keys()
    keyListSorted1.sort()
    for isI in (True, False):
        if isI:
            valueBySs = valueBySs0
            valueBySsAndResTypes = valueBySs0AndResTypes
            histd1BySs = histd1BySs0
            histd1BySsAndResTypes = histd1BySs0AndResTypes
        else:
            valueBySs = valueBySs1
            valueBySsAndResTypes = valueBySs1AndResTypes
            histd1BySs = histd1BySs1
            histd1BySsAndResTypes = histd1BySs1AndResTypes
        for ssType in keyListSorted1:
            # keyListSorted1b = deepcopy(keyListSorted1)
            # for ssTypePrev in keyListSorted1b:
            d1List = valueBySs[ssType]
            if not d1List:
                nTerror("Expected d1List from valueBySs[%s]" % (ssType))
                continue
            hist1d, _bins, _patches = hist(d1List, bins=binCount, range=xRange)
            nTmessage("Count %6d in valueBySs[%s]" % (sum(hist1d), ssType))
            setDeepByKeys(histd1BySs, hist1d, ssType)
            keyListSorted2 = valueBySsAndResTypes[ssType].keys()
            keyListSorted2.sort()
            for resType in keyListSorted2:
                # nTmessage("Working on valueBySsAndResTypes for [%s][%s]" % (ssType, resType)) # nice for balancing output verbosity.
                keyListSorted3 = valueBySsAndResTypes[ssType][resType].keys()
                keyListSorted3.sort()
                for prevResType in keyListSorted3:
                    # nTmessage("Working on valueBySsAndResTypes[%s][%s][%s]" % (ssType, resType, prevResType))
                    d1List = valueBySsAndResTypes[ssType][resType][prevResType]
                    if not d1List:
                        nTerror("Expected d1List from valueBySsAndResTypes[%s][%s][%s]" % (ssType, resType, prevResType))
                        continue
                    hist1d, _bins, _patches = hist(d1List, bins=binCount, range=xRange)
                    # nTmessage("Count %6d in valueBySsAndResTypes[%s][%s][%s]" % (sum(hist1d), ssType, resType, prevResType))
                    setDeepByKeys(histd1BySsAndResTypes, hist1d, ssType, resType, prevResType)
        # Now that they are all in we can redo this.
        # Delete the reference -not- the object.
        valueBySs = None
        valueBySsAndResTypes = None
        histd1BySs = None
        histd1BySsAndResTypes = None

        for ssType in keyListSorted1:
            for resType in keyListSorted2:
                # nTmessage("Working on valueBySsAndResTypes for [%s][%s]" % (ssType, resType)) # nice for balancing output verbosity.
                keyListSorted3 = valueBySs0AndResTypes[ssType][resType].keys()
                keyListSorted3.sort()
                for resTypePrev in keyListSorted3:
                    keyListSorted4 = keyListSorted3[:]  # take a copy
                    for resTypeNext in keyListSorted4:
                        hist1 = getDeepByKeys(histd1BySs0AndResTypes, ssType, resType, resTypePrev)  # x-axis
                        # This was a bug! It needs to be hashed on the ssType of resType -not- on resTypeNext
                        hist2 = getDeepByKeys(histd1BySs1AndResTypes, ssType, resTypeNext, resType)
                        if hist1 == None:
                            nTdebug('skipping for hist1 is empty for [%s] [%s] [%s]' % (ssType, resTypePrev, resType))
                            continue
                        if hist2 == None:
                            nTdebug('skipping for hist2 is empty for [%s] [%s] [%s]' % (ssType, resType, resTypeNext))
                            continue
                        m1 = mat(hist1, dtype='float')
                        m2 = mat(hist2, dtype='float')
                        m2 = m2.transpose()  # pylint: disable=E1101
                        hist2d = multiply(m1, m2)
                        cTuple = getEnsembleAverageAndSigmaHis(hist2d)
                        (_c_av, c_sd, _hisMin, _hisMax) = cTuple #@UnusedVariable
                        cTuple += tuple([str([ssType, resType, resTypePrev, resTypeNext])])  # append the hash keys as a way of id.
                        # nTdebug("For ssType %s residue types %s %s %s found (av/sd/min/max) %8.0f %8.0f %8.0f %8.0f" % (
                        #     ssType, resType, resTypePrev, resTypeNext, c_av, c_sd, hisMin, hisMax))
                        if c_sd == None:
                            nTdebug('Failed to get c_sd when testing not all residues are present in smaller sets.')
                            continue
                        if c_sd == 0.:
                            nTdebug('Got zero c_sd, ignoring histogram. This should only occur in smaller sets. Not setting values.')
                            continue
                        setDeepByKeys(histd1CtupleBySsAndResTypes, cTuple, ssType, resType, resTypePrev, resTypeNext)
    # end for isI

    keyListSorted1 = valueByResTypes.keys()
    keyListSorted1.sort()
    for resType in keyListSorted1:
        keyListSorted2 = valueByResTypes[resType].keys()
        keyListSorted2.sort()
        for prevResType in keyListSorted2:
            d1List = valueByResTypes[resType][prevResType]
            if not d1List:
                nTerror("Expected d1List from valueByResTypes[%s][%s]" % (resType, prevResType))
                continue
            hist1d, _bins, _patches = hist(d1List, bins=binCount, range=xRange)
            # nTmessage("Count %6d in valueByResTypes[%s][%s]" % (sum(hist1d), resType, prevResType))
            setDeepByKeys(histd1ByResTypes, hist1d, resType, prevResType)

    histd1, _bins, _patches = hist(value, bins=binCount, range=xRange)
    nTmessage("Count %6d in value" % sum(histd1))
    # setDeepByKeys(histd1, hist1d, resType, prevResType)

    if os.path.exists(dbase_file_abs_name):
        os.unlink(dbase_file_abs_name)
    output = open(dbase_file_abs_name, 'wb')
    dbase = {}
    dbase['histd1BySs0AndResTypes'] = histd1BySs0AndResTypes  # 92 kb uncompressed in the case of ~1000 lines only
    dbase['histd1BySs1AndResTypes'] = histd1BySs1AndResTypes
    dbase['histd1CtupleBySsAndResTypes'] = histd1CtupleBySsAndResTypes
    dbase['histd1ByResTypes'] = histd1ByResTypes  # 56 kb
    dbase['histd1BySs0'] = histd1BySs0  # 4 kb
    dbase['histd1BySs1'] = histd1BySs1
    dbase['histd1'] = histd1  # 4 kb
    cPickle.dump(dbase, output, 2)
    output.close()
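# Minimal numpy illustration (not CING code) of how the 2-D histogram above is formed: the
# original uses numpy matrices via mat(), but the same elementwise multiply of a 1 x N row
# by an N x 1 column broadcasts to an N x N outer product.
from numpy import array, multiply

hist1 = array([1., 2., 3.]).reshape(1, 3)      # x-axis histogram as a row
hist2 = array([10., 20., 30.]).reshape(3, 1)   # y-axis histogram as a column
hist2d = multiply(hist1, hist2)                # 3 x 3 matrix: hist2d[i, j] == hist2[i, 0] * hist1[0, j]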
script to update xplor N-terminal and C-terminal name conventions
'''
from cing import cingPythonCingDir
from cing.Libs.NTutils import * #@UnusedWildImport
from cing.core.database import NTdb
from cing.core.database import saveToSML

cing.verbosity = cing.verbosityDebug

if __name__ == '__main__':
    if 1:  # DEFAULT: 1 disable only when needed.
        nTwarning("Don't execute this script %s by accident. It damages CING." % getCallerFileName())
        sys.exit(1)
    # end if

    convention = 'INTERNAL_1'
    for rdef in NTdb.residuesWithProperties('protein'):
        nTdebug("Xplor N-terminal and C-terminal atom name translations changed for %s", rdef)
        for name1, namex in [('H1', 'HT1'), ('H2', 'HT2'), ('H3', 'HT3'),
                             ('OXT', 'OT2'), ('O', 'O,OT1')]:
            if name1 in rdef:
                rdef[name1].nameDict['XPLOR'] = namex
            #end if
        #end for
    #end for

    # save the new versions
    rootPath = os.path.realpath(os.path.join(cingPythonCingDir, 'Database', convention))
    saveToSML(NTdb, rootPath, convention)