def printSequenceFromPdbFile(fn):
    """Print the residue sequence found in the PDB file *fn*.

    A temporary CING project named 'getSequenceFromPdbFile' is created from
    the file (IUPAC convention), the sequence is printed twice (once via the
    residue database short names with a fallback of defaultPrintChainCode,
    once via each residue's own shortName) and the project is removed again.

    CING's verbosity is lowered to error level while the project is created
    and removed; the original verbosity is always restored on exit, also when
    an exception is raised along the way.
    """
    entryId = 'getSequenceFromPdbFile'
    verbosityOriginal = cing.verbosity
    cing.verbosity = cing.verbosityError
    try:
        project = Project(entryId)
        project.removeFromDisk()
        project = Project.open(entryId, status='new')
        project.initPDB(pdbFile=fn, convention=IUPAC)

        # The 'db' entry doesn't always exist, hence the default.
        fastaString = ''.join([
            getDeepByKeysOrDefault(res, defaultPrintChainCode, 'db', 'shortName')
            for res in project.molecule.allResidues()])

        # Messages below need to be visible to the user.
        cing.verbosity = verbosityOriginal
        nTmessage("Sequence from PDB file:")
        nTmessage(fastaString)
        for res in project.molecule.allResidues():
            nTmessageNoEOL(res.shortName)
        nTmessage('')

        # Silence the clean up again.
        cing.verbosity = cing.verbosityError
        project.removeFromDisk()
        del project
    finally:
        cing.verbosity = verbosityOriginal
def test_pdb(self):
    # Round trip for PDB entry 1brv: initialize a new project from the
    # PDB file, write a range of model 0 back out in XPLOR convention,
    # replace the coordinates from the original file and generate macros.
    cingDirTmpTest = os.path.join(cingDirTmp, getCallerName())
    mkdirs(cingDirTmpTest)
    # os.chdir returns None on success and raises on failure.
    self.assertFalse(os.chdir(cingDirTmpTest),
                     msg="Failed to change to test directory for files: " + cingDirTmpTest)

    entryId = "1brv"  # Small much studied PDB NMR entry
    pdbDirectory = os.path.join(cingDirTestsData, "pdb", entryId)
    pdbFileName = "pdb" + entryId + ".ent"
    pdbFilePath = os.path.join(pdbDirectory, pdbFileName)

    # Start from a clean slate; removeFromDisk is expected to return a
    # false value here.
    project = Project(entryId)
    self.assertFalse(project.removeFromDisk())
    project = Project.open(entryId, status='new')
    project.initPDB(pdbFile=pdbFilePath, convention=IUPAC)

    m = project.molecule
    ranges = 'A.173-178'
    nTdebug("m: %s" % m)
    self.assertTrue(m.toPDB('m001.pdb', model=0, ranges=ranges, convention='XPLOR'))

    nTdebug("Reimport 1")
    m.replaceCoordinatesByPdb(pdbFilePath, name=entryId + '_reimport', convention=IUPAC)
    self.assertFalse(project.mkMacros())
def test_pdb(self):
    # Round trip for PDB entry 1brv: initialize a new project from the
    # PDB file, write a range of model 0 back out in XPLOR convention,
    # replace the coordinates from the original file and generate macros.
    cingDirTmpTest = os.path.join(cingDirTmp, getCallerName())
    mkdirs(cingDirTmpTest)
    # os.chdir returns None on success and raises on failure.
    self.assertFalse(os.chdir(cingDirTmpTest),
                     msg="Failed to change to test directory for files: " + cingDirTmpTest)

    entryId = "1brv"  # Small much studied PDB NMR entry
    pdbDirectory = os.path.join(cingDirTestsData, "pdb", entryId)
    pdbFileName = "pdb" + entryId + ".ent"
    pdbFilePath = os.path.join(pdbDirectory, pdbFileName)

    # Start from a clean slate; removeFromDisk is expected to return a
    # false value here.
    project = Project(entryId)
    self.assertFalse(project.removeFromDisk())
    project = Project.open(entryId, status='new')
    project.initPDB(pdbFile=pdbFilePath, convention=IUPAC)

    m = project.molecule
    ranges = 'A.173-178'
    nTdebug("m: %s" % m)
    self.assertTrue(m.toPDB('m001.pdb', model=0, ranges=ranges, convention='XPLOR'))

    nTdebug("Reimport 1")
    m.replaceCoordinatesByPdb(pdbFilePath, name=entryId + '_reimport', convention=IUPAC)
    self.assertFalse(project.mkMacros())
def testPdbFile(self):
    # For each entry in a fixed list: create a new project from its PDB
    # file (IUPAC convention, non-standard residues allowed) and save it.
    nTwarning("This test case will take about 5 (+3 for 1v0e) minutes and is recommended to be done before major releases.")
    entryList = "1a4d 1ai0 1brv 1bus 1hue 1iv6 1kr8".split()

    # The working directory is the same for every entry.
    cingDirTmpTest = os.path.join(cingDirTmp, 'test2_pdb')
    mkdirs(cingDirTmpTest)

    for entryId in entryList:
        pdbDirectory = os.path.join(cingDirTestsData, "pdb", entryId)
        pdbFileName = "pdb" + entryId + ".ent"
        pdbFilePath = os.path.join(pdbDirectory, pdbFileName)

        # Re-enter the working directory for every entry, as before.
        os.chdir(cingDirTmpTest)

        # Start from a clean slate for this entry.
        project = Project(entryId)
        self.assertFalse(project.removeFromDisk())
        project = Project.open(entryId, status='new')
        self.assertTrue(project.initPDB(pdbFile=pdbFilePath, convention=IUPAC,
                                        allowNonStandardResidue=True))
        self.assertTrue(project.save())
def testPdbFile(self):
    # For each entry in a fixed list: create a new project from its PDB
    # file (IUPAC convention, non-standard residues allowed) and save it.
    nTwarning("This test case will take about 5 (+3 for 1v0e) minutes and is recommended to be done before major releases.")
    entryList = "1a4d 1ai0 1brv 1bus 1hue 1iv6 1kr8".split()

    # The working directory is the same for every entry.
    cingDirTmpTest = os.path.join(cingDirTmp, 'test2_pdb')
    mkdirs(cingDirTmpTest)

    for entryId in entryList:
        pdbDirectory = os.path.join(cingDirTestsData, "pdb", entryId)
        pdbFileName = "pdb" + entryId + ".ent"
        pdbFilePath = os.path.join(pdbDirectory, pdbFileName)

        # Re-enter the working directory for every entry, as before.
        os.chdir(cingDirTmpTest)

        # Start from a clean slate for this entry.
        project = Project(entryId)
        self.assertFalse(project.removeFromDisk())
        project = Project.open(entryId, status='new')
        self.assertTrue(project.initPDB(pdbFile=pdbFilePath, convention=IUPAC,
                                        allowNonStandardResidue=True))
        self.assertTrue(project.save())
def test_xplor_nih(self):
    # Create a project from the XPLOR convention PDB file of entry gb1,
    # run an html-only validation with the external programs switched
    # off and save the project.
    cingDirTmpTest = os.path.join(cingDirTmp, getCallerName())
    mkdirs(cingDirTmpTest)
    # os.chdir returns None on success and raises on failure.
    self.assertFalse(os.chdir(cingDirTmpTest),
                     msg="Failed to change to test directory for files: " + cingDirTmpTest)

    entryId = "gb1"
    pdbDirectory = os.path.join(cingDirTestsData, "xplor", entryId)
    pdbFileName = entryId + ".pdb"
    pdbFilePath = os.path.join(pdbDirectory, pdbFileName)

    # Start from a clean slate.
    project = Project(entryId)
    self.assertFalse(project.removeFromDisk())
    project = Project.open(entryId, status='new')
    project.initPDB(pdbFile=pdbFilePath, convention=XPLOR)

    project.validate(htmlOnly=True, doProcheck=False, doWhatif=False,
                     doWattos=False, doTalos=False)
    project.save()
def test_superpose(self):
    # Superpose the models of PDB entry 1brv twice (backbone only, then
    # including side chains, both without protons) and compare the rmsd of
    # the resulting average model with reference values to 3 places.
    pdbConvention = IUPAC
    entryId = "1brv"

    cingDirTmpTest = os.path.join(cingDirTmp, getCallerName())
    mkdirs(cingDirTmpTest)
    # os.chdir returns None on success and raises on failure.
    self.assertFalse(os.chdir(cingDirTmpTest),
                     msg="Failed to change to test directory for files: " + cingDirTmpTest)

    pdbDirectory = os.path.join(cingDirTestsData, "pdb", entryId)
    pdbFileName = "pdb" + entryId + ".ent"
    pdbFilePath = os.path.join(pdbDirectory, pdbFileName)
    self.assertTrue(os.path.exists(pdbFilePath), msg="Failed to find file: " + pdbFilePath)

    # Start from a clean slate.
    project = Project(entryId)
    self.assertFalse(project.removeFromDisk())
    project = Project.open(entryId, status='new')
    project.initPDB(pdbFile=pdbFilePath, convention=pdbConvention)

    # Compare with molmol on 1brv's 48 models:
    #   mean global bb    RMSD:  0.98 +/- 0.40 A  ( 0.10.. 2.19 A)
    #   mean global heavy RMSD:  1.75 +/- 0.51 A  ( 0.54.. 3.33 A)
    # Note that in molmol the backbone protein atoms are defined: N, CA, C
    # CING used to include the carbonyl atom.
    ens = project.molecule.superpose(backboneOnly=True, includeProtons=False, iterations=2)
    nTdebug('ens %s' % ens)
    nTdebug('ens.averageModel %s' % ens.averageModel)
    self.assertAlmostEqual(0.7643199324863148, ens.averageModel.rmsd, 3)

    # Confirmed to be the 'average RMSD to mean: 0.698' in molmol using
    # command Fit 'to_mean'.
    # No improvement to do 3 iterations over the default 2 but left in for
    # speed checking.
    ens = project.molecule.superpose(backboneOnly=False, includeProtons=False, iterations=3)
    nTdebug('ens.averageModel %s' % ens.averageModel)
    self.assertAlmostEqual(0.99383582432002637, ens.averageModel.rmsd, 3)
def test_superpose(self):
    # Superpose the models of PDB entry 1brv twice (backbone only, then
    # including side chains, both without protons) and compare the rmsd of
    # the resulting average model with reference values to 3 places.
    pdbConvention = IUPAC
    entryId = "1brv"

    cingDirTmpTest = os.path.join(cingDirTmp, getCallerName())
    mkdirs(cingDirTmpTest)
    # os.chdir returns None on success and raises on failure.
    self.assertFalse(os.chdir(cingDirTmpTest),
                     msg="Failed to change to test directory for files: " + cingDirTmpTest)

    pdbDirectory = os.path.join(cingDirTestsData, "pdb", entryId)
    pdbFileName = "pdb" + entryId + ".ent"
    pdbFilePath = os.path.join(pdbDirectory, pdbFileName)
    self.assertTrue(os.path.exists(pdbFilePath), msg="Failed to find file: " + pdbFilePath)

    # Start from a clean slate.
    project = Project(entryId)
    self.assertFalse(project.removeFromDisk())
    project = Project.open(entryId, status='new')
    project.initPDB(pdbFile=pdbFilePath, convention=pdbConvention)

    # Compare with molmol on 1brv's 48 models:
    #   mean global bb    RMSD:  0.98 +/- 0.40 A  ( 0.10.. 2.19 A)
    #   mean global heavy RMSD:  1.75 +/- 0.51 A  ( 0.54.. 3.33 A)
    # Note that in molmol the backbone protein atoms are defined: N, CA, C
    # CING used to include the carbonyl atom.
    ens = project.molecule.superpose(backboneOnly=True, includeProtons=False, iterations=2)
    nTdebug('ens %s' % ens)
    nTdebug('ens.averageModel %s' % ens.averageModel)
    self.assertAlmostEqual(0.7643199324863148, ens.averageModel.rmsd, 3)

    # Confirmed to be the 'average RMSD to mean: 0.698' in molmol using
    # command Fit 'to_mean'.
    # No improvement to do 3 iterations over the default 2 but left in for
    # speed checking.
    ens = project.molecule.superpose(backboneOnly=False, includeProtons=False, iterations=3)
    nTdebug('ens.averageModel %s' % ens.averageModel)
    self.assertAlmostEqual(0.99383582432002637, ens.averageModel.rmsd, 3)
def test_xplor_nih(self):
    # Create a project from the XPLOR convention PDB file of entry gb1,
    # run an html-only validation with the external programs switched
    # off and save the project.
    cingDirTmpTest = os.path.join(cingDirTmp, getCallerName())
    mkdirs(cingDirTmpTest)
    # os.chdir returns None on success and raises on failure.
    self.assertFalse(os.chdir(cingDirTmpTest),
                     msg="Failed to change to test directory for files: " + cingDirTmpTest)

    entryId = "gb1"
    pdbDirectory = os.path.join(cingDirTestsData, "xplor", entryId)
    pdbFileName = entryId + ".pdb"
    pdbFilePath = os.path.join(pdbDirectory, pdbFileName)

    # Start from a clean slate.
    project = Project(entryId)
    self.assertFalse(project.removeFromDisk())
    project = Project.open(entryId, status='new')
    project.initPDB(pdbFile=pdbFilePath, convention=XPLOR)

    project.validate(htmlOnly=True, doProcheck=False, doWhatif=False,
                     doWattos=False, doTalos=False)
    project.save()
def printSequenceFromPdbFile(fn):
    """Print the residue sequence found in the PDB file *fn*.

    A temporary CING project named 'getSequenceFromPdbFile' is created from
    the file (IUPAC convention), the sequence is printed twice (once via the
    residue database short names with a fallback of defaultPrintChainCode,
    once via each residue's own shortName) and the project is removed again.

    CING's verbosity is lowered to error level while the project is created
    and removed; the original verbosity is always restored on exit, also when
    an exception is raised along the way.
    """
    entryId = 'getSequenceFromPdbFile'
    verbosityOriginal = cing.verbosity
    cing.verbosity = cing.verbosityError
    try:
        project = Project(entryId)
        project.removeFromDisk()
        project = Project.open(entryId, status='new')
        project.initPDB(pdbFile=fn, convention=IUPAC)

        # The 'db' entry doesn't always exist, hence the default.
        fastaString = ''.join([
            getDeepByKeysOrDefault(res, defaultPrintChainCode, 'db', 'shortName')
            for res in project.molecule.allResidues()])

        # Messages below need to be visible to the user.
        cing.verbosity = verbosityOriginal
        nTmessage("Sequence from PDB file:")
        nTmessage(fastaString)
        for res in project.molecule.allResidues():
            nTmessageNoEOL(res.shortName)
        nTmessage('')

        # Silence the clean up again.
        cing.verbosity = cing.verbosityError
        project.removeFromDisk()
        del project
    finally:
        cing.verbosity = verbosityOriginal
def _writeDihedralD1D2Tables(project, fpGood, fpBad, dihedralName1, dihedralName2):
    """Compute the two CB based dihedrals per residue of chain A and tabulate them.

    For every residue that has both neighbours and whose triplet has all CB
    atoms, the dihedrals CB(i-1)-CA(i-1)-CA(i)-CB(i) and
    CB(i)-CA(i)-CA(i+1)-CB(i+1) are calculated and stored on the residue under
    dihedralName1 and dihedralName2. For residues where both dihedrals have a
    cv below 0.03 one line per model is written to fpBad when the model's
    value from the What If backbone check list is below 20.0 and to fpGood
    otherwise.
    """
    mCount = project.molecule.modelCount
    for res in project.molecule.A.allResidues():
        triplet = NTlist()
        for i in [-1, 0, 1]:
            triplet.append(res.sibling(i))
        if None in triplet:
            # Residue at a chain terminus; no complete triplet available.
            nTdebug('Skipping %s' % res)
            continue

        ca_atms = triplet.zap('CA')
        cb_atms = triplet.zap('CB')
        nTdebug("%s %s %s %s" % (res, triplet, ca_atms, cb_atms))
        if None in cb_atms:  # skip Gly for now
            nTdebug('Skipping %s' % res)
            continue

        d1 = Dihedral(res, dihedralName1, range=[0.0, 360.0])
        d1.atoms = [cb_atms[0], ca_atms[0], ca_atms[1], cb_atms[1]]
        d1.calculateValues()
        res[dihedralName1] = d1  # append dihedral to residue

        d2 = Dihedral(res, dihedralName2, range=[0.0, 360.0])
        d2.atoms = [cb_atms[1], ca_atms[1], ca_atms[2], cb_atms[2]]
        d2.calculateValues()
        res[dihedralName2] = d2  # append dihedral to residue

        # Check the first model only to decide on skipping the residue.
        bb = getDeepByKeys(res, WHATIF_STR, BBCCHK_STR, VALUE_LIST_STR, 0)
        if bb is None:
            nTdebug('Skipping without BB %s' % res)
            continue

        if d1.cv < 0.03 and d2.cv < 0.03:  # Only include structured residues
            for i in range(mCount):  # Consider each model individually
                bb = getDeepByKeys(res, WHATIF_STR, BBCCHK_STR, VALUE_LIST_STR, i)
                if bb is None:
                    nTdebug('Skipping without BB %s' % res)
                    continue

                angles = NTlist()  # store phi, psi, chi1, chi2
                for angle in ['PHI', 'PSI', 'CHI1', 'CHI2']:
                    if res.has_key(angle):
                        angles.append(res[angle][i])
                    else:
                        angles.append(0.0)

                # Arbitrary 20 bb occurrences as cutoff for now
                if bb < 20.0:
                    fp = fpBad
                else:
                    fp = fpGood
                fprintf(fp, '%4d   %7.2f  %7.2f  %7.2f  %s  %s %s\n',
                        res.resNum, d1[i], d2[i], bb,
                        angles.format("%7.2f  "), res, res.dssp.consensus)


def plotDihedralD1D2():
    """Plot the d1/d2 density background for PDB entry 1brv and tabulate d1/d2.

    A new project is initialized from the 1brv PDB file. A plot with the
    dihedral histograms found in hPlot.histd1BySs0 as background is written to
    'allRestype_d1d2.png'. The per residue, per model dihedral values are
    written to '<project name>.testCb2Good.out' and
    '<project name>.testCb2Bad.out'; both files are closed even when the
    calculation fails.
    """
    dihedralName1 = 'Cb4N'
    dihedralName2 = 'Cb4C'
    graphicsFormat = "png"

    entryId = "1brv"  # Small much studied PDB NMR entry
    pdbDirectory = os.path.join(cingDirTestsData, "pdb", entryId)
    pdbFileName = "pdb" + entryId + ".ent"
    pdbFilePath = os.path.join(pdbDirectory, pdbFileName)

    project = Project(entryId)
    project.removeFromDisk()
    project = Project.open(entryId, status='new')
    project.initPDB(pdbFile=pdbFilePath, convention=IUPAC)

    titleStr = 'd1d2 all resType'
    nTmessage("plotting: %s" % titleStr)

    ps = NTplotSet()  # closes any previous plots
    ps.hardcopySize = (500, 500)

    x = NTlist(-45, -80, 125)  # outside the range.
    y = NTlist(-65, -63, -125)

    plotparams1 = project.plotParameters.getdefault(dihedralName1, 'dihedralDefault')
    plotparams2 = project.plotParameters.getdefault(dihedralName2, 'dihedralDefault')

    x.limit(plotparams1.min, plotparams1.max)
    y.limit(plotparams2.min, plotparams2.max)

    plot = NTplot(title=titleStr,
                  xRange=(plotparams1.min, plotparams1.max),
                  xTicks=range(int(plotparams1.min), int(plotparams1.max + 1), plotparams1.ticksize),
                  xLabel=dihedralName1,
                  yRange=(plotparams2.min, plotparams2.max),
                  yTicks=range(int(plotparams2.min), int(plotparams2.max + 1), plotparams2.ticksize),
                  yLabel=dihedralName2)
    ps.addPlot(plot)

    # Plot a density background
    histList = []
    # TODO: check this histd1BySs0 attribute. UNTESTED.
    ssTypeList = sorted(hPlot.histd1BySs0.keys())  # sorts to: space, H, S
    for ssType in ssTypeList:
        hist = getDeepByKeys(hPlot.histd1BySs0, ssType)
        # Identity test; an equality test against None is not reliable for
        # array like histogram objects.
        if hist is not None:
            nTdebug('appending [%s]' % ssType)
            histList.append(hist)
    if histList:
        plot.dihedralComboPlot(histList)

    fpGood = open(project.name + '.testCb2Good.out', 'w')
    try:
        fpBad = open(project.name + '.testCb2Bad.out', 'w')
        try:
            _writeDihedralD1D2Tables(project, fpGood, fpBad, dihedralName1, dihedralName2)
        finally:
            fpBad.close()
    finally:
        fpGood.close()

    fn = "allRestype_d1d2." + graphicsFormat
    ps.hardcopy(fn, graphicsFormat)
def main(entryId, *extraArgList):
    """Validate a single entry and archive the resulting CING project.

    inputDir may be a directory or a url. The input is only retrieved when
    inputDir starts with one of the protocols http, file or ssh.

    The positional extraArgList is expected to hold, in order: verbosity,
    inputDir, outputDir, pdbConvention, restraintsConvention, archiveType,
    projectType, storeCING2db, ranges, filterTopViolations, filterVasco and
    singleCoreOperation. Fewer arguments only trigger a message; more
    arguments are an error.

    Returns True on failure. Returns None on success, when the entry was
    already done, and when the input archive could not be found after
    retrieval.
    """
    fastestTest = False  # default: False
    htmlOnly = False  # default: False but enable it for faster runs without some actual data.
    doWhatif = True  # disables whatif actual run
    doProcheck = True
    doWattos = True
    doQueeny = True
    doTalos = True
    # default: True. Create a tgz for the cing project. In case of a CING
    # project input it will be overwritten.
    # NB leave this set to True or modify code below.
    tgzCing = True
    removeCcpnDirectory = 1  # perhaps not so in the future.
    modelCount = None  # default setting is None
    if fastestTest:
        # if this is more and there is only one model present it leads to an error message.
        modelCount = 2
        htmlOnly = True
        doWhatif = False
        doProcheck = False
        doWattos = False
        doQueeny = False
        doTalos = False
    forceRedo = True
    forceRetrieveInput = True

    nTmessage(header)
    nTmessage(getStartMessage())

    # Sync below code with nrgCing#createToposTokens
    expectedArgumentList = """
    verbosity         inputDir             outputDir
    pdbConvention     restraintsConvention archiveType         projectType
    storeCING2db      ranges               filterTopViolations filterVasco
    singleCoreOperation
    """.split()
    expectedNumberOfArguments = len(expectedArgumentList)
    if len(extraArgList) != expectedNumberOfArguments:
        nTmessage("consider updating code to include all sequential parameters: %s" % str(expectedArgumentList))
        if len(extraArgList) > expectedNumberOfArguments:
            nTerror("Got arguments: " + str(extraArgList))
            nTerror("Failed to get expected number of arguments: %d got %d" % (
                expectedNumberOfArguments, len(extraArgList)))
            nTerror("Expected arguments: %s" % expectedArgumentList)
            return True

    entryCodeChar2and3 = entryId[1:3]
    cing.verbosity = int(extraArgList[IDX_VERBOSITY])
    inputDir = extraArgList[IDX_INPUT]
    outputDir = os.path.join(extraArgList[IDX_OUTPUT], DATA_STR, entryCodeChar2and3, entryId)
    pdbConvention = extraArgList[IDX_PDB]  # @UnusedVariable
    restraintsConvention = extraArgList[IDX_RESTRAINTS]
    archiveType = extraArgList[IDX_ARCHIVE]  # Only used for deriving the input location not the output.
    projectType = extraArgList[IDX_PROJECT_TYPE]

    # The trailing arguments are optional and are looked up leniently.
    storeCING2db = stringMeansBooleanTrue(getDeepByKeysOrAttributes(extraArgList, IDX_STORE_DB))
    ranges = getDeepByKeysOrAttributes(extraArgList, IDX_RANGES)
    filterTopViolations = getDeepByKeysOrAttributes(extraArgList, IDX_FILTER_TOP)
    if filterTopViolations:
        filterTopViolations = int(filterTopViolations)  # change '0' to 0
    filterVasco = getDeepByKeysOrAttributes(extraArgList, IDX_FILTER_VASCO)
    if filterVasco:
        filterVasco = int(filterVasco)
    else:
        filterVasco = 1  # Default is enabled
    singleCoreOperation = getDeepByKeysOrAttributes(extraArgList, IDX_SINGLE_CORE_OPERATION)
    if singleCoreOperation:
        singleCoreOperation = int(singleCoreOperation)
    else:
        singleCoreOperation = 0  # Default is disabled

    if archiveType == ARCHIVE_TYPE_FLAT:
        pass  # default
    elif archiveType == ARCHIVE_TYPE_BY_ENTRY:
        inputDir = os.path.join(inputDir, entryId)
    elif archiveType == ARCHIVE_TYPE_BY_CH23:
        inputDir = os.path.join(inputDir, entryCodeChar2and3)
    elif archiveType == ARCHIVE_TYPE_BY_CH23_BY_ENTRY:
        inputDir = os.path.join(inputDir, entryCodeChar2and3, entryId)

    # An '@' in the output location marks it as remote.
    isRemoteOutputDir = '@' in outputDir
    archive_id = getArchiveIdFromDirectoryName(outputDir)

    nTdebug("Using program arguments:")
    nTdebug("inputDir:             %s" % inputDir)
    nTdebug("outputDir:            %s" % outputDir)
    nTdebug("pdbConvention:        %s" % pdbConvention)
    nTdebug("restraintsConvention: %s" % restraintsConvention)
    nTdebug("archiveType:          %s" % archiveType)
    nTdebug("projectType:          %s" % projectType)
    nTdebug("storeCING2db:         %s" % storeCING2db)
    nTdebug("ranges:               %s" % ranges)
    nTdebug("filterTopViolations:  %s" % filterTopViolations)
    nTdebug("filterVasco:          %s" % filterVasco)
    nTdebug("singleCoreOperation:  %s" % singleCoreOperation)
    nTdebug("")
    nTdebug("Using derived settings:")
    nTdebug("modelCount:           %s" % modelCount)
    nTdebug("isRemoteOutputDir:    %s" % isRemoteOutputDir)
    nTdebug("archive_id:           %s" % archive_id)

    # For NMR_REDO required as most efficient.
    if singleCoreOperation:
        setToSingleCoreOperation()

    # presume the directory still needs to be created.
    cingEntryDir = entryId + ".cing"
    if os.path.isdir(cingEntryDir):
        if forceRedo:
            nTmessage("Enforcing a redo")
            rmtree(cingEntryDir)
        else:
            mainIndexFile = os.path.join(cingEntryDir, "index.html")
            isDone = os.path.isfile(mainIndexFile)
            if isDone:
                nTmessage("SKIPPING ENTRY ALREADY DONE")
                return
            nTmessage("REDOING BECAUSE VALIDATION CONSIDERED NOT DONE.")
            rmtree(cingEntryDir)

    # A remote output directory can't be entered; work in the temporary one.
    if isRemoteOutputDir:
        os.chdir(cingDirTmp)
    else:
        os.chdir(outputDir)

    project = Project(entryId)
    if project.removeFromDisk():
        nTerror("Failed to remove existing project (if present)")
        return True

    formatFileName = '%s.tgz'
    if projectType == PROJECT_TYPE_CING:
        formatFileName = '%s.cing.tgz'
    elif projectType == PROJECT_TYPE_PDB:
        formatFileName = 'pdb%s.ent.gz'
    fileNameTgz = formatFileName % entryId

    allowedInputProtocolList = 'http file ssh'.split()
    inputProtocal = inputDir.split(':')[0]
    if inputProtocal in allowedInputProtocolList:
        stillToRetrieve = False
        if os.path.exists(fileNameTgz):
            if forceRetrieveInput:
                os.unlink(fileNameTgz)
                stillToRetrieve = True
        else:
            stillToRetrieve = True
        if stillToRetrieve:
            retrieveTgzFromUrl(entryId, inputDir, archiveType=archiveType, formatFileName=formatFileName)
        if not os.path.exists(fileNameTgz):
            nTerror("Tgz should already have been present skipping entry")
            return
    else:
        nTdebug("Entry not retrieved which might be normal in some situations.")

    if projectType == PROJECT_TYPE_CING:
        project = Project.open(entryId, status='old')
        if not project:
            nTerror("Failed to init old project")
            return True
    elif projectType == PROJECT_TYPE_CCPN:
        project = Project.open(entryId, status='new')
        if not project.initCcpn(ccpnFolder=fileNameTgz, modelCount=modelCount):
            nTerror("Failed to init project from ccpn")
            return True
    elif projectType == PROJECT_TYPE_PDB:
        project = Project.open(entryId, status='new')
        pdbFilePath = entryId + ".pdb"
        gunzip(fileNameTgz, outputFileName=pdbFilePath, removeOriginal=True)
        project.initPDB(pdbFile=pdbFilePath, convention=IUPAC, nmodels=modelCount)
        nTdebug("Removing tmp: %s" % pdbFilePath)
        os.unlink(pdbFilePath)

    if ranges is not None:
        project.molecule.setRanges(ranges)
    if archive_id:
        project.molecule.setArchiveId(archive_id)
    project.molecule.superpose(ranges=ranges)

    # A failure to filter is reported but doesn't stop the run.
    if filterTopViolations and not project.filterHighRestraintViol():
        nTerror("Failed to filterHighRestraintViol")

    ####> MAIN UTILITY HERE
    if project.validate(htmlOnly=htmlOnly, ranges=ranges, doProcheck=doProcheck, doWhatif=doWhatif,
                        doWattos=doWattos, doQueeny=doQueeny, doTalos=doTalos, filterVasco=filterVasco):
        nTerror("Failed to validate project read")
        return True

    # Write a single PDB file containing all models
    # according to IUPAC conventions
    project.export2PDB()
    project.save()

    if storeCING2db and archive_id:
        # Does require:
        # from cing.PluginCode.sqlAlchemy import csqlAlchemy
        # and should never crash run. KeyboardInterrupt and SystemExit are
        # deliberately not caught so the run can still be aborted.
        try:
            if doStoreCING2db(entryId, archive_id, project=project):
                nTerror("Failed to store CING project's data to DB but continuing.")
        except Exception:
            nTtracebackError()
            nTerror("Failed to store CING project's data due to above traceback error.")

    if projectType == PROJECT_TYPE_CCPN:
        os.unlink(fileNameTgz)  # temporary ccpn tgz
        if removeCcpnDirectory:
            rmdir(entryId)  # ccpn dir may contain vasco info.

    if tgzCing:
        directoryNameCing = entryId + ".cing"
        tgzFileNameCing = directoryNameCing + ".tgz"
        if os.path.exists(tgzFileNameCing):
            nTwarning("Overwriting: " + tgzFileNameCing)
        cmd = "tar -czf %s %s" % (tgzFileNameCing, directoryNameCing)
        nTdebug("cmd: %s" % cmd)
        status, result = commands.getstatusoutput(cmd)
        if status:
            nTerror("Failed to tar status: %s with result %s" % (status, result))
            return True
        if isRemoteOutputDir:
            if putFileBySsh(tgzFileNameCing, outputDir, ntriesMax=2):
                # NOTE(review): status and result reported here are still
                # those of the tar command above, not of the transfer.
                nTerror("Failed to send File By Scp status: %s with result %s" % (status, result))
                nTerror("Maintaining results.")
                return True
            nTmessage("Removing tgz result: %s" % tgzFileNameCing)
            os.remove(tgzFileNameCing)
            nTmessage("Removing cing dir itself: %s" % directoryNameCing)
            rmdir(directoryNameCing)
        # else: do NOT remove local copy
def main(entryId, *extraArgList):
    """Validate a single entry and tar the resulting CING project.

    inputDir may be a directory or a url. The input is only retrieved when
    inputDir starts with http or file.

    The positional extraArgList needs to hold exactly, in order: inputDir,
    outputDir, pdbConvention, restraintsConvention, archiveType, projectType
    and storeCING2db.

    Returns True on failure. Returns None on success, when the entry was
    already done, and when the input archive could not be found after
    retrieval.
    """
    fastestTest = True  # default: False
    htmlOnly = False  # default: False but enable it for faster runs without some actual data.
    doWhatif = True  # disables whatif actual run
    doProcheck = True
    doWattos = True
    doTalos = True
    # default: True. Create a tgz for the cing project. In case of a CING
    # project input it will be overwritten.
    tgzCing = True
    modelCount = None  # default setting is None
    if fastestTest:
        modelCount = 3
        htmlOnly = True
        doWhatif = False
        doProcheck = False
        doWattos = False
        doTalos = False
    force_redo = True
    force_retrieve_input = True

    nTmessage(cing.cingDefinitions.getHeaderString())
    nTmessage(cing.systemDefinitions.getStartMessage())

    expectedArgumentList = ['inputDir', 'outputDir', 'pdbConvention', 'restraintsConvention',
                            'archiveType', 'projectType', 'storeCING2db']
    expectedNumberOfArguments = len(expectedArgumentList)
    if len(extraArgList) != expectedNumberOfArguments:
        nTerror("Got arguments: " + repr(extraArgList))
        nTerror("Failed to get expected number of arguments: %d got %d" % (
            expectedNumberOfArguments, len(extraArgList)))
        nTerror("Expected arguments: %s" % expectedArgumentList)
        return True

    entryCodeChar2and3 = entryId[1:3]
    inputDir = extraArgList[0]
    outputDir = os.path.join(extraArgList[1], DATA_STR, entryCodeChar2and3, entryId)
    pdbConvention = extraArgList[2]  # @UnusedVariable
    restraintsConvention = extraArgList[3]
    archiveType = extraArgList[4]
    projectType = extraArgList[5]
    # The argument count was verified above so the last one is present.
    # NOTE(review): the value is used as is; if it is passed as a string
    # then any non-empty value, including '0' or 'False', enables storing.
    storeCING2db = extraArgList[6]

    if archiveType == ARCHIVE_TYPE_FLAT:
        pass  # default
    elif archiveType == ARCHIVE_TYPE_BY_ENTRY:
        inputDir = os.path.join(inputDir, entryId)
    elif archiveType == ARCHIVE_TYPE_BY_CH23:
        inputDir = os.path.join(inputDir, entryCodeChar2and3)
    elif archiveType == ARCHIVE_TYPE_BY_CH23_BY_ENTRY:
        inputDir = os.path.join(inputDir, entryCodeChar2and3, entryId)

    ranges = None

    nTdebug("Using:")
    nTdebug("inputDir:             %s" % inputDir)
    nTdebug("outputDir:            %s" % outputDir)
    nTdebug("pdbConvention:        %s" % pdbConvention)
    nTdebug("restraintsConvention: %s" % restraintsConvention)
    nTdebug("archiveType:          %s" % archiveType)
    nTdebug("projectType:          %s" % projectType)
    nTdebug("modelCount:           %s" % modelCount)
    nTdebug("storeCING2db:         %s" % storeCING2db)
    nTdebug("ranges:               %s" % ranges)

    # presume the directory still needs to be created.
    cingEntryDir = entryId + ".cing"
    if os.path.isdir(cingEntryDir):
        if force_redo:
            nTmessage("Enforcing a redo")
            rmtree(cingEntryDir)
        else:
            mainIndexFile = os.path.join(cingEntryDir, "index.html")
            isDone = os.path.isfile(mainIndexFile)
            if isDone:
                nTmessage("SKIPPING ENTRY ALREADY DONE")
                return
            nTmessage("REDOING BECAUSE VALIDATION CONSIDERED NOT DONE.")
            rmtree(cingEntryDir)

    os.chdir(outputDir)

    project = Project(entryId)
    if project.removeFromDisk():
        nTerror("Failed to remove existing project (if present)")
        return True

    formatFileName = '%s.tgz'
    if projectType == PROJECT_TYPE_CING:
        formatFileName = '%s.cing.tgz'
    elif projectType == PROJECT_TYPE_PDB:
        formatFileName = 'pdb%s.ent.gz'
    fileNameTgz = formatFileName % entryId

    # If true the input is retrieved by retrieveTgzFromUrl.
    isUrlInput = inputDir.startswith(("http", "file"))
    if isUrlInput:
        stillToRetrieve = False
        if os.path.exists(fileNameTgz):
            if force_retrieve_input:
                os.unlink(fileNameTgz)
                stillToRetrieve = True
        else:
            stillToRetrieve = True
        if stillToRetrieve:
            retrieveTgzFromUrl(entryId, inputDir, archiveType=archiveType, formatFileName=formatFileName)
        if not os.path.exists(fileNameTgz):
            nTerror("Tgz should already have been present skipping entry")
            return

    if projectType == PROJECT_TYPE_CING:
        # Needs to be copied because the open method doesn't take a directory
        # argument. A url input was already retrieved into the current
        # directory above and can't be copied as if it were a local path.
        if not isUrlInput:
            fullFileNameTgz = os.path.join(inputDir, fileNameTgz)
            shutil.copy(fullFileNameTgz, '.')
        project = Project.open(entryId, status='old')
        if not project:
            nTerror("Failed to init old project")
            return True
    elif projectType == PROJECT_TYPE_CCPN:
        project = Project.open(entryId, status='new')
        if not project.initCcpn(ccpnFolder=fileNameTgz, modelCount=modelCount):
            nTerror("Failed to init project from ccpn")
            return True
    elif projectType == PROJECT_TYPE_PDB:
        project = Project.open(entryId, status='new')
        pdbFilePath = entryId + ".pdb"
        gunzip(fileNameTgz, outputFileName=pdbFilePath, removeOriginal=True)
        project.initPDB(pdbFile=pdbFilePath, convention=IUPAC, nmodels=modelCount)
        nTdebug("Removing tmp: %s" % pdbFilePath)
        os.unlink(pdbFilePath)

    project.molecule.superpose(ranges=ranges)
    if project.validate(htmlOnly=htmlOnly, ranges=ranges, doProcheck=doProcheck, doWhatif=doWhatif,
                        doWattos=doWattos, doTalos=doTalos):
        nTerror("Failed to validate project read")
        return True

    if storeCING2db:
        # Does require:
        # from cing.PluginCode.sqlAlchemy import csqlAlchemy
        # and should never crash run. KeyboardInterrupt and SystemExit are
        # deliberately not caught so the run can still be aborted.
        try:
            if doStoreCING2db(entryId, ARCHIVE_CASP_ID, project=project):
                nTerror("Failed to store CING project's data to DB but continuing.")
        except Exception:
            nTtracebackError()
            nTerror("Failed to store CING project's data due to above traceback error.")

    project.save()

    if projectType == PROJECT_TYPE_CCPN:
        rmdir(entryId)  # temporary ccpn dir

    if tgzCing:
        directoryNameCing = entryId + ".cing"
        tgzFileNameCing = directoryNameCing + ".tgz"
        if os.path.exists(tgzFileNameCing):
            nTwarning("Overwriting: " + tgzFileNameCing)
        cmd = "tar -czf %s %s" % (tgzFileNameCing, directoryNameCing)
        do_cmd(cmd)
def _writeD1D2Tables(project, fpGood, fpBad):
    """Compute Cb4N/Cb4C per residue of chain A and write one line per model.

    For every residue that has both neighbours and whose triplet has all CB
    atoms, the two dihedrals are created, calculated and stored on the residue
    under the keys 'Cb4N' and 'Cb4C'. Lines are only written for residues
    where both dihedrals have cv < 0.03; a model goes to fpBad when its
    WHATIF_STR/BBCCHK_STR value is below 20.0 and to fpGood otherwise.
    """
    mCount = project.molecule.modelCount

    for res in project.molecule.A.allResidues():
        # Previous, current and next residue.
        triplet = NTlist()
        for i in [-1, 0, 1]:
            triplet.append(res.sibling(i))

        if None in triplet:
            # Fixed: the format string lacked '%s' so the residue was never shown.
            nTdebug('Skipping %s' % res)
            continue

        ca_atms = triplet.zap('CA')
        cb_atms = triplet.zap('CB')
        nTdebug("%s %s %s %s" % (res, triplet, ca_atms, cb_atms))

        if None in cb_atms: # skip Gly for now
            nTdebug('Skipping %s' % res)
            continue

        d1 = Dihedral(res, 'Cb4N', range=[0.0, 360.0])
        d1.atoms = [cb_atms[0], ca_atms[0], ca_atms[1], cb_atms[1]]
        d1.calculateValues()
        res['Cb4N'] = d1 # append dihedral to residue

        d2 = Dihedral(res, 'Cb4C', range=[0.0, 360.0])
        d2.atoms = [cb_atms[1], ca_atms[1], ca_atms[2], cb_atms[2]]
        d2.calculateValues()
        res['Cb4C'] = d2 # append dihedral to residue

        # Check the first model only to decide whether the residue has any value.
        bb = getDeepByKeys(res, WHATIF_STR, BBCCHK_STR, VALUE_LIST_STR, 0)
        if bb is None:
            nTdebug('Skipping without BB %s' % res)
            continue

        # Only include structured residues (cv is presumably the circular variance).
        if not (d1.cv < 0.03 and d2.cv < 0.03):
            continue

        for i in range(mCount): # Consider each model individually
            bb = getDeepByKeys(res, WHATIF_STR, BBCCHK_STR, VALUE_LIST_STR, i)
            if bb is None:
                nTdebug('Skipping without BB %s' % res)
                continue

            angles = NTlist() # store phi, psi, chi1, chi2
            for angle in ['PHI', 'PSI', 'CHI1', 'CHI2']:
                if res.has_key(angle):
                    angles.append(res[angle][i])
                else:
                    angles.append(0.0) # placeholder for an absent angle
            #end for

            # Arbitrary 20 bb occurrences as cutoff for now.
            if bb < 20.0:
                fp = fpBad
            else:
                fp = fpGood
            fprintf(fp, '%4d %7.2f %7.2f %7.2f %s %s %s\n',
                    res.resNum, d1[i], d2[i], bb, angles.format("%7.2f "), res, res.dssp.consensus)
        #end for
    #end for


def plotDihedralD1D2():
    """Plot the Cb4N versus Cb4C dihedral combination for test entry 1brv.

    A new project is initialized from the PDB file of the entry in the CING
    test data directory. The plot gets the histograms found in
    hPlot.histd1BySs0 as background. Two text tables are written, named
    <project.name>.testCb2Good.out and <project.name>.testCb2Bad.out, and the
    plot set is written through ps.hardcopy as allRestype_d1d2.png.

    Takes no arguments and returns None.
    """
    dihedralName1 = 'Cb4N'
    dihedralName2 = 'Cb4C'
    graphicsFormat = "png"

    entryId = "1brv" # Small much studied PDB NMR entry
    # Alternative: "1hy8", a small, single model, very low scoring entry.

    pdbDirectory = os.path.join(cingDirTestsData, "pdb", entryId)
    pdbFileName = "pdb" + entryId + ".ent"
    pdbFilePath = os.path.join(pdbDirectory, pdbFileName)

    # Start from a clean project on disk.
    project = Project(entryId)
    project.removeFromDisk()
    project = Project.open(entryId, status='new')
    project.initPDB(pdbFile=pdbFilePath, convention=IUPAC)

    titleStr = 'd1d2 all resType'
    nTmessage("plotting: %s" % titleStr)

    ps = NTplotSet() # closes any previous plots
    ps.hardcopySize = (500, 500)

    plotparams1 = project.plotParameters.getdefault(dihedralName1, 'dihedralDefault')
    plotparams2 = project.plotParameters.getdefault(dihedralName2, 'dihedralDefault')

    # Sample points, some outside the range; they are limited to the plot
    # range but not used any further in this function.
    x = NTlist(-45, -80, 125)
    y = NTlist(-65, -63, -125)
    x.limit(plotparams1.min, plotparams1.max)
    y.limit(plotparams2.min, plotparams2.max)

    plot = NTplot(title=titleStr,
                  xRange=(plotparams1.min, plotparams1.max),
                  xTicks=range(int(plotparams1.min), int(plotparams1.max + 1), plotparams1.ticksize),
                  xLabel=dihedralName1,
                  yRange=(plotparams2.min, plotparams2.max),
                  yTicks=range(int(plotparams2.min), int(plotparams2.max + 1), plotparams2.ticksize),
                  yLabel=dihedralName2)
    ps.addPlot(plot)

    # Plot a density background.
    # TODO: check this histd1BySs0 attribute. UNTESTED.
    histList = []
    for ssType in sorted(hPlot.histd1BySs0.keys()): # sorts to: space, H, S
        hist = getDeepByKeys(hPlot.histd1BySs0, ssType)
        # Identity test: a '!=' comparison is unsafe if hist is array-like.
        if hist is not None:
            nTdebug('appending [%s]' % ssType)
            histList.append(hist)
    if histList:
        plot.dihedralComboPlot(histList)

    # Both tables are closed even when the residue loop raises.
    fpGood = open(project.name + '.testCb2Good.out', 'w')
    try:
        fpBad = open(project.name + '.testCb2Bad.out', 'w')
        try:
            _writeD1D2Tables(project, fpGood, fpBad)
        finally:
            fpBad.close()
    finally:
        fpGood.close()

    fn = "allRestype_d1d2." + graphicsFormat
    ps.hardcopy(fn, graphicsFormat)
def main(entryId, *extraArgList):
    """Validate one entry with CING, optionally store it in the DB and tgz the result.

    entryId is the entry code; its characters 2 and 3 form a directory level
    of the output location.

    extraArgList carries the settings named in expectedArgumentList below:
        verbosity inputDir outputDir pdbConvention restraintsConvention
        archiveType projectType storeCING2db ranges filterTopViolations
        filterVasco singleCoreOperation
    They are read through the IDX_* constants defined elsewhere. Passing more
    arguments than expected is an error; a different but not larger count only
    produces a message.

    inputDir may be a directory or a url. The input file is only retrieved
    when the part of inputDir before the first ':' is one of http, file or ssh.
    An outputDir containing '@' is treated as remote: the work is then done in
    cingDirTmp and the tgz is sent with putFileBySsh, after which the local
    tgz and .cing directory are removed.

    Returns True on failure. Returns None after a completed run, when an entry
    is skipped because it was already done, and also when the input file is
    still missing after retrieval (that case is logged as an error).
    """

    fastestTest = False # default: False
    htmlOnly = False # default: False but enable it for faster runs without some actual data.
    doWhatif = True # disables whatif actual run
    doProcheck = True
    doWattos = True
    doQueeny = True
    doTalos = True
    # default: True. Create a tgz for the cing project. In case of a CING project input
    # it will be overwritten. NB leave this set to True or modify code below.
    tgzCing = True
    removeCcpnDirectory = 1 # perhaps not so in the future.
    modelCount = None # default setting is None

    if fastestTest:
        modelCount = 2 # if this is more and there is only one model present it leads to an error message.
        htmlOnly = True
        doWhatif = False
        doProcheck = False
        doWattos = False
        doQueeny = False
        doTalos = False
    # end if
    forceRedo = True
    forceRetrieveInput = True

    nTmessage(header)
    nTmessage(getStartMessage())

    # Sync below code with nrgCing#createToposTokens
    expectedArgumentList = """
    verbosity         inputDir             outputDir
    pdbConvention     restraintsConvention archiveType         projectType
    storeCING2db      ranges               filterTopViolations filterVasco
    singleCoreOperation
    """.split()
    expectedNumberOfArguments = len(expectedArgumentList)
    if len(extraArgList) != expectedNumberOfArguments:
        nTmessage("consider updating code to include all sequential parameters: %s" % str(expectedArgumentList))
        if len(extraArgList) > expectedNumberOfArguments:
            nTerror("Got arguments: " + str(extraArgList))
            nTerror("Failed to get expected number of arguments: %d got %d" % (
                expectedNumberOfArguments, len(extraArgList)))
            nTerror("Expected arguments: %s" % expectedArgumentList)
            return True
        # end if
    # end if

    entryCodeChar2and3 = entryId[1:3]
    cing.verbosity = int(extraArgList[IDX_VERBOSITY])
    inputDir = extraArgList[IDX_INPUT]
    outputDir = os.path.join(extraArgList[IDX_OUTPUT], DATA_STR, entryCodeChar2and3, entryId)
    pdbConvention = extraArgList[IDX_PDB] #@UnusedVariable
    restraintsConvention = extraArgList[IDX_RESTRAINTS]
    archiveType = extraArgList[IDX_ARCHIVE] # Only used for deriving the input location not the output.
    projectType = extraArgList[IDX_PROJECT_TYPE]

    # The remaining settings are fetched with getDeepByKeysOrAttributes
    # instead of by direct indexing.
    storeCING2db = stringMeansBooleanTrue(getDeepByKeysOrAttributes(extraArgList, IDX_STORE_DB))
    ranges = getDeepByKeysOrAttributes(extraArgList, IDX_RANGES)
    filterTopViolations = getDeepByKeysOrAttributes(extraArgList, IDX_FILTER_TOP)
    if filterTopViolations:
        filterTopViolations = int(filterTopViolations) # change '0' to 0
    # end if
    filterVasco = getDeepByKeysOrAttributes(extraArgList, IDX_FILTER_VASCO)
    if filterVasco:
        filterVasco = int(filterVasco)
    else:
        filterVasco = 1 # Default should be True
    # end if
    singleCoreOperation = getDeepByKeysOrAttributes(extraArgList, IDX_SINGLE_CORE_OPERATION)
    if singleCoreOperation:
        singleCoreOperation = int(singleCoreOperation)
    else:
        # NOTE(review): the original comment said the default should be True, but 0 is what is set.
        singleCoreOperation = 0
    # end if

    if archiveType == ARCHIVE_TYPE_FLAT:
        pass # default
    elif archiveType == ARCHIVE_TYPE_BY_ENTRY:
        inputDir = os.path.join(inputDir, entryId)
    elif archiveType == ARCHIVE_TYPE_BY_CH23:
        inputDir = os.path.join(inputDir, entryCodeChar2and3)
    elif archiveType == ARCHIVE_TYPE_BY_CH23_BY_ENTRY:
        inputDir = os.path.join(inputDir, entryCodeChar2and3, entryId)
    # end if

    isRemoteOutputDir = '@' in outputDir
    archive_id = getArchiveIdFromDirectoryName(outputDir)

    nTdebug("Using program arguments:")
    nTdebug("inputDir:             %s" % inputDir)
    nTdebug("outputDir:            %s" % outputDir)
    nTdebug("pdbConvention:        %s" % pdbConvention)
    nTdebug("restraintsConvention: %s" % restraintsConvention)
    nTdebug("archiveType:          %s" % archiveType)
    nTdebug("projectType:          %s" % projectType)
    nTdebug("storeCING2db:         %s" % storeCING2db)
    nTdebug("ranges:               %s" % ranges)
    nTdebug("filterTopViolations:  %s" % filterTopViolations)
    nTdebug("filterVasco:          %s" % filterVasco)
    nTdebug("singleCoreOperation:  %s" % singleCoreOperation)
    nTdebug("")
    nTdebug("Using derived settings:")
    nTdebug("modelCount:           %s" % modelCount)
    nTdebug("isRemoteOutputDir:    %s" % isRemoteOutputDir)
    nTdebug("archive_id:           %s" % archive_id)

    # For NMR_REDO required as most efficient.
    if singleCoreOperation:
        setToSingleCoreOperation()
    # end if

    # presume the directory still needs to be created.
    # NOTE(review): this check runs before the chdir below, so it looks in the
    # directory the process was started in.
    cingEntryDir = entryId + ".cing"
    if os.path.isdir(cingEntryDir):
        if forceRedo:
            nTmessage("Enforcing a redo")
            rmtree(cingEntryDir)
        else:
            mainIndexFile = os.path.join(cingEntryDir, "index.html")
            isDone = os.path.isfile(mainIndexFile)
            if isDone:
                nTmessage("SKIPPING ENTRY ALREADY DONE")
                return
            # end if
            nTmessage("REDOING BECAUSE VALIDATION CONSIDERED NOT DONE.")
            rmtree(cingEntryDir)
        # end if.
    # end if.

    if isRemoteOutputDir:
        os.chdir(cingDirTmp)
    else:
        os.chdir(outputDir)
    # end if

    project = Project(entryId)
    if project.removeFromDisk():
        nTerror("Failed to remove existing project (if present)")
        return True
    # end if.

    formatFileName = '%s.tgz'
    if projectType == PROJECT_TYPE_CING:
        formatFileName = '%s.cing.tgz'
    elif projectType == PROJECT_TYPE_PDB:
        formatFileName = 'pdb%s.ent.gz'
    # end if
    fileNameTgz = formatFileName % entryId

    allowedInputProtocolList = 'http file ssh'.split()
    # str method instead of the Python 2-only string.split function.
    inputProtocal = inputDir.split(':')[0]
    if inputProtocal in allowedInputProtocolList:
        stillToRetrieve = False
        if os.path.exists(fileNameTgz):
            if forceRetrieveInput:
                os.unlink(fileNameTgz)
                stillToRetrieve = True
            # end if
        else:
            stillToRetrieve = True
        # end if
        if stillToRetrieve:
            retrieveTgzFromUrl(entryId, inputDir, archiveType=archiveType, formatFileName=formatFileName)
        # end if
        if not os.path.exists(fileNameTgz):
            nTerror("Tgz should already have been present skipping entry")
            # NOTE(review): returns None here, unlike the other error paths that return True.
            return
        # end if
    else:
        nTdebug("Entry not retrieved which might be normal in some situations.")
    # end if.

    if projectType == PROJECT_TYPE_CING:
        project = Project.open(entryId, status='old')
        if not project:
            nTerror("Failed to init old project")
            return True
        # end if
    elif projectType == PROJECT_TYPE_CCPN:
        project = Project.open(entryId, status='new')
        if not project.initCcpn(ccpnFolder=fileNameTgz, modelCount=modelCount):
            nTerror("Failed to init project from ccpn")
            return True
        # end if
    elif projectType == PROJECT_TYPE_PDB:
        project = Project.open(entryId, status='new')
        # The gzipped entry is unpacked to a temporary pdb file that is
        # removed again right after it has been read.
        pdbFilePath = entryId + ".pdb"
        gunzip(fileNameTgz, outputFileName=pdbFilePath, removeOriginal=True)
        project.initPDB(pdbFile=pdbFilePath, convention=IUPAC, nmodels=modelCount)
        nTdebug("Removing tmp: %s" % pdbFilePath)
        os.unlink(pdbFilePath)
    # end if

    if ranges is not None:
        project.molecule.setRanges(ranges)
    # end if
    if archive_id:
        project.molecule.setArchiveId(archive_id)
    # end if
    project.molecule.superpose(ranges=ranges)

    # A failing filter is reported but does not stop the run.
    if filterTopViolations and not project.filterHighRestraintViol():
        nTerror("Failed to filterHighRestraintViol")
    # end if

    ####> MAIN UTILITY HERE
    if 0: # DEFAULT 0
        project.save()
    # end if
    if project.validate(htmlOnly=htmlOnly, ranges=ranges, doProcheck=doProcheck, doWhatif=doWhatif,
                        doWattos=doWattos, doQueeny=doQueeny, doTalos=doTalos, filterVasco=filterVasco):
        nTerror("Failed to validate project read")
        return True
    # end if

    # Write a single PDB file containing all models
    # according to IUPAC conventions
    project.export2PDB()
    project.save()

    if storeCING2db and archive_id:
        # Does require:
        #from cing.PluginCode.sqlAlchemy import csqlAlchemy
        # and should never crash run.
        try:
            if doStoreCING2db(entryId, archive_id, project=project):
                nTerror("Failed to store CING project's data to DB but continuing.")
            # end if
        except Exception:
            # Was a bare except, which also swallowed KeyboardInterrupt and SystemExit.
            nTtracebackError()
            nTerror("Failed to store CING project's data due to above traceback error.")
        # end try
    # end if

    if projectType == PROJECT_TYPE_CCPN:
        os.unlink(fileNameTgz) # temporary ccpn tgz
        if removeCcpnDirectory:
            rmdir(entryId) # ccpn dir may contain vasco info.
        # end if
    # end if

    if tgzCing:
        directoryNameCing = entryId + ".cing"
        tgzFileNameCing = directoryNameCing + ".tgz"
        if os.path.exists(tgzFileNameCing):
            nTwarning("Overwriting: " + tgzFileNameCing)
        # end if
        cmd = "tar -czf %s %s" % (tgzFileNameCing, directoryNameCing)
        nTdebug("cmd: %s" % cmd)
        status, result = commands.getstatusoutput(cmd)
        if status:
            nTerror("Failed to tar status: %s with result %s" % (status, result))
            return True
        # end if
        if isRemoteOutputDir:
            if putFileBySsh(tgzFileNameCing, outputDir, ntriesMax=2):
                # Fixed: the message used to print status/result of the tar
                # command above, which had succeeded at this point.
                nTerror("Failed to send file %s by ssh to %s" % (tgzFileNameCing, outputDir))
                nTerror("Maintaining results.")
                return True
            # end if
            nTmessage("Removing tgz result: %s" % tgzFileNameCing)
            os.remove(tgzFileNameCing)
            nTmessage("Removing cing dir itself: %s" % directoryNameCing)
            rmdir(directoryNameCing)
        else: # do NOT remove local copy
            pass
        # end if
    # end if
def main(entryId, *extraArgList):
    """Validate one entry with CING, optionally store it in the DB and tgz the result.

    entryId is the entry code; its characters 2 and 3 form a directory level
    of the output location.

    extraArgList must hold exactly these seven values, in this order:
        inputDir outputDir pdbConvention restraintsConvention archiveType
        projectType storeCING2db

    inputDir may be a directory or a url. The input file is only retrieved
    when inputDir starts with "http" or "file".

    Returns True on failure. Returns None after a completed run, when an entry
    is skipped because it was already done, and also when the input file is
    still missing after retrieval (that case is logged as an error).
    """

    # NOTE(review): set to True although the original comment states the
    # default is False; with True only 3 models are read and whatif, procheck,
    # wattos and talos are switched off. Confirm this is intended.
    fastestTest = True # default: False
    htmlOnly = False # default: False but enable it for faster runs without some actual data.
    doWhatif = True # disables whatif actual run
    doProcheck = True
    doWattos = True
    doTalos = True
    # default: True. Create a tgz for the cing project. In case of a CING project input
    # it will be overwritten.
    tgzCing = True
    modelCount = None # default setting is None
    if fastestTest:
        modelCount = 3
        htmlOnly = True
        doWhatif = False
        doProcheck = False
        doWattos = False
        doTalos = False
    # end if
    force_redo = True
    force_retrieve_input = True

    nTmessage(header)
    nTmessage(getStartMessage())

    expectedArgumentList = ['inputDir', 'outputDir', 'pdbConvention', 'restraintsConvention',
                            'archiveType', 'projectType', 'storeCING2db']
    expectedNumberOfArguments = len(expectedArgumentList)
    if len(extraArgList) != expectedNumberOfArguments:
        nTerror("Got arguments: " + repr(extraArgList))
        nTerror("Failed to get expected number of arguments: %d got %d" % (
            expectedNumberOfArguments, len(extraArgList)))
        nTerror("Expected arguments: %s" % expectedArgumentList)
        return True
    # end if

    entryCodeChar2and3 = entryId[1:3]

    inputDir = extraArgList[0]
    outputDir = os.path.join(extraArgList[1], DATA_STR, entryCodeChar2and3, entryId)
    pdbConvention = extraArgList[2] #@UnusedVariable
    restraintsConvention = extraArgList[3]
    archiveType = extraArgList[4]
    projectType = extraArgList[5]
    # The argument count was verified above, so the value is always present.
    # NOTE(review): the value is used as-is in a truth test below; if callers
    # pass strings, any non-empty string (also '0' or 'False') enables storing.
    storeCING2db = extraArgList[6]

    if archiveType == ARCHIVE_TYPE_FLAT:
        pass # default
    elif archiveType == ARCHIVE_TYPE_BY_ENTRY:
        inputDir = os.path.join(inputDir, entryId)
    elif archiveType == ARCHIVE_TYPE_BY_CH23:
        inputDir = os.path.join(inputDir, entryCodeChar2and3)
    elif archiveType == ARCHIVE_TYPE_BY_CH23_BY_ENTRY:
        inputDir = os.path.join(inputDir, entryCodeChar2and3, entryId)
    # end if

    ranges = None

    nTdebug("Using:")
    nTdebug("inputDir:             %s" % inputDir)
    nTdebug("outputDir:            %s" % outputDir)
    nTdebug("pdbConvention:        %s" % pdbConvention)
    nTdebug("restraintsConvention: %s" % restraintsConvention)
    nTdebug("archiveType:          %s" % archiveType)
    nTdebug("projectType:          %s" % projectType)
    nTdebug("modelCount:           %s" % modelCount)
    nTdebug("storeCING2db:         %s" % storeCING2db)
    nTdebug("ranges:               %s" % ranges)

    # presume the directory still needs to be created.
    # NOTE(review): this check runs before the chdir below, so it looks in the
    # directory the process was started in.
    cingEntryDir = entryId + ".cing"
    if os.path.isdir(cingEntryDir):
        if force_redo:
            nTmessage("Enforcing a redo")
            rmtree(cingEntryDir)
        else:
            mainIndexFile = os.path.join(cingEntryDir, "index.html")
            isDone = os.path.isfile(mainIndexFile)
            if isDone:
                nTmessage("SKIPPING ENTRY ALREADY DONE")
                return
            # end if
            nTmessage("REDOING BECAUSE VALIDATION CONSIDERED NOT DONE.")
            rmtree(cingEntryDir)
        # end if.
    # end if.

    os.chdir(outputDir)

    project = Project(entryId)
    if project.removeFromDisk():
        nTerror("Failed to remove existing project (if present)")
        return True
    # end if.

    formatFileName = '%s.tgz'
    if projectType == PROJECT_TYPE_CING:
        formatFileName = '%s.cing.tgz'
    elif projectType == PROJECT_TYPE_PDB:
        formatFileName = 'pdb%s.ent.gz'
    # end if
    fileNameTgz = formatFileName % entryId

    # Only these inputs are retrieved with retrieveTgzFromUrl.
    if inputDir.startswith(("http", "file")):
        stillToRetrieve = False
        if os.path.exists(fileNameTgz):
            if force_retrieve_input:
                os.unlink(fileNameTgz)
                stillToRetrieve = True
            # end if
        else:
            stillToRetrieve = True
        # end if
        if stillToRetrieve:
            retrieveTgzFromUrl(entryId, inputDir, archiveType=archiveType, formatFileName=formatFileName)
        # end if
        if not os.path.exists(fileNameTgz):
            nTerror("Tgz should already have been present skipping entry")
            # NOTE(review): returns None here, unlike the other error paths that return True.
            return
        # end if
    # end if.

    if projectType == PROJECT_TYPE_CING:
        # Needs to be copied because the open method doesn't take a directory argument..
        fullFileNameTgz = os.path.join(inputDir, fileNameTgz)
        shutil.copy(fullFileNameTgz, '.')
        project = Project.open(entryId, status='old')
        if not project:
            nTerror("Failed to init old project")
            return True
        # end if
    elif projectType == PROJECT_TYPE_CCPN:
        project = Project.open(entryId, status='new')
        if not project.initCcpn(ccpnFolder=fileNameTgz, modelCount=modelCount):
            nTerror("Failed to init project from ccpn")
            return True
        # end if
    elif projectType == PROJECT_TYPE_PDB:
        project = Project.open(entryId, status='new')
        # The gzipped entry is unpacked to a temporary pdb file that is
        # removed again right after it has been read.
        pdbFilePath = entryId + ".pdb"
        gunzip(fileNameTgz, outputFileName=pdbFilePath, removeOriginal=True)
        project.initPDB(pdbFile=pdbFilePath, convention=IUPAC, nmodels=modelCount)
        nTdebug("Removing tmp: %s" % pdbFilePath)
        os.unlink(pdbFilePath)
    # end if

    project.molecule.superpose(ranges=ranges)

    if project.validate(htmlOnly=htmlOnly, ranges=ranges, doProcheck=doProcheck, doWhatif=doWhatif,
                        doWattos=doWattos, doTalos=doTalos):
        nTerror("Failed to validate project read")
        return True
    # end if

    if storeCING2db:
        # Does require:
        #from cing.PluginCode.sqlAlchemy import csqlAlchemy
        # and should never crash run.
        try:
            if doStoreCING2db(entryId, ARCHIVE_CASP_ID, project=project):
                nTerror("Failed to store CING project's data to DB but continuing.")
            # end if
        except Exception:
            # Was a bare except, which also swallowed KeyboardInterrupt and SystemExit.
            nTtracebackError()
            nTerror("Failed to store CING project's data due to above traceback error.")
        # end try
    # end if

    project.save()

    if projectType == PROJECT_TYPE_CCPN:
        # The retrieved ccpn tgz itself is left in place here.
        rmdir(entryId) # temporary ccpn dir
    # end if

    if tgzCing:
        directoryNameCing = entryId + ".cing"
        tgzFileNameCing = directoryNameCing + ".tgz"
        if os.path.exists(tgzFileNameCing):
            nTwarning("Overwriting: " + tgzFileNameCing)
        # end if
        cmd = "tar -czf %s %s" % (tgzFileNameCing, directoryNameCing)
        # NOTE(review): the outcome of do_cmd is not inspected.
        do_cmd(cmd)
    # end if