def fcImportAllData(self):
    #
    # Get CASD-NMR data
    #
    print '### import', self.identifier

    self.tmpdir = tempfile.mkdtemp(dir=casdConstants.topTmpDir)
    try:
        self.unpackDir = os.path.join(self.tmpdir, 'unpack')
        os.mkdir(self.unpackDir)
        self.unpackCasdTgz()
        self.fileModifier()
        self.importCasdData()

        # NB adds all data in relevant NmrProject to run
        #casdUtil.makeNmrCalcRun(self.ccpnProject, task='CASD-NMR')
        casdUtil.makeNmrCalcRun(self.ccpnProject, task='CING')

        self.saveProject()

    finally:
        shutil.rmtree(self.tmpdir, ignore_errors=True)

        # Close log file(s)
        self.closeLogFiles()
def restraintOverview(entryNames, extractDir=None):
    """ Make overview of restraint data available, and check data type
    """
    result = {}

    if not extractDir:
        extractDir = tempfile.mkdtemp(dir=casdConstants.topTmpDir)

    for entryName in entryNames:
        result[entryName] = info = {}
        inputDir = os.path.join(allDataDir, entryName[1:3], entryName,
                                entryName + '.input', 'restraints')
        ll = os.listdir(inputDir)
        if len(ll) == 1:
            # restraints, check them
            source = os.path.join(inputDir, ll[0])
            targetDir = os.path.join(extractDir, entryName[1:3], entryName)
            if not os.path.exists(targetDir):
                casdUtil.extractCompressedFile(source, targetDir, entryName)
            checkDir = casdUtil.getLowestSubDir(targetDir,
                                                followDirs=('cns_format',))
            convertType = checkRestraintTypes(checkDir, entryName)

        elif ll:
            print entryName, 'ERROR, multifiles', ll

        else:
            print entryName, 'NONE'
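
# --- Hedged note (added for clarity, not part of the original module) ---
# restraintOverview and the functions below assume the on-disk layout
#   <allDataDir>/<entryName[1:3]>/<entryName>/<entryName>.input/<subdir>/
# with <subdir> one of 'restraints', 'structures' or 'superseded', and the
# packaged CCPN project stored alongside it as <entryName>.tgz.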
def makeCalcData(inputData, resultData):
    """make calcData data structure from merging inputData and resultData
    """
    origNames = {}
    calcData = {}

    # get data from input
    for dd in inputData:
        if not dd.get("Invalid"):
            dd2 = {'isOriginal': True}
            for tag in ('Target', 'LACS CA/CB Offset', 'Oligomeric state',
                        'PDBcode', 'Defined Residues',):
                val = dd.get(tag)
                if val:
                    val = str(val)
                dd2[tag] = val
            entryName = casdUtil.getEntryName(dd2, isOriginal=True)
            calcData[entryName] = dd2
            origNames[dd['Target']] = entryName

    # get data from entries
    for dd in resultData:
        if not dd.get("Invalid"):
            origName = origNames.get(dd['Target'])
            if origName:
                dd2 = {}
                dd2['EntryID'] = dd.get('EntryID')
                for tag in ('Target', 'Group', 'Program Type', "RDCdata",
                            "Peaklist", "Truncated", 'Submitted on'):
                    val = dd.get(tag)
                    if val:
                        val = str(val)
                    dd2[tag] = val
                origDd = calcData[origName]
                for tag in ('LACS CA/CB Offset', 'Oligomeric state',
                            'PDBcode', 'Defined Residues',):
                    dd2[tag] = origDd[tag]
                entryName = casdUtil.getEntryName(dd2)
                calcData[entryName] = dd2
    #
    return calcData
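
# Illustration (added for clarity; target/group names are hypothetical): given
#   inputData  item {'Target': 'Target1', 'PDBcode': '2xyz', ...}
#   resultData item {'Target': 'Target1', 'Group': 'GroupA', 'EntryID': '7', ...}
# makeCalcData returns a dict keyed by casdUtil.getEntryName(...), in which the
# original entry carries 'isOriginal': True and each submitted entry copies the
# 'LACS CA/CB Offset', 'Oligomeric state', 'PDBcode' and 'Defined Residues'
# values from its original.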
def getCasdNmrProjectInfo(casdNmrRefFile=None):
    """ Code to get list of CASD-NMR projects info
    """
    result = []

    # This file is customisable!
    if not casdNmrRefFile:
        casdNmrRefFile = os.path.join(allDataDir, 'dataPage.html')

    # Get the web page...
    text = ''.join(getReferenceTextFileFromHttp(casdNmrDataUrl, casdNmrRefFile,
                                                refText="CASD-NMR data",
                                                isGzipped=False))
    table, links = casdUtil.parseHtmlTable(text)
    tags = table[0]
    for ii in range(1, len(table)):
        dd = {}
        result.append(dd)
        ll = table[ii]
        for jj, val in enumerate(table[ii]):
            dd[tags[jj]] = val
        dd['DataLink'] = links[ii][0]
    #
    return result
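
# Illustration (added; the column names are inferred from the tags used in
# makeCalcData above and are not guaranteed to match the live web page): each
# element of the returned list maps a table-header tag to its cell value, plus
# 'DataLink' taken from the first link in that row, e.g.
#   {'Target': '...', 'Group': '...', 'Submitted on': '...', 'DataLink': 'http://...'}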
def makeOverview(resultData, fieldOrder):
    """ make list-of-lists of results data, sorted.
    """
    result = []
    for info in resultData:
        entryName = casdUtil.getEntryName(info)

        # get normal data
        row = []
        result.append(row)
        for tag in fieldOrder:
            if tag == 'EntryName':
                row.append(entryName)
            else:
                row.append(info.get(tag))

        # Get data-is-present code string
        ll = []
        pp = os.path.join(allDataDir, entryName[1:3], entryName, entryName)
        if not os.path.exists(pp + '.tgz'):
            ll.append('Ccpn_NO')
        inputDir = pp + '.input'
        if not os.listdir(os.path.join(inputDir, 'restraints')):
            ll.append('Restraints_NO')
        if not os.listdir(os.path.join(inputDir, 'structures')):
            ll.append('Structures_NO')
        if os.listdir(os.path.join(inputDir, 'superseded')):
            ll.append('UseOrigData_NO')
        row.append(' '.join(ll))
    #
    result.sort()
    return result
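
# Hedged usage sketch (added; not part of the original file). It only chains
# functions defined in this module; the field names in fieldOrder are
# assumptions drawn from the tags handled in makeCalcData above.
def _exampleOverview(inputData, resultData):
    fieldOrder = ['EntryName', 'Target', 'Group', 'Program Type',
                  'Submitted on']
    calcData = makeCalcData(inputData, resultData)
    return makeOverview(calcData.values(), fieldOrder)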
def makeCcpnProject(entryName):
    """ Execute conversion to CCPN project
    """
    logFileHandle = None
    try:
        #entryName = casdUtil.getEntryName(info)
        orgName = entryName.split('_')[0] + '_Org'
        print 'Starting', entryName
        logFileHandle = casdUtil.createLogFile(entryName, 'extractEntry')

        # get CCPN project from Org data
        #orgName = casdUtil.getEntryName(info, isOriginal=True)
        path = os.path.join(allDataDir, orgName[1:3], orgName)
        ppath = os.path.join(path, orgName)
        ff = ppath + '.tgz'
        if not (os.path.exists(ppath) or os.path.exists(ff)):
            raise Exception("NO original CCPN project in %s" % path)
        if not os.path.isdir(ppath):
            casdUtil.extractCompressedFile(ff, path, entryName)
        if not os.path.exists(ppath):
            raise Exception("NO extracted CCPN project in %s" % path)
        ccpnProject = genIo.loadProject(ppath, suppressGeneralDataDir=True)

        # neutralize any other pending CASD-NMR projects
        casdRun = casdUtil.prepareNmrCalcRun(ccpnProject, 'CING')
        for run in casdRun.nmrCalcStore.findAllRuns(status='pending'):
            if run is not casdRun:
                run.status = 'provisional'

        #
        dataDir = os.path.join(allDataDir, entryName[1:3], entryName)
        tmpdir = tempfile.mkdtemp(dir=casdConstants.topTmpDir)
        try:
            # Extract structure data
            tmpstruc = os.path.join(tmpdir, 'structures')
            src = casdUtil.getInputFile(entryName, 'structures')
            casdUtil.extractCompressedFile(src, tmpstruc, entryName,
                                           okExts=('pdb',))
            tmpstruc = casdUtil.getLowestSubDir(tmpstruc,
                                                followDirs=('cns_format',))
            structureFiles = os.listdir(tmpstruc)

            # Extract restraint data
            tmprestr = os.path.join(tmpdir, 'restraints')
            src = casdUtil.getInputFile(entryName, 'restraints',
                                        ignoreErrors=True)
            if src:
                casdUtil.extractCompressedFile(src, tmprestr, entryName)
                tmprestr = casdUtil.getLowestSubDir(tmprestr,
                                                    followDirs=('cns_format',))
                restraintFiles = os.listdir(tmprestr)
            else:
                restraintFiles = ()
                print 'WARNING, %s no restraints found at %s' % (entryName, src)

            # read in data
            # FormatConverter version
            fcw = FormatConverterWrapper(ccpnProject=ccpnProject)
            # dataIo version
            #fcw = None

            if structureFiles:
                # NBNB uses Rasmus in-development trunk structure reading.
                # WOrks well. Temporarily disabled

                # read in structures
                pdbFiles = [x for x in structureFiles
                            if any(x.endswith(y)
                                   for y in casdConstants.pdbEndings)]
                floatFiles = [x for x in structureFiles
                              if x.endswith('.float')]
                if floatFiles:
                    # Use only pdb files with names that match float files
                    stems = set(x[:-6] for x in floatFiles)
                    pdbFiles = [x for x in pdbFiles if x[:-4] in stems]
                pdbPaths = [os.path.join(tmpstruc, x) for x in pdbFiles]

                if True:
                #if fcw is None:
                    # Always use dataIo version
                    ensemble = StructureIo.getStructureFromFiles(
                        ccpnProject.findFirstMolSystem(), pdbPaths)
                    if ensemble is None:
                        print '### Skipping %s, no structures loaded' % entryName
                    else:
                        print '### num files, ensemble', len(pdbPaths), ensemble.ensembleId
                        casdRun.newStructureEnsembleData(
                            name=entryName, structureEnsemble=ensemble)
                else:
                    # FormatConverter version
                    #fileInfo = fcw.determineFileInfo(pdbPaths[0])
                    if len(pdbPaths) != 1:
                        print 'WARNING %s pdb files, only one read. TBD FIX' % len(pdbPaths)
                    dataType = 'coordinates'
                    formatName = 'pseudoPdb'
                    pdbPath = pdbPaths[0]
                    print 'Reading structure file', dataType, formatName, pdbPath
                    fcw.readFile(dataType, formatName, pdbPath)
                    # NBNB TODO 1) How to set up trying true PDB before pseudoPdb?
                    # 2) How to read several files into an ensemble?
                    # 3) How to get hold of the new ensemble for putting in NmrCalc

            else:
                print '### Skipping %s, no structure file' % entryName

            # Make NmrCalc object for shift list
            # NBNB consider later: if we are reading in assigned peaks,
            # shifts may change. NBNB
            shiftLists = casdRun.nmrCalcStore.nmrProject.findAllMeasurementLists(
                className='ShiftList')
            if len(shiftLists) == 1:
                casdRun.newMeasurementListData(name='Shiftlist',
                                               measurementList=shiftLists.pop())
            else:
                print 'WARNING. %s shift lists found, should be 1' % len(shiftLists)

            # Restraints reading
            if restraintFiles:
                if fcw is None:
                    # NBNB TBD dataIo restraint reading to go here
                    #for rfile in restraintFiles:
                    #    fileInfo = casdUtil.getFileInfo(tmprestr, rfile)
                    pass

                else:
                    # FormatConverter version
                    restraintLists = []
                    for rfile in restraintFiles:
                        rpath = os.path.join(tmprestr, rfile)
                        fileInfo = fcw.determineFileInfo(rpath)
                        dataType = fileInfo.get('dataType')
                        formatName = fileInfo.get('formatName')
                        if dataType is None or formatName is None:
                            print 'Skipping unidentified restraint file', dataType, formatName, rfile
                        elif dataType not in ('distanceConstraints',
                                              'dihedralConstraints',
                                              'rdcConstraints',):
                            print 'Skipping wrong type of restraint file', dataType, formatName, rfile
                        else:
                            print 'Reading restraint file', dataType, formatName, rfile
                            fcw.readFile(dataType, formatName, rpath)
                            if fcw.conversionSuccess:
                                print("Successful restraint file read:\n%s"
                                      % fcw.conversionInfo)
                                restraintLists.append(fcw.ccpnObjectOrList)
                            else:
                                print("Failed restraint file read:\n%s"
                                      % fcw.conversionInfo)

                    if restraintLists:
                        print("Found restraint lists: %s" % len(restraintLists))
                        casdRun.newConstraintStoreData(
                            constraintLists=restraintLists,
                            name='Restraintlists')

            # linkResonances
            print '### linking resonances'
            linkingInfo = fcw.linkAllResonancesToAtoms()

        finally:
            shutil.rmtree(tmpdir, ignore_errors=True)

        # rename and package project
        ccpnOutputDir = os.path.join(dataDir, entryName)
        genIo.saveProject(ccpnProject, newPath=ccpnOutputDir,
                          newProjectName=entryName, checkValid=True,
                          removeExisting=True)
        genIo.packageProject(ccpnProject, ccpnOutputDir)
        shutil.rmtree(ccpnOutputDir)
        ccpnOutputPath = ccpnOutputDir + '.tgz'
        print 'SUCCESS, %s saved to %s' % (entryName, ccpnOutputPath)
        return ccpnOutputPath

    except:
        print 'ERROR for %s' % (entryName)
        traceback.print_exc(file=sys.stdout)

    finally:
        if logFileHandle is not None:
            logFileHandle.close()
        sys.stdout = sys.__stdout__
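
# Hedged batch-driver sketch (added; not part of the original file): convert a
# list of entry names and collect the output .tgz paths. makeCcpnProject
# returns None when it catches an error, so failures show up as None values.
def _exampleConvertEntries(entryNames):
    results = {}
    for entryName in entryNames:
        results[entryName] = makeCcpnProject(entryName)
    return results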
def setImportDir(self, textOutput):
    self.importDir = casdUtil.getLowestSubDir(self.unpackDir)
def getInputFile(self, subdir, ignoreErrors=False):
    return casdUtil.getInputFile(self.identifier, subdir,
                                 ignoreErrors=ignoreErrors)