Пример #1
0
    def fcImportAllData(self):

        #
        # Get CASD-NMR data
        #
        print '### import', self.identifier

        self.tmpdir = tempfile.mkdtemp(dir=casdConstants.topTmpDir)
        try:
            self.unpackDir = os.path.join(self.tmpdir, 'unpack')
            os.mkdir(self.unpackDir)

            self.unpackCasdTgz()

            self.fileModifier()

            self.importCasdData()

            # NB adds all data in relevant NmrProject to run
            #casdUtil.makeNmrCalcRun(self.ccpnProject, task='CASD-NMR')
            casdUtil.makeNmrCalcRun(self.ccpnProject, task='CING')

            self.saveProject()

        finally:
            shutil.rmtree(self.tmpdir, ignore_errors=True)

        # Close log file(s)
        self.closeLogFiles()
Пример #2
0
def restraintOverview(entryNames, extractDir=None):
    """ Make overview  of restraint data available, and check data type
    """

    result = {}

    if not extractDir:
        extractDir = tempfile.mkdtemp(dir=casdConstants.topTmpDir)

    for entryName in entryNames:
        result[entryName] = info = {}
        inputDir = os.path.join(allDataDir, entryName[1:3], entryName,
                                entryName + '.input', 'restraints')
        ll = os.listdir(inputDir)
        if len(ll) == 1:
            source = os.path.join(inputDir, ll[0])

            # restraints, check them
            targetDir = os.path.join(extractDir, entryName[1:3], entryName)
            if not os.path.exists(targetDir):
                casdUtil.extractCompressedFile(source, targetDir, entryName)
            checkDir = casdUtil.getLowestSubDir(targetDir,
                                                followDirs=('cns_format', ))

            convertType = checkRestraintTypes(checkDir, entryName)

        elif ll:
            print entryName, 'ERROR, multifiles', ll

        else:
            print entryName, 'NONE'
Пример #3
0
def makeCalcData(inputData, resultData):
    """Merge input (target) records and result records into one dictionary.

    inputData and resultData are iterables of dictionaries. Records with a
    true 'Invalid' value are skipped. Every remaining input record yields an
    entry flagged 'isOriginal', keyed by casdUtil.getEntryName(entry,
    isOriginal=True). Every remaining result record whose 'Target' matches
    an input record yields an entry keyed by casdUtil.getEntryName(entry);
    it carries its own descriptive tags plus the target-level tags copied
    from the matching original entry. Result records without a matching
    target are dropped.

    Copied values are converted with str() when they are true; false values
    (None, empty string, 0) are stored unchanged.

    Returns the dictionary of entry name -> entry dictionary.
    """
    # Tags defined per target; result entries inherit them from the original
    targetTags = (
        'LACS CA/CB Offset',
        'Oligomeric state',
        'PDBcode',
        'Defined Residues',
    )
    # Tags read directly from each result record
    resultTags = ('Target', 'Group', 'Program Type', "RDCdata", "Peaklist",
                  "Truncated", 'Submitted on')

    def copyAsStrings(source, target, tags):
        # Copy tags from source to target, stringifying true values only
        for tag in tags:
            value = source.get(tag)
            target[tag] = str(value) if value else value

    calcData = {}
    origNameByTarget = {}

    # Original (input) entries
    for record in inputData:
        if record.get("Invalid"):
            continue

        entry = {'isOriginal': True}
        copyAsStrings(record, entry, ('Target', ) + targetTags)

        origEntryName = casdUtil.getEntryName(entry, isOriginal=True)
        calcData[origEntryName] = entry
        origNameByTarget[record['Target']] = origEntryName

    # Submitted (result) entries
    for record in resultData:
        if record.get("Invalid"):
            continue

        origEntryName = origNameByTarget.get(record['Target'])
        if not origEntryName:
            continue

        entry = {'EntryID': record.get('EntryID')}
        copyAsStrings(record, entry, resultTags)

        origEntry = calcData[origEntryName]
        for tag in targetTags:
            entry[tag] = origEntry[tag]

        calcData[casdUtil.getEntryName(entry)] = entry

    return calcData
Пример #4
0
def getCasdNmrProjectInfo(casdNmrRefFile=None):
    """Return one dictionary per row of the CASD-NMR data page table.

    The page text is obtained through getReferenceTextFileFromHttp() using
    casdNmrDataUrl and the reference file casdNmrRefFile, which defaults to
    'dataPage.html' in allDataDir. The text is parsed with
    casdUtil.parseHtmlTable(); the first table row supplies the keys and
    each following row becomes a dictionary of key -> cell value. The first
    link of the row is added under the key 'DataLink'.

    Returns the list of row dictionaries, in table order.
    """
    # The reference file location is customisable
    refFile = casdNmrRefFile or os.path.join(allDataDir, 'dataPage.html')

    # Get the web page...
    pageLines = getReferenceTextFileFromHttp(casdNmrDataUrl,
                                             refFile,
                                             refText="CASD-NMR data",
                                             isGzipped=False)
    table, links = casdUtil.parseHtmlTable(''.join(pageLines))

    header = table[0]
    result = []
    # Row indices start at 1 so they stay aligned with the links list
    for rowIndex, row in enumerate(table[1:], 1):
        record = dict((header[col], cell) for col, cell in enumerate(row))
        record['DataLink'] = links[rowIndex][0]
        result.append(record)

    return result
Пример #5
0
def makeOverview(resultData, fieldOrder):
    """Return a sorted list of rows, one per record in resultData.

    Each row holds the record's values for the tags in fieldOrder, in that
    order; the pseudo-tag 'EntryName' is replaced by
    casdUtil.getEntryName(record). A final column is appended holding a
    space-separated string of data-availability codes:

    - 'Ccpn_NO'        no <entry>.tgz file exists
    - 'Restraints_NO'  the 'restraints' input directory is empty
    - 'Structures_NO'  the 'structures' input directory is empty
    - 'UseOrigData_NO' the 'superseded' input directory is not empty
    """
    # (input subdirectory, code, True if the code applies to an EMPTY dir)
    dirChecks = (
        ('restraints', 'Restraints_NO', True),
        ('structures', 'Structures_NO', True),
        ('superseded', 'UseOrigData_NO', False),
    )

    rows = []
    for record in resultData:
        entryName = casdUtil.getEntryName(record)

        # Normal data columns
        row = [
            entryName if tag == 'EntryName' else record.get(tag)
            for tag in fieldOrder
        ]

        # Data-is-present code string
        entryStem = os.path.join(allDataDir, entryName[1:3], entryName,
                                 entryName)
        codes = []
        if not os.path.exists(entryStem + '.tgz'):
            codes.append('Ccpn_NO')

        inputDir = entryStem + '.input'
        for subdir, code, flagWhenEmpty in dirChecks:
            isEmpty = not os.listdir(os.path.join(inputDir, subdir))
            if isEmpty == flagWhenEmpty:
                codes.append(code)

        row.append(' '.join(codes))
        rows.append(row)

    return sorted(rows)
Пример #6
0
def makeCcpnProject(entryName):
    """Build, save and package the CCPN project for one entry.

    The project of the matching original entry (name prefix before the
    first '_' plus '_Org') is loaded, a 'CING' NmrCalc run is prepared, and
    the structure and restraint archives of entryName are extracted into a
    temporary directory and read into the project. The project is then
    saved under the entry name, packaged, and the unpacked copy removed.

    Returns the path of the packaged project (<dataDir>/<entryName>.tgz)
    on success. Any exception is caught: an 'ERROR for ...' line and the
    traceback are printed to sys.stdout and the function returns None.

    NOTE(review): the handler is a bare 'except:', so it also catches
    KeyboardInterrupt and SystemExit - confirm that this is intended.
    """

    logFileHandle = None

    try:

        #entryName = casdUtil.getEntryName(info)
        # Name of the original (organisers') entry for the same target
        orgName = entryName.split('_')[0] + '_Org'

        print 'Starting', entryName

        # Presumably redirects sys.stdout to the log file (it is reset in
        # the final 'finally' below) - TODO confirm in casdUtil
        logFileHandle = casdUtil.createLogFile(entryName, 'extractEntry')

        # get CCPN project from Org data
        #orgName = casdUtil.getEntryName(info, isOriginal=True)
        path = os.path.join(allDataDir, orgName[1:3], orgName)
        ppath = os.path.join(path, orgName)
        ff = ppath + '.tgz'
        # Either the unpacked project directory or its .tgz must be present
        if not (os.path.exists(ppath) or os.path.exists(ff)):
            raise Exception("NO original CCPN project in %s" % path)
        # Unpack the archive only when the project directory is missing
        if not os.path.isdir(ppath):
            casdUtil.extractCompressedFile(ff, path, entryName)
        if not os.path.exists(ppath):
            raise Exception("NO extracted CCPN project in %s" % path)
        ccpnProject = genIo.loadProject(ppath, suppressGeneralDataDir=True)

        # neutralize any other pending CASD-NMR projects
        casdRun = casdUtil.prepareNmrCalcRun(ccpnProject, 'CING')
        for run in casdRun.nmrCalcStore.findAllRuns(status='pending'):
            if run is not casdRun:
                run.status = 'provisional'

        #
        dataDir = os.path.join(allDataDir, entryName[1:3], entryName)
        # Scratch directory for the extracted archives; removed in 'finally'
        tmpdir = tempfile.mkdtemp(dir=casdConstants.topTmpDir)
        try:

            # Extract structure data
            tmpstruc = os.path.join(tmpdir, 'structures')
            src = casdUtil.getInputFile(entryName, 'structures')
            casdUtil.extractCompressedFile(src,
                                           tmpstruc,
                                           entryName,
                                           okExts=('pdb', ))
            tmpstruc = casdUtil.getLowestSubDir(tmpstruc,
                                                followDirs=('cns_format', ))
            structureFiles = os.listdir(tmpstruc)

            # Extract restraint data
            # Restraints are optional: lookup errors are ignored and a
            # false 'src' means no restraint file was found
            tmprestr = os.path.join(tmpdir, 'restraints')
            src = casdUtil.getInputFile(entryName,
                                        'restraints',
                                        ignoreErrors=True)
            if src:
                casdUtil.extractCompressedFile(src, tmprestr, entryName)
                tmprestr = casdUtil.getLowestSubDir(
                    tmprestr, followDirs=('cns_format', ))
                restraintFiles = os.listdir(tmprestr)
            else:
                restraintFiles = ()
                print 'WARNING, %s no restraints found at %s' % (entryName,
                                                                 src)

            # read in data

            # FormatConverter version
            fcw = FormatConverterWrapper(ccpnProject=ccpnProject)
            # dataIo version
            #fcw = None

            if structureFiles:
                # NBNB uses Rasmus in-development trunk structure reading.
                # Works well. Temporarily disabled
                # read in structures
                pdbFiles = [
                    x for x in structureFiles if any(
                        x.endswith(y) for y in casdConstants.pdbEndings)
                ]

                floatFiles = [
                    x for x in structureFiles if x.endswith('.float')
                ]

                if floatFiles:
                    # Use only pdb files with names that match float files
                    # (strips the 6-character '.float' suffix and a
                    # 4-character suffix from the pdb file names)
                    stems = set(x[:-6] for x in floatFiles)
                    pdbFiles = [x for x in pdbFiles if x[:-4] in stems]

                pdbPaths = [os.path.join(tmpstruc, x) for x in pdbFiles]

                # The condition is hard-wired to True, so the
                # FormatConverter 'else' branch below is never executed
                if True:
                    #if fcw is None:
                    #Always use dataIo version
                    # dataIo version
                    ensemble = StructureIo.getStructureFromFiles(
                        ccpnProject.findFirstMolSystem(), pdbPaths)

                    if ensemble is None:
                        print '### Skipping %s, no structures loaded' % entryName

                    else:
                        print '### num files, ensemble', len(
                            pdbPaths), ensemble.ensembleId
                        casdRun.newStructureEnsembleData(
                            name=entryName, structureEnsemble=ensemble)

                else:
                    # FormatConverter version
                    #fileInfo = fcw.determineFileInfo(pdbPaths[0])
                    if len(pdbPaths) != 1:
                        print 'WARNING %s pdb files, only one read. TBD FIX' % len(
                            pdbPaths)
                    dataType = 'coordinates'
                    formatName = 'pseudoPdb'
                    pdbPath = pdbPaths[0]
                    print 'Reading structure file', dataType, formatName, pdbPath
                    fcw.readFile(dataType, formatName, pdbPath)

                    # NBNB TODO 1) How to set up trying true PDB before pseudoPdb?
                    #           2) How to read several files into an ensemble?
                    #           3) How to get hold of the new ensemble for putting in NmrCalc

            else:
                print '### Skipping %s, no structure file' % entryName

            # Make NmrCalc object for shift list
            # NBNB consider later: if we are reading in assigned peaks,
            # shifts may change. NBNB
            shiftLists = casdRun.nmrCalcStore.nmrProject.findAllMeasurementLists(
                className='ShiftList')
            # The shift list is added to the run only when exactly one exists
            # NOTE(review): 'should be s' in the warning below looks like a
            # typo, probably meant 'should be 1' - confirm before changing
            if len(shiftLists) == 1:
                casdRun.newMeasurementListData(
                    name='Shiftlist', measurementList=shiftLists.pop())
            else:
                print 'WARNING. %s shift lists found, should be s' % len(
                    shiftLists)

            # Restraints reading
            if restraintFiles:
                if fcw is None:
                    # NBNB TBD dataIo restraint reading to go here
                    #for rfile in restraintFiles:
                    #  fileInfo = casdUtil.getFileInfo(tmprestr, rfile)
                    pass

                else:
                    # FormatConverter version
                    # Each file is identified first; only distance, dihedral
                    # and RDC constraint files are read
                    restraintLists = []
                    for rfile in restraintFiles:
                        rpath = os.path.join(tmprestr, rfile)
                        fileInfo = fcw.determineFileInfo(rpath)
                        dataType = fileInfo.get('dataType')
                        formatName = fileInfo.get('formatName')
                        if dataType is None or formatName is None:
                            print 'Skipping unidentified restraint file', dataType, formatName, rfile

                        elif dataType not in (
                                'distanceConstraints',
                                'dihedralConstraints',
                                'rdcConstraints',
                        ):
                            print 'Skipping wrong type of restraint file', dataType, formatName, rfile

                        else:
                            print 'Reading restraint file', dataType, formatName, rfile
                            fcw.readFile(dataType, formatName, rpath)
                            if fcw.conversionSuccess:
                                print("Successful restraint file read:\n%s" %
                                      fcw.conversionInfo)
                                restraintLists.append(fcw.ccpnObjectOrList)
                            else:
                                print("Failed restraint file read:\n%s" %
                                      fcw.conversionInfo)

                    # Successfully read lists are stored on the run together
                    if restraintLists:
                        print("Found restraint lists: %s" %
                              len(restraintLists))
                        casdRun.newConstraintStoreData(
                            constraintLists=restraintLists,
                            name='Restraintlists')

                # linkResonances
                # NOTE(review): this runs only when restraint files exist,
                # and would raise if fcw were None - confirm intended
                print '### linking resonances'
                linkingInfo = fcw.linkAllResonancesToAtoms()

        finally:
            # Always discard the scratch directory, even after an error
            shutil.rmtree(tmpdir, ignore_errors=True)
            pass

        # rename and package project
        # The unpacked project directory is removed once packaged
        ccpnOutputDir = os.path.join(dataDir, entryName)
        genIo.saveProject(ccpnProject,
                          newPath=ccpnOutputDir,
                          newProjectName=entryName,
                          checkValid=True,
                          removeExisting=True)
        genIo.packageProject(ccpnProject, ccpnOutputDir)
        shutil.rmtree(ccpnOutputDir)
        ccpnOutputPath = ccpnOutputDir + '.tgz'
        print 'SUCCESS, %s saved to %s' % (entryName, ccpnOutputPath)

        return ccpnOutputPath

    except:
        # Report the failure on stdout and fall through: the function then
        # returns None instead of raising
        print 'ERROR for %s' % (entryName)
        traceback.print_exc(file=sys.stdout)

    finally:
        # Close the log file and restore the interpreter's original stdout;
        # skipped when the log file was never created
        if logFileHandle is not None:
            logFileHandle.close()
            sys.stdout = sys.__stdout__
Пример #7
0
    def setImportDir(self, textOutput):
        """Set self.importDir from the unpack directory.

        The value stored is whatever casdUtil.getLowestSubDir() returns for
        self.unpackDir. The textOutput argument is accepted but not used.
        """
        lowestDir = casdUtil.getLowestSubDir(self.unpackDir)
        self.importDir = lowestDir
Пример #8
0
    def getInputFile(self, subdir, ignoreErrors=False):
        """Return casdUtil.getInputFile() for this entry and subdir.

        self.identifier is passed as the entry name; ignoreErrors is
        forwarded unchanged.
        """
        entryName = self.identifier
        inputFile = casdUtil.getInputFile(entryName,
                                          subdir,
                                          ignoreErrors=ignoreErrors)
        return inputFile