Exemplo n.º 1
0
 def __init__(self, dictName, dictRegistry=NamedDicts):
     """Set up for the protocol by opening the named persistent dictionary.

     Looks up dictName in dictRegistry to get the db file, port, and log
     file, creating the db home directory when needed, then starts
     logging and opens the shelve db (or the in-memory test dict).

     Raises:
         PersistentDictFactoryException: if dictName is not registered
             (or its registry entry lacks the expected keys).
     """
     self.dictName = dictName
     try:
         self.dbFile = dictRegistry[dictName]['dbFile']
         self.port = dictRegistry[dictName]['port']
         if self.dbFile:
             dbHome = os.path.split(self.dbFile)[0]
             if not os.path.exists(dbHome):
                 os.makedirs(dbHome, 0o777)
             self.dbHome = dbHome
             logFile = dictRegistry[dictName]['logFile']
             # relative log paths are resolved against the db home
             if not logFile.startswith('/'):
                 logFile = os.path.join(dbHome, logFile)
             self.logFile = logFile
     except KeyError:
         # was a bare ``except:``, which misreported unrelated failures
         # (e.g. os.makedirs errors) as an unknown dict name; only a
         # missing registry key means "no dict of that name"
         raise PersistentDictFactoryException(
             'Error, no dict of that name: %s' % dictName)
     validateDirectory(os.path.dirname(self.logFile))
     log.startLogging(open(self.logFile, 'w'))
     if dictName == 'Test':
         self.dict = _TestDict
     else:
         self.dict = dbshelve.open(self.dbFile)
         os.chmod(self.dbFile, 0o666)
Exemplo n.º 2
0
 def setStore(self, wuid, cachedStore=False):
     """Instantiate the per-work-unit store.

     Returns None if wuid is None.  Otherwise builds a store rooted at
     <storeArgs[0]>/wuidStores/<wuid>, saves it on self (as the cached
     store when cachedStore is True), and returns True.
     """
     if wuid is None:
         return None
     storeHome = os.path.join(self._storeArgs[0], 'wuidStores', wuid)
     validateDirectory(storeHome, noExceptionRaise=True)
     storeArgs = [self._storeName, self._storeFieldsList, storeHome]
     storeArgs.extend(self._storeArgs[1:])
     store = self._storeClass(*storeArgs)
     if cachedStore:
         self._cachedStore = store
     else:
         self._store = store
     return True
Exemplo n.º 3
0
    def __init__(self, name, fieldsList, dbHome, dbName):
        """Open (creating if necessary) the bsddb table store."""

        # initialize the generic store attributes
        super(BsddbStore, self).__init__(name, fieldsList)

        self._dbHome = dbHome
        self._dbName = dbName

        # the db home directory must exist before the db can be opened
        if not validateDirectory(self._dbHome):
            raise BsddbStoreError("Couldn't create dbHome directory: %s." %
                                  self._dbHome)

        # only ask bsdTableDB to create the db file if it isn't on disk yet
        createFlag = 0 if os.path.isfile(
            os.path.join(self._dbHome, self._dbName)) else 1

        self._dbHandle = dbtablesCDB.bsdTableDB(
            self._dbName, dbhome=self._dbHome,
            create=createFlag)  # , dbflags=db.DB_INIT_CDB)

        # ensure our table exists; tolerate a concurrent creator
        if self._name not in self._dbHandle.ListTables():
            try:
                self._dbHandle.CreateTable(self._name, self._fieldsList)
            except dbtablesCDB.TableAlreadyExists:
                pass
Exemplo n.º 4
0
def updateJson(jsonFile,
               obj,
               stringifyKeys=None,
               ubt=None,
               publicizeKeys=None,
               pickleKeys=None):
    """Write obj in JSON format to file or update it.

    Args:
        jsonFile: path of the JSON file to (over)write; its directory
            is created if missing.
        obj: object to serialize.
        stringifyKeys: dict keys whose non-None values are str()-ified.
        ubt: url base tracker used when publicizing result files.
        publicizeKeys: dict keys run through publicizeResultFiles()
            (only when ubt is not None).
        pickleKeys: dict keys whose non-None values are pickled via
            pickleThis().
    """
    # normalize mutable defaults (were ``=[]`` — shared across calls)
    stringifyKeys = [] if stringifyKeys is None else stringifyKeys
    publicizeKeys = [] if publicizeKeys is None else publicizeKeys
    pickleKeys = [] if pickleKeys is None else pickleKeys

    if isinstance(obj, dict) and (
            (ubt is not None and len(publicizeKeys) > 0)
            or len(stringifyKeys) > 0 or len(pickleKeys) > 0):
        # deepcopy once (was up to three times) so the caller's dict is
        # never mutated
        obj = copy.deepcopy(obj)

        # publicize
        if ubt is not None:
            for k in publicizeKeys:
                obj[k] = publicizeResultFiles(obj[k], ubt)

        # make sure result is stringified
        for k in stringifyKeys:
            if obj.get(k, None) is not None:
                obj[k] = str(obj[k])

        # pickle keys
        for k in pickleKeys:
            if obj.get(k, None) is not None:
                obj[k] = pickleThis(obj[k])

    validateDirectory(os.path.dirname(jsonFile))
    with open(jsonFile, 'w') as f:
        try:
            json.dump(obj, f)
        except Exception:
            # log the offending object, then let the error propagate
            print("Got exception dumping json:\n{}".format(
                pformat(obj, indent=2)))
            raise
Exemplo n.º 5
0
def getWorkUnit(wuConfig, configFile=None, configDict=None):
    """Return work unit id and WorkUnit object from wuConfig.  Localizes
    any stage files.

    Raises:
        RuntimeError: if wuConfig's type has no WorkUnit implementation.
    """
    if configDict is None:  # avoid mutable default argument
        configDict = {}
    workDir = GridServiceConfig(configFile).getWorkUnitWorkDir()
    validateDirectory(workDir)
    procId = wuConfig.getId()
    wuType = wuConfig.getType()
    wuClass = WorkUnitTypeMapping.get(wuType, None)
    if wuClass is None:
        # fail fast, before staging any files for an unknown type
        raise RuntimeError("Unimplemented WorkUnit subclass: %s" % wuType)
    wuid = generateWorkUnitId()
    wuWorkDir = os.path.join(workDir, wuid)
    copyToDir(wuConfig.getStageFiles(), wuWorkDir, unpackBundles=True)
    hexDigest = wuConfig.getHexDigest()  # renamed: don't shadow builtin hex()
    # NOTE(review): comparing the mapped *class* against the string
    # 'sciflo' looks suspect (``wuType == 'sciflo'`` may be intended);
    # preserved as-is — confirm against WorkUnitTypeMapping's values.
    if wuClass == 'sciflo':
        workUnit = wuClass(wuConfig.getCall(),
                           wuConfig.getArgs(),
                           wuWorkDir,
                           wuid=wuid,
                           procId=procId,
                           hexDigest=hexDigest,
                           scifloid=generateScifloId(),
                           configDict=configDict)
    else:
        workUnit = wuClass(wuConfig.getCall(),
                           wuConfig.getArgs(),
                           wuWorkDir,
                           wuid=wuid,
                           procId=procId,
                           hexDigest=hexDigest,
                           configDict=configDict)
    workUnit.setInfoItem('typ', wuType)
    workUnit.setInfoItem('status', readyStatus)
    workUnit.setInfoItem('procCount', wuConfig.getProcCount())
    return workUnit
Exemplo n.º 6
0
def getRootWorkDirFromConfiguration(file=None):
    """Return the dir path for workUnit's work directory."""

    parser = ScifloConfigParser(file)
    rootWorkDir = parser.getMandatoryParameter('workUnitRootWorkDir')

    # make sure the directory exists (creating it if necessary)
    if not validateDirectory(rootWorkDir):
        raise RuntimeError("Couldn't access/create workUnitRootWorkDir %s." %
                           rootWorkDir)

    return rootWorkDir
Exemplo n.º 7
0
def getScheduleConfigFromConfiguration(file=None):
    """Return the ScheduleConfig object as defined by the sciflo configuration
    xml file.
    """

    configParser = ScifloConfigParser(file)
    scheduleStoreType = configParser.getMandatoryParameter('scheduleStoreType')

    # reject unknown store types up front
    if scheduleStoreType not in StoreTypeMapping:
        raise RuntimeError("Unknown scheduleStoreType %s in configuration." %
                           scheduleStoreType)

    # store home; for bsddb it must be a usable directory
    scheduleStoreHome = configParser.getMandatoryParameter(
        'scheduleStoreHome')
    if scheduleStoreType == 'bsddb' and not validateDirectory(
            scheduleStoreHome):
        raise RuntimeError("Couldn't access/create bsddb home %s." %
                           scheduleStoreHome)

    # db filename and store name
    scheduleStoreDb = configParser.getMandatoryParameter('scheduleStoreDb')
    scheduleStoreName = configParser.getMandatoryParameter(
        'scheduleStoreName')

    # assemble the work unit schedule store config
    return StoreConfig(scheduleStoreType, scheduleStoreName,
                       scheduleStoreFieldsList,
                       scheduleStoreHome, scheduleStoreDb)
Exemplo n.º 8
0
    def __init__(self,
                 call,
                 args,
                 workDir,
                 verbose=False,
                 wuid=None,
                 procId=None,
                 hexDigest=None,
                 configDict=None):
        """Save call, args, and working directory.

        Validates/creates workDir, derives the json/log/pid file paths
        inside it, and builds the initial workUnitInfo record.

        Raises:
            WorkUnitError: if workDir cannot be accessed or created.
        """

        self._call = call
        self._args = args
        self._workDir = workDir
        self._verbose = verbose
        self._wuid = wuid
        self._procId = procId
        self._hexDigest = hexDigest
        # was ``configDict={}``: a mutable default stored on the
        # instance would be shared across all WorkUnit instances
        self._configDict = {} if configDict is None else configDict
        self._cancelFlag = False
        if not validateDirectory(self._workDir):  # make sure workDir exists
            raise WorkUnitError(
                "Couldn't create work unit work directory: %s." %
                self._workDir)
        self._jsonFile = os.path.join(self._workDir, 'workunit.json')
        self._logFile = os.path.join(self._workDir, 'wu_execution.log')
        self._pidFile = os.path.join(self._workDir, 'workunit.pid')
        self._info = workUnitInfo(None,
                                  call=self._call,
                                  args=self._args,
                                  workDir=self._workDir,
                                  wuid=self._wuid,
                                  procId=self._procId,
                                  hex=self._hexDigest,
                                  cancelFlag=self._cancelFlag,
                                  jsonFile=self._jsonFile,
                                  pidFile=self._pidFile,
                                  executionLog=self._logFile)
Exemplo n.º 9
0
    def _execute(self, result, workDir):
        """Perform the post execution function.  Pass in the entire result
        of the work unit and this handler will run the conversion function
        on the specified indexed result or on the entire result (if the
        resultIndex is None).  Return the result of the function.
        """

        # get the result to perform function on
        if self._resultIndex is None:
            inputResult = result
        else:
            inputResult = result[self._resultIndex]

        # special case conversions: "xpath:<expr>" runs the xpath
        # expression against the result parsed as XML
        xpathMatch = re.search(r'^xpath:(.+)$', self._conversionFuncStr)
        if xpathMatch:
            resultDoc, resultNs = getXmlEtree(inputResult)
            return resultDoc.xpath(xpathMatch.group(1), namespaces=resultNs)

        # eval the conversion function
        convFunc = getFunction(self._conversionFuncStr,
                               addToSysPath=getUserConversionFunctionsDir())

        # if FunctionWrapper, get local files: download each input (url
        # or local path) into workDir/fileConversions and substitute the
        # local copies for the originals
        tempDir = None
        tmpInputRes = []
        if isinstance(convFunc, LocalizingFunctionWrapper):
            tempDir = os.path.join(workDir, 'fileConversions')
            validateDirectory(tempDir)
            if not isinstance(inputResult, (list, tuple)):
                singleArgFlag = True
                tmpInput = [inputResult]
            else:
                singleArgFlag = False
                tmpInput = inputResult
            for ip in tmpInput:
                if not isinstance(ip, str):
                    raise PostExecutionHandlerError(
                        "Cannot localize input %s.  Please check return value of work unit."
                        % str(ip))
                # urls keep their basename; plain paths use os.path
                match = re.search(r'^\w*?://', ip)
                if match:
                    filebase = urllib.parse.urlparse(ip)[2].split('/')[-1]
                else:
                    filebase = os.path.basename(ip)
                tempFile = os.path.join(tempDir, filebase)
                (ip, headers) = urllib.request.urlretrieve(ip, tempFile)
                tmpInputRes.append(tempFile)
            if singleArgFlag:
                inputResult = tmpInputRes[0]
            else:
                inputResult = tmpInputRes

        # do conversion
        # NOTE(review): singleArgFlag is only bound inside the
        # LocalizingFunctionWrapper branch above; this branch raises
        # NameError unless every FileConversionFunction is also a
        # LocalizingFunctionWrapper -- confirm the class hierarchy.
        if isinstance(convFunc, FileConversionFunction):
            if singleArgFlag:
                retVal = convFunc(inputResult)
            else:
                retVal = [convFunc(ip) for ip in inputResult]
        else:
            retVal = convFunc(inputResult)

        return retVal
Exemplo n.º 10
0
    def __init__(self,
                 sflString,
                 args={},
                 workers=4,
                 workerTimeout=None,
                 logLevel=logging.DEBUG,
                 workDir=None,
                 cacheName="WorkUnitCache",
                 outputDir=None,
                 scifloid=None,
                 publicize=False,
                 configFile=None,
                 lookupCache=True,
                 configDict={},
                 writeGraph=True,
                 statusUpdateFunc=None,
                 emailNotify=None,
                 outputUrl=None):
        """Constructor."""

        import multiprocessing as mp

        self.sflString = sflString
        self.args = normalizeScifloArgs(args)
        if isinstance(self.args, dict):
            self.sciflo = Sciflo(self.sflString, globalInputDict=self.args)
        elif isinstance(self.args, (list, tuple)):
            self.sciflo = Sciflo(self.sflString, self.args)
        else:
            raise ScifloExecutorError("Unrecognized type for args: %s" %
                                      type(self.args))
        self.sciflo.resolve()
        self.scifloName = self.sciflo.getName()
        self.wuConfigs = self.sciflo.getWorkUnitConfigs()
        if scifloid is None:
            self.scifloid = generateScifloId()
        else:
            self.scifloid = scifloid
        self.manager = mp.Manager()
        self.procIds = []
        self.applyResultsDict = {}
        self.resultsDict = {}
        self.postExecResultsDict = {}
        self.doneDict = {}
        self.executionError = None
        if workers > 50:
            raise ScifloExecutorError("Cannot specify workers > 50.")
        self.workers = workers
        self.pool = ScifloPool(self.workers)
        self.configDict = configDict
        #self.lock = self.manager.RLock()
        self.lock = threading.RLock()
        self.event = self.manager.Event()
        self.waiterProcess = mp.Process(target=waiter,
                                        name="waiter",
                                        args=[self.event])
        self.logLevel = logLevel
        if DEBUG_PROCESSING:
            self.logger = mp.process.getLogger()
        else:
            self.logger = logging.getLogger(self.scifloName)
        self.logger.setLevel(self.logLevel)
        self.pid = os.getpid()
        self.writeGraph = writeGraph
        self.statusUpdateFunc = statusUpdateFunc
        self.emailNotify = emailNotify

        # config file and GridServiceConfig
        self.configFile = configFile
        self.gsc = GridServiceConfig(self.configFile)

        # set worker timeout from config file and override
        self.workerTimeout = workerTimeout
        if self.workerTimeout is None:
            self.workerTimeout = self.gsc.getWorkerTimeout()

        # sciflo work dir
        if workDir is None:
            workDir = self.gsc.getWorkUnitWorkDir()
        self.workDir = os.path.abspath(workDir)
        validateDirectory(self.workDir)

        # sciflo output dir
        self.outputDir = outputDir
        if self.outputDir is None:
            self.outputDir = os.path.join(self.workDir, self.scifloid)
        validateDirectory(self.outputDir)
        self.outputUrl = outputUrl

        # send logging messages to an execution log also (in addition to console)
        self.logFile = os.path.join(self.outputDir, 'sfl_execution.log')
        fh = logging.FileHandler(self.logFile)
        fh.setLevel(self.logLevel)
        fh.setFormatter(logging.Formatter(LOG_FMT))
        self.logger.addHandler(fh)

        # cache related attrs
        self.cacheName = cacheName
        self.lookupCache = lookupCache
        if self.cacheName is None:
            self.pdict = None
        else:
            try:
                self.pdict = PersistentDict(self.cacheName, pickleVals=True)
            except Exception as e:
                self.logger.debug("Got exception trying to get PersistentDict \
for sciflo '%s': %s.  No cache will be used." % (self.scifloName, e),
                                  extra={'id': self.scifloid})
                self.cacheName = None
                self.pdict = None
        self.hexDict = {}

        # annotated doc
        self.annDoc = AnnotatedDoc(self.sciflo, self.outputDir)

        # json file
        self.jsonFile = os.path.join(self.outputDir, 'sciflo.json')

        # svgfile
        self.svgFile = os.path.join(self.outputDir, 'scifloGraph.svg')
        if self.writeGraph:
            self.sciflo.writeGraph(self.svgFile)

        # configFile, publicize, grid service config, base url and url base
        # tracker
        self.publicize = publicize
        self.baseUrl = self.gsc.getBaseUrl()
        if self.baseUrl is None:
            if self.publicize:
                self.baseUrl = self.gsc.getGridBaseUrl()
            else:
                self.baseUrl = "file://%s%s" % (getfqdn(), self.workDir)
        self.ubt = UrlBaseTracker(self.workDir, self.baseUrl)
        if self.publicize:
            self.publicizeUbt = self.ubt
        else:
            self.publicizeUbt = None

        # sciflo procId->wuid map
        self.procIdWuidMap = {}

        # build deferred ids, dict, and results dict
        for w in self.wuConfigs:
            procId = w.getId()

            # check if all args are resolved
            resolved = self.resolveArgs(w)

            # if all args are resolved, get work unit
            if resolved:
                self.hexDict[procId] = w.getHexDigest()
                try:
                    wu = getWorkUnit(w,
                                     configFile=self.configFile,
                                     configDict=self.configDict)
                except Exception as e:
                    raise ScifloExecutorError("Encountered error calling \
getWorkUnit(): %s\n%s" % (str(e), getTb()))
                wuid = wu.getWuid()
                appRes = WuReady(wu)
                # update info in work unit json for monitoring
                updateJson(wu.getJsonFile(),
                           wu.getInfo(),
                           stringifyKeys=STRINGIFY_FIELDS,
                           ubt=self.publicizeUbt,
                           publicizeKeys=WORK_UNIT_PUBLICIZE_FIELDS,
                           pickleKeys=PICKLE_FIELDS)
                self.updateStatus(
                    'WorkUnit status for "%s": %s' % (procId, readyStatus),
                    wu.getInfo())
            else:
                wuid = None
                appRes = w
                self.updateStatus(
                    'WorkUnit status for "%s": %s' % (procId, waitingStatus),
                    wu.getInfo())
            self.procIdWuidMap[procId] = wuid
            self.procIds.append(procId)
            self.applyResultsDict[procId] = appRes
            self.resultsDict[procId] = NoResult()
            self.postExecResultsDict[procId] = w.getPostExecutionTypeList()

        self.output = self.sciflo.getFlowOutputConfigs()

        # sciflo info
        self.scifloInfo = scifloInfo(None,
                                     scifloid=self.scifloid,
                                     scifloName=self.scifloName,
                                     call=self.sflString,
                                     args=self.args,
                                     workDir=self.workDir,
                                     status=sentStatus,
                                     pid=self.pid,
                                     procIds=self.procIds,
                                     procIdWuidMap=self.procIdWuidMap,
                                     outputDir=self.outputDir,
                                     jsonFile=self.jsonFile,
                                     svgFile=self.svgFile,
                                     executionLog=self.logFile)

        # update json
        updateJson(self.jsonFile,
                   self.scifloInfo,
                   stringifyKeys=STRINGIFY_FIELDS,
                   ubt=self.publicizeUbt,
                   publicizeKeys=SCIFLO_PUBLICIZE_FIELDS,
                   pickleKeys=PICKLE_FIELDS)