Exemplo n.º 1
0
    def __init__(self, config):
        BaseWorkerThread.__init__(self)
        self.config = config.FilesCleaner
        self.logger.debug('Configuration loaded')

        try:
            self.logger.setLevel(self.config.log_level)
        except:
            import logging
            self.logger = logging.getLogger()
            self.logger.setLevel(self.config.log_level)
        self.logger.debug('Configuration loaded')
        config_server = CouchServer(dburl=self.config.config_couch_instance)
        self.config_db = config_server.connectDatabase(self.config.config_database)
        self.logger.debug('Connected to files DB')
        self.phedex = PhEDEx(responseType='xml')
        self.log_dir = '%s/logs/%s/%s/%s' % ( self.config.componentDir, \
 str(datetime.datetime.now().month), str(datetime.datetime.now().year), "Ops")
        try:
            os.makedirs(self.log_dir)
        except OSError, e:
            if e.errno == errno.EEXIST:
                pass
            else:
                self.logger.error('Unknown error in mkdir' % e.errno)
                raise
Exemplo n.º 2
0
 def __init__(self, config):
     # configuration values:
     #     'uri' attribute (URL of the REST server and resource name)
     #         in case of CouchDB, the resource name is the database name
     #         http://servername:port/databaseName
     self.config = config
     
     # the class currently relies only on 1 REST server possibility - the
     # CouchDB server. as explained above, .database will be replaced by
     # .connection if both a generic REST server as well as CouchDB are to
     # be talked to
     split = self.config.uri.rfind('/')
     dbName = self.config.uri[split + 1:] # get last item of URI - database name
     url = self.config.uri[:split]
     # as opposed to CouchSink, here it's assumed the resource (the database name)
     # does exist, fail here otherwise
     # this check / rest of the constructed may be revised for
     #     general REST server
     server = CouchServer(url)
     databases = server.listDatabases()
     if dbName not in databases:
         raise Exception("%s: REST URI: %s, %s does not exist." %
                         (self.__class__.__name__, self.config.uri, dbName))
     self._database = Database(dbName, url)
     logging.debug("%s initialized." % self.__class__.__name__)
Exemplo n.º 3
0
class CouchAppTestHarness:
    """
    Test Harness for installing a couch database instance with several couchapps
    in a unittest.setUp and wiping it out in a unittest.tearDown
    
    
    """
    def __init__(self, dbName, couchUrl = None):
        self.couchUrl = os.environ.get("COUCHURL", couchUrl)
        self.dbName = dbName
        if self.couchUrl == None:
            msg = "COUCHRURL env var not set..."
            raise RuntimeError, msg
        self.couchServer = CouchServer(self.couchUrl)
        self.couchappConfig = Config()


    def create(self):
        """create couch db instance"""
        if self.dbName in self.couchServer.listDatabases():
            self.drop()

        self.couchServer.createDatabase(self.dbName)

    def drop(self):
        """blow away the couch db instance"""
        self.couchServer.deleteDatabase(self.dbName)

    def pushCouchapps(self, *couchappdirs):
        """
        push a list of couchapps to the database
        """
        for couchappdir in  couchappdirs:
            couchapppush(self.couchappConfig, couchappdir, "%s/%s" % (self.couchUrl, urllib.quote_plus(self.dbName)))
Exemplo n.º 4
0
    def testB_testErrors(self):
        """
        _testErrors_

        Test with a failed FWJR
        """

        myThread = threading.currentThread()

        config = self.getConfig()
        workloadPath = os.path.join(self.testDir, 'specDir', 'spec.pkl')
        workload     = self.createWorkload(workloadName = workloadPath)
        testJobGroup = self.createTestJobGroup(config = config,
                                               name = workload.name(),
                                               specLocation = workloadPath,
                                               error = True)

        cachePath = os.path.join(config.JobCreator.jobCacheDir,
                                 "TestWorkload", "ReReco")
        os.makedirs(cachePath)
        self.assertTrue(os.path.exists(cachePath))

        couchdb      = CouchServer(config.JobStateMachine.couchurl)
        jobdb        = couchdb.connectDatabase("%s/jobs" % self.databaseName)
        fwjrdb       = couchdb.connectDatabase("%s/fwjrs" % self.databaseName)
        jobdb.loadView("JobDump", "jobsByWorkflowName",
                        options = {"startkey": [workload.name()],
                                   "endkey": [workload.name(), {}]})['rows']
        fwjrdb.loadView("FWJRDump", "fwjrsByWorkflowName",
                        options = {"startkey": [workload.name()],
                                   "endkey": [workload.name(), {}]})['rows']

        testTaskArchiver = TaskArchiverPoller(config = config)
        testTaskArchiver.algorithm()

        dbname       = getattr(config.JobStateMachine, "couchDBName")
        workdatabase = couchdb.connectDatabase("%s/workloadsummary" % dbname)

        workloadSummary = workdatabase.document(id = workload.name())

        self.assertEqual(workloadSummary['errors']['/TestWorkload/ReReco']['failureTime'], 500)
        self.assertTrue(workloadSummary['errors']['/TestWorkload/ReReco']['cmsRun1'].has_key('99999'))

        failedRunInfo = workloadSummary['errors']['/TestWorkload/ReReco']['cmsRun1']['99999']['runs']
        for key, value in failedRunInfo.items():
            failedRunInfo[key] = list(set(value))
        self.assertEquals(failedRunInfo, {'10' : [12312]},
                          "Wrong lumi information in the summary for failed jobs")

        # Check the failures by site histograms
        self.assertEqual(workloadSummary['histograms']['workflowLevel']['failuresBySite']['data']['T1_IT_CNAF']['Failed Jobs'], 10)
        self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['data']['T1_IT_CNAF']['99999'], 10)
        self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['data']['T1_IT_CNAF']['8020'], 10)
        self.assertEqual(workloadSummary['histograms']['workflowLevel']['failuresBySite']['average']['Failed Jobs'], 10)
        self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['average']['99999'], 10)
        self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['average']['8020'], 10)
        self.assertEqual(workloadSummary['histograms']['workflowLevel']['failuresBySite']['stdDev']['Failed Jobs'], 0)
        self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['stdDev']['99999'], 0)
        self.assertEqual(workloadSummary['histograms']['stepLevel']['/TestWorkload/ReReco']['cmsRun1']['errorsBySite']['stdDev']['8020'], 0)
        return
Exemplo n.º 5
0
    def __init__(self, config):
        """
        Initialise class members
        """
        BaseDaemon.__init__(self, config, 'RetryManager')

        if self.config.isOracle:
            self.oracleDB = HTTPRequests(self.config.oracleDB,
                                         self.config.opsProxy,
                                         self.config.opsProxy)
        else:
            try:
                server = CouchServer(dburl=self.config.couch_instance,
                                     ckey=self.config.opsProxy,
                                     cert=self.config.opsProxy)
                self.db = server.connectDatabase(self.config.files_database)
            except Exception as e:
                self.logger.exception('A problem occured when connecting to couchDB: %s' % e)
                raise
            self.logger.debug('Connected to files DB')

            # Set up a factory for loading plugins
        self.factory = WMFactory(self.config.retryAlgoDir, namespace=self.config.retryAlgoDir)
        try:
            self.plugin = self.factory.loadObject(self.config.algoName, self.config,
                                                  getFromCache=False, listFlag=True)
        except Exception as ex:
            msg = "Error loading plugin %s on path %s\n" % (self.config.algoName,
                                                            self.config.retryAlgoDir)
            msg += str(ex)
            self.logger.error(msg)
            raise RetryManagerException(msg)
        self.cooloffTime = self.config.cooloffTime
Exemplo n.º 6
0
class ChangeState(WMObject, WMConnectionBase):
    """
    Propagate the state of a job through the JSM.
    """
    def __init__(self, config, couchDbName = None):
        WMObject.__init__(self, config)
        WMConnectionBase.__init__(self, "WMCore.WMBS")

        if couchDbName == None:
            self.dbname = getattr(self.config.JobStateMachine, "couchDBName")
        else:
            self.dbname = couchDbName

        try:
            self.couchdb = CouchServer(self.config.JobStateMachine.couchurl)
            self.jobsdatabase = self.couchdb.connectDatabase("%s/jobs" % self.dbname)
            self.fwjrdatabase = self.couchdb.connectDatabase("%s/fwjrs" % self.dbname)
        except Exception, ex:
            logging.error("Error connecting to couch: %s" % str(ex))
            self.jobsdatabase = None
            self.fwjrdatabase = None            

        self.getCouchDAO = self.daofactory("Jobs.GetCouchID")
        self.setCouchDAO = self.daofactory("Jobs.SetCouchID")
        self.incrementRetryDAO = self.daofactory("Jobs.IncrementRetry")
        self.workflowTaskDAO = self.daofactory("Jobs.GetWorkflowTask")

        self.maxUploadedInputFiles = getattr(self.config.JobStateMachine, 'maxFWJRInputFiles', 1000)
        return
Exemplo n.º 7
0
    def atestB_testErrors(self):
        """
        _testErrors_

        Test with a failed FWJR
        """

        myThread = threading.currentThread()

        config = self.getConfig()
        workloadPath = os.path.join(self.testDir, "specDir", "spec.pkl")
        workload = self.createWorkload(workloadName=workloadPath)
        testJobGroup = self.createTestJobGroup(
            config=config, name=workload.name(), specLocation=workloadPath, error=True
        )

        cachePath = os.path.join(config.JobCreator.jobCacheDir, "TestWorkload", "ReReco")
        os.makedirs(cachePath)
        self.assertTrue(os.path.exists(cachePath))

        testTaskArchiver = TaskArchiverPoller(config=config)
        testTaskArchiver.algorithm()

        dbname = getattr(config.JobStateMachine, "couchDBName")
        couchdb = CouchServer(config.JobStateMachine.couchurl)
        workdatabase = couchdb.connectDatabase(dbname)

        workloadSummary = workdatabase.document(id="TestWorkload")

        self.assertEqual(workloadSummary["/TestWorkload/ReReco"]["failureTime"], 500)
        self.assertTrue(workloadSummary["/TestWorkload/ReReco"]["cmsRun1"].has_key("99999"))
        return
Exemplo n.º 8
0
class Algo:
    """
    Plugins parent class.
    """
    def __init__(self, config, logger, users, pool_size):
        """
        Initialise class members
        """
        self.config = config
        self.logger = logger
        self.asyncServer = CouchServer(self.config.couch_instance)
        self.db = self.asyncServer.connectDatabase(self.config.files_database)
        self.config_db = self.asyncServer.connectDatabase(self.config.config_database)
        self.users = users
        self.pool_size = pool_size

    def __call__(self):
        """
        __call__ should be over written by subclasses such that useful results are returned
        """
        return []

    def updateSource(self, inputDict):
        """
        UpdateSource should be over written by subclasses to make a specific update in the source
        """
        return []
Exemplo n.º 9
0
class ChangeState(WMObject, WMConnectionBase):
    """
    Propagate the state of a job through the JSM.
    """

    def __init__(self, config, couchDbName=None):
        WMObject.__init__(self, config)
        WMConnectionBase.__init__(self, "WMCore.WMBS")

        if couchDbName == None:
            self.dbname = getattr(self.config.JobStateMachine, "couchDBName")
        else:
            self.dbname = couchDbName

        try:
            self.couchdb = CouchServer(self.config.JobStateMachine.couchurl)
            self.jobsdatabase = self.couchdb.connectDatabase("%s/jobs" % self.dbname)
            self.fwjrdatabase = self.couchdb.connectDatabase("%s/fwjrs" % self.dbname)
        except Exception, ex:
            logging.error("Error connecting to couch: %s" % str(ex))
            self.jobsdatabase = None
            self.fwjrdatabase = None

        try:
            self.dashboardReporter = DashboardReporter(config)
        except Exception, ex:
            logging.error(
                "Error setting up the \
                          dashboard reporter: %s"
                % str(ex)
            )
Exemplo n.º 10
0
    def setUp(self):
        """
        _setUp_

        Setup couchdb and the test environment
        """
        # Set external test helpers
        self.testInit = TestInitCouchApp(__file__)
        self.testInit.setLogging()
        self.testInit.setupCouch("resubmitblock_t", "ACDC", "GroupUser")
        EmulatorHelper.setEmulators(siteDB = True)

        # Define test environment
        self.couchUrl = os.environ["COUCHURL"]
        self.acdcDBName = 'resubmitblock_t'
        self.validLocations = ['srm-cms.gridpp.rl.ac.uk', 'cmssrm.fnal.gov', 'srm.unl.edu']
        self.validLocationsCMSNames = ['T2_US_Nebraska', 'T1_US_FNAL', 'T1_UK_RAL']
        self.siteWhitelist = ['T2_XX_SiteA']
        self.workflowName = 'dballest_ReReco_workflow'
        couchServer = CouchServer(dburl = self.couchUrl)
        self.acdcDB = couchServer.connectDatabase(self.acdcDBName, create = False)
        user = makeUser('unknown', '*****@*****.**', self.couchUrl, self.acdcDBName)
        user.create()

        return
Exemplo n.º 11
0
    def setUp(self):
        """
        _setUp_

        Setup couchdb and the test environment
        """
        super(ResubmitBlockTest, self).setUp()

        self.group = 'unknown'
        self.user = '******'

        # Set external test helpers
        self.testInit = TestInitCouchApp(__file__)
        self.testInit.setLogging()
        self.testInit.setupCouch("resubmitblock_t", "ACDC", "GroupUser")
        EmulatorHelper.setEmulators(phedex=False, dbs=False, siteDB=True, requestMgr=False)

        # Define test environment
        self.couchUrl = os.environ["COUCHURL"]
        self.acdcDBName = 'resubmitblock_t'
        self.validLocations = ['T2_US_Nebraska', 'T1_US_FNAL', 'T1_UK_RAL']
        self.siteWhitelist = ['T2_XX_SiteA']
        self.workflowName = 'dballest_ReReco_workflow'
        couchServer = CouchServer(dburl=self.couchUrl)
        self.acdcDB = couchServer.connectDatabase(self.acdcDBName, create=False)
        user = makeUser(self.group, '*****@*****.**', self.couchUrl, self.acdcDBName)
        user.create()

        return
Exemplo n.º 12
0
 def resubmitCouchPublication(self, asourl, asodb, proxy, taskname):
     """
     Resubmit failed publications by resetting the publication
     status in the CouchDB documents.
     """
     server = CouchServer(dburl=asourl, ckey=proxy, cert=proxy)
     try:
         database = server.connectDatabase(asodb)
     except Exception as ex:
         msg = "Error while trying to connect to CouchDB: %s" % (str(ex))
         raise Exception(msg)
     try:
         failedPublications = database.loadView('DBSPublisher', 'PublicationFailedByWorkflow', {'reduce': False, 'startkey': [taskname], 'endkey': [taskname, {}]})['rows']
     except Exception as ex:
         msg = "Error while trying to load view 'DBSPublisher.PublicationFailedByWorkflow' from CouchDB: %s" % (str(ex))
         raise Exception(msg)
     msg = "There are %d failed publications to resubmit: %s" % (len(failedPublications), failedPublications)
     self.logger.info(msg)
     for doc in failedPublications:
         docid = doc['id']
         if doc['key'][0] != taskname: # this should never happen...
             msg = "Skipping document %s as it seems to correspond to another task: %s" % (docid, doc['key'][0])
             self.logger.warning(msg)
             continue
         data = {'last_update': time.time(),
                 'retry': str(datetime.datetime.now()),
                 'publication_state': 'not_published',
                }
         try:
             database.updateDocument(docid, 'DBSPublisher', 'updateFile', data)
             self.logger.info("updating document %s " % docid)
         except Exception as ex:
             msg = "Error updating document %s in CouchDB: %s" % (docid, str(ex))
             self.logger.error(msg)
     return
Exemplo n.º 13
0
    def __init__(self, config):
        """
        Initialise class members
        """
        # Need a better way to test this without turning off this next line
        BaseWorkerThread.__init__(self)
        # logging.basicConfig(format = '%(asctime)s %(name)-12s %(levelname)-8s %(message)s',datefmt = '%m-%d %H:%M')
        # self.logger = logging.getLogger()
        # self.logger is set up by the BaseWorkerThread, we just set it's level

        self.config = config.AsyncTransfer
        try:
            self.logger.setLevel(self.config.log_level)
        except:
            import logging

            self.logger = logging.getLogger()
            self.logger.setLevel(self.config.log_level)

        self.logger.debug("Configuration loaded")
        server = CouchServer(dburl=self.config.couch_instance, ckey=self.config.opsProxy, cert=self.config.opsProxy)
        self.db = server.connectDatabase(self.config.files_database)
        config_server = CouchServer(dburl=self.config.config_couch_instance)
        self.config_db = config_server.connectDatabase(self.config.config_database)
        self.logger.debug("Connected to CouchDB")
        self.pool = Pool(processes=self.config.pool_size)
        try:
            self.phedex = PhEDEx(responseType="xml")
        except Exception, e:
            self.logger.exception("PhEDEx exception: %s" % e)
Exemplo n.º 14
0
    def saveCouch(self, couchUrl, couchDBName, metadata=None):
        """ Save this spec in CouchDB.  Returns URL """
        from WMCore.Database.CMSCouch import CouchServer, CouchInternalServerError
        metadata = metadata or {}
        server = CouchServer(couchUrl)
        database = server.connectDatabase(couchDBName)
        name = self.name()
        uri = '/%s/%s' % (couchDBName, name)
        specuri = uri + '/spec'
        if not database.documentExists(name):
            self.setSpecUrl(couchUrl + specuri)
            doc = database.put(uri, data=metadata, contentType='application/json')
            # doc = database.commitOne(self.name(), metadata)
            rev = doc['rev']
        else:
            # doc = database.get(uri+'?revs=true')
            doc = database.document(name)
            rev = doc['_rev']

        # specuriwrev = specuri + '?rev=%s' % rev
        workloadString = pickle.dumps(self.data)
        # result = database.put(specuriwrev, workloadString, contentType='application/text')
        retval = database.addAttachment(name, rev, workloadString, 'spec')
        if retval.get('ok', False) is not True:
            msg = "Failed to save a spec attachment in CouchDB for %s" % name
            raise CouchInternalServerError(msg, data=None, result=retval)

        url = couchUrl + specuri
        return url
Exemplo n.º 15
0
class DQMCouchAPI(WMObject, WMConnectionBase):
    """
    Update the harvesting status of a dataset in CouchDB
    """
    def __init__(self, config, couchDbName = None, couchurl = None):
        WMObject.__init__(self, config)
        WMConnectionBase.__init__(self, "WMCore.WMBS")
        self.designDoc = "HarvestingDatasets"

        if couchDbName == None:
            self.dbname = getattr(self.config.HarvestingScheduler, "couchDBName",
                                  "dqm_default")
        else:
            self.dbname = couchDbName

        if couchurl is not None:
            self.couchurl = couchurl
        elif getattr(self.config.HarvestingScheduler, "couchurl",
                                                            None) is not None:
            self.couchurl = self.config.HarvestingScheduler.couchurl
        else:
            self.couchurl = self.config.JobStateMachine.couchurl

        try:
            self.couchdb = CouchServer(self.couchurl)
            if self.dbname not in self.couchdb.listDatabases():
                self.createDatabase()

            self.database = self.couchdb.connectDatabase(self.dbname,
                                                         size=_LIMIT)
        except Exception, ex:
            logging.error("Error connecting to couch: %s" % str(ex))
            self.database = None

        return 
Exemplo n.º 16
0
 def __init__(self, couchURL, summaryLevel):
     # set the connection for local couchDB call
     self.couchURL = couchURL
     self.couchURLBase, self.dbName = splitCouchServiceURL(couchURL)
     self.jobCouchDB = CouchServer(self.couchURLBase).connectDatabase(self.dbName + "/jobs", False)
     self.fwjrsCouchDB = CouchServer(self.couchURLBase).connectDatabase(self.dbName + "/fwjrs", False)
     self.summaryLevel = summaryLevel
Exemplo n.º 17
0
 def __init__(self, config, logger):
     self.config = config.CRABAsyncTransfer
     server = CouchServer(self.config.couch_instance)
     self.db = server.connectDatabase(self.config.files_database)
     self.logger = logger
     self.size = 0
     self.result = []
Exemplo n.º 18
0
    def setUp(self):
        """
        _setUp_

        Initialize the database and couch.
        """
        self.testInit = TestInitCouchApp(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setupCouch("promptreco_t", "ConfigCache")
        self.testInit.setSchema(customModules=["WMCore.WMBS"],
                                useDefault=False)
        couchServer = CouchServer(os.environ["COUCHURL"])
        self.configDatabase = couchServer.connectDatabase("promptreco_t")
        self.testDir = self.testInit.generateWorkDir()

        myThread = threading.currentThread()
        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)
        self.listTasksByWorkflow = self.daoFactory(classname="Workflow.LoadFromName")
        self.listFilesets = self.daoFactory(classname="Fileset.List")
        self.listSubsMapping = self.daoFactory(classname="Subscriptions.ListSubsAndFilesetsFromWorkflow")

        return
Exemplo n.º 19
0
    def setUp(self):
        """
        _setUp_

        Setup couchdb and the test environment
        """
        super(ResubmitBlockTest, self).setUp()

        self.group = 'unknown'
        self.user = '******'

        # Set external test helpers
        self.testInit = TestInitCouchApp(__file__)
        self.testInit.setLogging()
        self.testInit.setupCouch("resubmitblock_t", "ACDC", "GroupUser")

        # Define test environment
        self.couchUrl = os.environ["COUCHURL"]
        self.acdcDBName = 'resubmitblock_t'
        self.validLocations = ['T2_US_Nebraska', 'T1_US_FNAL_Disk', 'T1_UK_RAL_Disk']
        self.siteWhitelist = ['T2_XX_SiteA']
        siteDB = SiteDB()
        #Convert phedex node name to a valid processing site name
        self.PSNs = siteDB.PNNstoPSNs(self.validLocations)
        self.workflowName = 'dballest_ReReco_workflow'
        couchServer = CouchServer(dburl=self.couchUrl)
        self.acdcDB = couchServer.connectDatabase(self.acdcDBName, create=False)
        user = makeUser(self.group, '*****@*****.**', self.couchUrl, self.acdcDBName)
        user.create()

        return
Exemplo n.º 20
0
    def __init__(self, config):
        """
        Initialise class members
        """
        #Need a better way to test this without turning off this next line
        BaseDaemon.__init__(self, config, 'AsyncTransfer')

        self.dropbox_dir = '%s/dropbox/outputs' % self.config.componentDir
        if not os.path.isdir(self.dropbox_dir):
            try:
                os.makedirs(self.dropbox_dir)
            except OSError as e:
                if e.errno == errno.EEXIST:
                    pass
                else:
                    self.logger.error('Unknown error in mkdir' % e.errno)
                    raise
        server = CouchServer(dburl=self.config.couch_instance, ckey=self.config.opsProxy, cert=self.config.opsProxy)
        self.db = server.connectDatabase(self.config.files_database)
        config_server = CouchServer(dburl=self.config.config_couch_instance)
        self.config_db = config_server.connectDatabase(self.config.config_database)
        self.logger.debug('Connected to CouchDB')
        self.pool = Pool(processes=self.config.pool_size)
        try:
            self.phedex = PhEDEx(responseType='xml', dict = {'key': self.config.opsProxy, 'cert': self.config.opsProxy})
        except Exception as e:
            self.logger.exception('PhEDEx exception: %s' % e)
        # Set up a factory for loading plugins
        self.factory = WMFactory(self.config.schedAlgoDir, namespace = self.config.schedAlgoDir)

        result_list = []
        current_running = []
Exemplo n.º 21
0
class WMStatsReader():

    def __init__(self, couchURL, dbName = None):
        couchURL = sanitizeURL(couchURL)['url']
        # set the connection for local couchDB call
        if dbName:
            self.couchURL = couchURL
            self.dbName = dbName
        else:
            self.couchURL, self.dbName = splitCouchServiceURL(couchURL)
        self.couchServer = CouchServer(self.couchURL)
        self.couchDB = CouchServer(self.couchURL).connectDatabase(self.dbName, False)

    def workflowsByStatus(self, statusList):
        keys = statusList
        options = {"stale": "update_after"}
        result = self.couchDB.loadView("WMStats", "requestByStatus", options, keys)
        workflowList = []
        for item in result["rows"]:
            workflowList.append(item["id"])
        return workflowList
    
    def replicate(self, target):
        self.couchServer.replicate(self.dbName, target, 
                                   continuous = True)
Exemplo n.º 22
0
    def __init__(self, config):

        BaseWorkerThread.__init__(self)

        self.config = config.AsyncTransfer

        # self.logger is set up by the BaseWorkerThread, we just set it's level
        try:
            self.logger.setLevel(self.config.log_level)
        except:
            import logging
            self.logger = logging.getLogger()
            self.logger.setLevel(self.config.log_level)

        self.logger.debug('Configuration loaded')

        # Set up a factory for loading plugins
        self.factory = WMFactory(self.config.pluginDir, namespace = self.config.pluginDir)

        # Asynch db
        server = CouchServer(dburl=self.config.couch_instance, ckey=self.config.opsProxy, cert=self.config.opsProxy)
        self.db = server.connectDatabase(self.config.files_database)
        self.logger.debug('Connected to CouchDB')

        return
Exemplo n.º 23
0
class WMStatsReader():

    def __init__(self, couchURL, dbName = None):
        couchURL = sanitizeURL(couchURL)['url']
        # set the connection for local couchDB call
        if dbName:
            self.couchURL = couchURL
            self.dbName = dbName
        else:
            self.couchURL, self.dbName = splitCouchServiceURL(couchURL)
        self.couchServer = CouchServer(self.couchURL)
        self.couchDB = CouchServer(self.couchURL).connectDatabase(self.dbName, False)

    def workflowsByStatus(self, statusList, format = "list", stale = "update_after"):
        keys = statusList
        options = {}
        if stale:
            options = {"stale": stale}
        result = self.couchDB.loadView("WMStats", "requestByStatus", options, keys)

        if format == "dict":
            workflowDict = {}
            for item in result["rows"]:
                workflowDict[item["id"]] = None
            return workflowDict
        else:
            workflowList = []
            for item in result["rows"]:
                workflowList.append(item["id"])
            return workflowList

    def workflowStatus(self, stale = "update_after"):
        """
        _workflowStatus_

        Return a dictionary with all available workflows,
        grouped by status and with the timestamp of the status
        """
        options = {}
        if stale:
            options = {"stale" : stale}
        result = self.couchDB.loadView("WMStats", "requestByStatus", options)

        stateDict = {}
        for item in result['rows']:
            if item["key"] not in stateDict:
                stateDict[item["key"]] = {}
            stateDict[item["key"]][item["id"]] = item["value"]

        return stateDict

    def getDBInstance(self):
        return self.couchDB
    
    def getHeartbeat(self):
        try:
            return self.couchDB.info();
        except Exception, ex:
            return {'error_message': str(ex)}
Exemplo n.º 24
0
 def __init__(self, config):
     self.config = config
     # test if the configured database does not exist, create it
     server = CouchServer(self.config.url)
     databases = server.listDatabases()
     if self.config.database not in databases:
         server.createDatabase(self.config.database)
     self.database = Database(self.config.database, self.config.url)
     logging.debug("%s initialized." % self.__class__.__name__)
Exemplo n.º 25
0
 def __init__(self, couchURL, statSummaryDB, summaryLevel):
     # set the connection for local couchDB call
     self.couchURL = couchURL
     self.couchURLBase, self.dbName = splitCouchServiceURL(couchURL)
     self.jobCouchDB = CouchServer(self.couchURLBase).connectDatabase(self.dbName + "/jobs", False)
     fwjrDBname = "%s/fwjrs" % self.dbName
     self.fwjrAPI = FWJRDBAPI(self.couchURLBase, fwjrDBname)
     self.summaryStatsDB = CouchServer(self.couchURLBase).connectDatabase(statSummaryDB, False)
     self.summaryLevel = summaryLevel
Exemplo n.º 26
0
    def setUp(self):
        """
        _setUp_

        Setup the database and logging connection.  Try to create all of the
        WMBS tables.  Also add some dummy locations.
        """
        super(WorkQueueTestCase, self).setUp()
        self.queueDB = 'workqueue_t'
        self.queueInboxDB = 'workqueue_t_inbox'
        self.globalQDB = 'workqueue_t_global'
        self.globalQInboxDB = 'workqueue_t_global_inbox'
        self.localQDB = 'workqueue_t_local'
        self.localQInboxDB = 'workqueue_t_local_inbox'
        self.localQDB2 = 'workqueue_t_local2'
        self.localQInboxDB2 = 'workqueue_t_local2_inbox'
        self.configCacheDB = 'workqueue_t_config_cache'
        self.logDBName = 'logdb_t'
        self.requestDBName = 'workqueue_t_reqmgr_workload_cache'
        
        self.setSchema()
        self.testInit = TestInit('WorkQueueTest')
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection(destroyAllDatabase=True)
        self.addCleanup(self.testInit.clearDatabase)
        self.addCleanup(logging.debug, 'Cleanup called clearDatabase()')
        self.testInit.setSchema(customModules = self.schema,
                                useDefault = False)
        self.testInit.setupCouch(self.queueDB, *self.couchApps)
        self.testInit.setupCouch(self.queueInboxDB, *self.couchApps)
        self.testInit.setupCouch(self.globalQDB, *self.couchApps)
        self.testInit.setupCouch(self.globalQInboxDB , *self.couchApps)
        self.testInit.setupCouch(self.localQDB, *self.couchApps)
        self.testInit.setupCouch(self.localQInboxDB, *self.couchApps)
        self.testInit.setupCouch(self.localQDB2, *self.couchApps)
        self.testInit.setupCouch(self.localQInboxDB2, *self.couchApps)
        self.testInit.setupCouch(self.configCacheDB, 'ConfigCache')
        self.testInit.setupCouch(self.logDBName, 'LogDB')
        self.testInit.setupCouch(self.requestDBName, 'ReqMgr')
        
        self.couchURL = os.environ.get("COUCHURL")
        couchServer = CouchServer(self.couchURL)
        self.configCacheDBInstance = couchServer.connectDatabase(self.configCacheDB)

        self.localCouchMonitor = CouchMonitor(self.couchURL)
        self.localCouchMonitor.deleteReplicatorDocs()
        self.addCleanup(self.localCouchMonitor.deleteReplicatorDocs)
        self.addCleanup(logging.debug, 'Cleanup called deleteReplicatorDocs()')
        self.addCleanup(self.testInit.tearDownCouch)
        self.addCleanup(logging.debug, 'Cleanup called tearDownCouch()')

        self.workDir = self.testInit.generateWorkDir()
        self.addCleanup(self.testInit.delWorkDir)
        self.addCleanup(logging.debug, 'Cleanup called delWorkDir()')

        return
Exemplo n.º 27
0
 def __init__(self, couchURL, dbName = None):
     couchURL = sanitizeURL(couchURL)['url']
     # set the connection for local couchDB call
     if dbName:
         self.couchURL = couchURL
         self.dbName = dbName
     else:
         self.couchURL, self.dbName = splitCouchServiceURL(couchURL)
     self.couchServer = CouchServer(self.couchURL)
     self.couchDB = CouchServer(self.couchURL).connectDatabase(self.dbName, False)
Exemplo n.º 28
0
    def testE_multicore(self):
        """
        _multicore_

        Create a workload summary based on the multicore job report
        """

        myThread = threading.currentThread()

        config = self.getConfig()
        workloadPath = os.path.join(self.testDir, "specDir", "spec.pkl")
        workload = self.createWorkload(workloadName=workloadPath)
        testJobGroup = self.createTestJobGroup(
            config=config, name=workload.name(), specLocation=workloadPath, error=False, multicore=True
        )

        cachePath = os.path.join(config.JobCreator.jobCacheDir, "TestWorkload", "ReReco")
        os.makedirs(cachePath)
        self.assertTrue(os.path.exists(cachePath))

        dbname = config.TaskArchiver.workloadSummaryCouchDBName
        couchdb = CouchServer(config.JobStateMachine.couchurl)
        workdatabase = couchdb.connectDatabase(dbname)

        testTaskArchiver = TaskArchiverPoller(config=config)
        testTaskArchiver.algorithm()

        result = myThread.dbi.processData("SELECT * FROM wmbs_job")[0].fetchall()
        self.assertEqual(len(result), 0, "No job should have survived")
        result = myThread.dbi.processData("SELECT * FROM wmbs_subscription")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_jobgroup")[0].fetchall()
        self.assertEqual(len(result), 0)
        result = myThread.dbi.processData("SELECT * FROM wmbs_file_details")[0].fetchall()
        self.assertEqual(len(result), 0)

        workloadSummary = workdatabase.document(id="TestWorkload")

        self.assertAlmostEquals(
            workloadSummary["performance"]["/TestWorkload/ReReco"]["cmsRun1"]["minMergeTime"]["average"],
            5.7624950408900002,
            places=2,
        )
        self.assertAlmostEquals(
            workloadSummary["performance"]["/TestWorkload/ReReco"]["cmsRun1"]["numberOfMerges"]["average"],
            3.0,
            places=2,
        )
        self.assertAlmostEquals(
            workloadSummary["performance"]["/TestWorkload/ReReco"]["cmsRun1"]["averageProcessTime"]["average"],
            29.369966666700002,
            places=2,
        )
        return
Exemplo n.º 29
0
    def testJ_Resubmission(self):
        """
        _Resubmission_
 
        Test Resubmission
         
        """
        userName     = '******'
        groupName    = 'Li'
        teamName     = 'Tang'
        schema       = utils.getAndSetupSchema(self,
                                               userName = userName,
                                               groupName = groupName,
                                               teamName = teamName)
        schema['RequestType'] = "ReReco"
        configID = self.createConfig()
        schema["ConfigCacheID"] = configID
        schema["CouchDBName"] = self.couchDBName
        schema["CouchURL"]    = os.environ.get("COUCHURL")
        schema["CouchWorkloadDBName"] = self.couchDBName
 
        result = self.jsonSender.put('request', schema)
        requestName = result[0]['RequestName']
 
        # user, group schema already set up
        schema = utils.getSchema(groupName = groupName, userName = userName)
        schema['RequestType'] = "Resubmission"
        schema["CouchWorkloadDBName"] = self.couchDBName
        
        try:
            raises = False
            result = self.jsonSender.put('request', schema)
        except HTTPException as ex:
            raises = True
            self.assertEqual(ex.status, 400)
            self.assertTrue("Error in Workload Validation: Validation failed: InitialTaskPath is mendatory" in ex.result)
        self.assertTrue(raises)
 
        schema["InitialTaskPath"]     = '/%s/DataProcessing' % requestName
        schema["ACDCServer"]          = os.environ.get("COUCHURL")
        schema["ACDCDatabase"]        = self.couchDBName
        schema["CollectionName"]      = "SomeOtherName"
 
        # Here we just make sure that real result goes through
        result = self.jsonSender.put('request', schema)
        resubmitName = result[0]['RequestName']
        result = self.jsonSender.get('request/%s' % resubmitName)
 
        couchServer = CouchServer(self.testInit.couchUrl)
        reqmgrCouch = couchServer.connectDatabase(self.couchDBName)
        result = reqmgrCouch.loadView('ReqMgr', 'childresubmissionrequests', {}, [requestName])['rows']
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0]['key'], requestName)
        self.assertEqual(result[0]['id'], resubmitName)
Exemplo n.º 30
0
    def setUp(self):
        """
        _setUp_

        Initialize the database and couch.

        """
        self.testInit = TestInitCouchApp(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setupCouch("taskchain_t", "ConfigCache")
        self.testInit.setSchema(customModules=["WMCore.WMBS"],
                                useDefault=False)

        couchServer = CouchServer(os.environ["COUCHURL"])
        self.configDatabase = couchServer.connectDatabase("taskchain_t")
        self.testInit.generateWorkDir()
        self.workload = None

        self.differentNCores = getTestFile(
            'data/ReqMgr/requests/Integration/TaskChain_Task_Multicore.json')
        return
Exemplo n.º 31
0
class CouchHandler(logging.handlers.HTTPHandler):
    def __init__(self, host, database):
        HTTPHandler.__init__(self, host, database, 'POST')
        from WMCore.Database.CMSCouch import CouchServer
        self.database = CouchServer(dburl=host).connectDatabase(database,
                                                                size=10)

    def emit(self, record):
        """
        Write a document to CouchDB representing the log message.
        """
        doc = {}
        doc['message'] = record.msg
        doc['threadName'] = record.threadName
        doc['name'] = record.name
        doc['created'] = record.created
        doc['process'] = record.process
        doc['levelno'] = record.levelno
        doc['lineno'] = record.lineno
        doc['processName'] = record.processName
        doc['levelname'] = record.levelname
        self.database.commitOne(doc, timestamp=True)
Exemplo n.º 32
0
 def __init__(self, config):
     """
     Initialise class members
     """
     BaseWorkerThread.__init__(self)
     self.config = config.RetryManager
     try:
         self.logger.setLevel(self.config.log_level)
     except:
         import logging
         self.logger = logging.getLogger()
         self.logger.setLevel(self.config.log_level)
     self.logger.debug('Configuration loaded')
     try:
         server = CouchServer(dburl=self.config.couch_instance,
                              ckey=self.config.opsProxy,
                              cert=self.config.opsProxy)
         self.db = server.connectDatabase(self.config.files_database)
     except Exception, e:
         self.logger.exception(
             'A problem occured when connecting to couchDB: %s' % e)
         raise
Exemplo n.º 33
0
    def _setUp(self):
        """
        Instantiate CouchServer reference.
        Test connection with CouchDB (first connect and retrieve attempt).

        """
        try:
            couchURL = getattr(self.config, "couchURL", None)
            if not couchURL:
                raise Exception(
                    "Configuration value 'couchURL' missing, can't connect to CouchDB."
                )
            self.couch = CouchServer(couchURL)
            # retrieves result which is not used during this set up
            r = self.couch.makeRequest(self._query)
        except Exception as ex:
            msg = ("%s: could not connect to CouchDB, reason: %s" %
                   (self.__class__.__name__, ex))
            raise Exception(msg)
        # observables shall be list-like integers
        if not isinstance(self.config.observables, (list, tuple)):
            self.config.observables = tuple([self.config.observables])
Exemplo n.º 34
0
    def setUp(self):
        self.couchURL = os.getenv("COUCHURL")
        self.server = CouchServer(self.couchURL)
        # Kill off any databases left over from previous runs
        # In python 3 the variables defined inside a comprehension are deteled
        # outside the comprehension. See: pylint W1662 comprehension-escape
        dbs = [db for db in self.server.listDatabases() if db.startswith('rotdb_unittest_')]
        for db in dbs:
            try:
                self.server.deleteDatabase(db)
            except:
                pass
        # Create a database, drop an existing one first
        testname = self.id().split('.')[-1].lower()
        self.dbname = 'rotdb_unittest_%s' % testname
        self.arcname = 'rotdb_unittest_%s_archive' % testname
        self.seedname = 'rotdb_unittest_%s_seedcfg' % testname
        # set a long value for times, tests do operations explicitly
        self.timing = {'archive':timedelta(seconds=1), 'expire':timedelta(seconds=2)}

        self.db = RotatingDatabase(dbname = self.dbname, url = self.couchURL,
                                   archivename = self.arcname, timing = self.timing)
Exemplo n.º 35
0
class CouchAppTestHarness:
    """
    Test Harness for installing a couch database instance with several couchapps
    in a unittest.setUp and wiping it out in a unittest.tearDown


    """
    def __init__(self, dbName, couchUrl=None):
        self.couchUrl = os.environ.get("COUCHURL", couchUrl)
        self.dbName = dbName
        if self.couchUrl == None:
            msg = "COUCHRURL env var not set..."
            raise RuntimeError(msg)
        if self.couchUrl.endswith('/'):
            raise RuntimeError("COUCHURL env var shouldn't end with /")
        self.couchServer = CouchServer(self.couchUrl)
        self.couchappConfig = Config()

    def create(self, dropExistingDb=True):
        """create couch db instance"""
        if self.dbName in self.couchServer.listDatabases():
            if not dropExistingDb:
                return
            self.drop()

        self.couchServer.createDatabase(self.dbName)

    def drop(self):
        """blow away the couch db instance"""
        self.couchServer.deleteDatabase(self.dbName)

    def pushCouchapps(self, *couchappdirs):
        """
        push a list of couchapps to the database
        """
        for couchappdir in couchappdirs:
            couchapppush(
                self.couchappConfig, couchappdir,
                "%s/%s" % (self.couchUrl, urllib.quote_plus(self.dbName)))
Exemplo n.º 36
0
    def setUp(self):
        myThread = threading.currentThread()

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=["WMCore.WMBS"],
                                useDefault=False)
        self.databaseName = "couchapp_t_0"

        # Setup config for couch connections
        config = self.testInit.getConfiguration()

        self.testInit.setupCouch(self.databaseName, "WorkloadSummary")
        self.testInit.setupCouch(
            "%s/jobs" % config.JobStateMachine.couchDBName, "JobDump")
        self.testInit.setupCouch(
            "%s/fwjrs" % config.JobStateMachine.couchDBName, "FWJRDump")
        self.testInit.setupCouch(config.JobStateMachine.summaryStatsDBName,
                                 "SummaryStats")

        # Create couch server and connect to databases
        self.couchdb = CouchServer(config.JobStateMachine.couchurl)
        self.jobsdatabase = self.couchdb.connectDatabase(
            "%s/jobs" % config.JobStateMachine.couchDBName)
        self.fwjrdatabase = self.couchdb.connectDatabase(
            "%s/fwjrs" % config.JobStateMachine.couchDBName)
        self.statsumdatabase = self.couchdb.connectDatabase(
            config.JobStateMachine.summaryStatsDBName)

        # Create changeState
        self.changeState = ChangeState(config)
        self.config = config

        # Create testDir
        self.testDir = self.testInit.generateWorkDir()

        return
Exemplo n.º 37
0
    def __init__(self, dbURL, couchDBName = None, id = None, rev = None, usePYCurl = False, 
                 ckey = None, cert = None, capath = None, detail = True):
        self.dbname = couchDBName
        self.dburl  = dbURL
        self.detail = detail
        try:
            self.couchdb = CouchServer(self.dburl, usePYCurl=usePYCurl, ckey=ckey, cert=cert, capath=capath)
            if self.dbname not in self.couchdb.listDatabases():
                self.createDatabase()

            self.database = self.couchdb.connectDatabase(self.dbname)
        except Exception as ex:
            msg = "Error connecting to couch: %s\n" % str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            raise ConfigCacheException(message = msg)

        # local cache
        self.docs_cache = DocumentCache(self.database, self.detail)

        # UserGroup variables
        self.group = None
        self.owner = None

        # Internal data structure
        self.document  = Document()
        self.attachments = {}
        self.document['type'] = "config"
        self.document['description'] = {}
        self.document['description']['config_label'] = None
        self.document['description']['config_desc'] = None

        if id != None:
            self.document['_id']                = id
        self.document['pset_tweak_details'] = None
        self.document['info']               = None
        self.document['config']             = None
        return
Exemplo n.º 38
0
    def setUp(self):
        """
        _setUp_

        Setup the database and logging connection.  Try to create all of the
        WMBS tables.  Also add some dummy locations.
        """
        self.queueDB = 'workqueue_t'
        self.queueInboxDB = 'workqueue_t_inbox'
        self.globalQDB = 'workqueue_t_global'
        self.globalQInboxDB = 'workqueue_t_global_inbox'
        self.localQDB = 'workqueue_t_local'
        self.localQInboxDB = 'workqueue_t_local_inbox'
        self.localQDB2 = 'workqueue_t_local2'
        self.localQInboxDB2 = 'workqueue_t_local2_inbox'
        self.configCacheDB = 'workqueue_t_config_cache'

        self.setSchema()
        self.testInit = TestInit('WorkQueueTest')
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules = self.schema,
                                useDefault = False)
        self.testInit.setupCouch(self.queueDB, *self.couchApps)
        self.testInit.setupCouch(self.queueInboxDB, *self.couchApps)
        self.testInit.setupCouch(self.globalQDB, *self.couchApps)
        self.testInit.setupCouch(self.globalQInboxDB , *self.couchApps)
        self.testInit.setupCouch(self.localQDB, *self.couchApps)
        self.testInit.setupCouch(self.localQInboxDB, *self.couchApps)
        self.testInit.setupCouch(self.localQDB2, *self.couchApps)
        self.testInit.setupCouch(self.localQInboxDB2, *self.couchApps)
        self.testInit.setupCouch(self.configCacheDB, 'ConfigCache')

        couchServer = CouchServer(os.environ.get("COUCHURL"))
        self.configCacheDBInstance = couchServer.connectDatabase(self.configCacheDB)

        self.workDir = self.testInit.generateWorkDir()
        return
Exemplo n.º 39
0
    def __init__(self, db_url, db_name='workqueue',
                 inbox_name=None, parentQueue=None,
                 queueUrl=None, logger=None):
        if logger:
            self.logger = logger
        else:
            import logging
            self.logger = logging

        if inbox_name == None:
            inbox_name = "%s_inbox" % db_name

        self.server = CouchServer(db_url)
        self.parentCouchUrlWithAuth = parentQueue
        if parentQueue:
            self.parentCouchUrl = sanitizeURL(parentQueue)['url']
        else:
            self.parentCouchUrl = None
        self.db = self.server.connectDatabase(db_name, create=False, size=10000)
        self.hostWithAuth = db_url
        self.inbox = self.server.connectDatabase(inbox_name, create=False, size=10000)
        self.queueUrl = sanitizeURL(queueUrl or (db_url + '/' + db_name))['url']
        self.eleKey = 'WMCore.WorkQueue.DataStructs.WorkQueueElement.WorkQueueElement'
Exemplo n.º 40
0
 def __init__(self, config):
     """
     Initialise class members
     """
     #Need a better way to test this without turning off this next line
     BaseWorkerThread.__init__(self)
     #logging.basicConfig(format = '%(asctime)s %(name)-12s %(levelname)-8s %(message)s',datefmt = '%m-%d %H:%M')
     #self.logger = logging.getLogger()
     # self.logger is set up by the BaseWorkerThread, we just set it's level
     self.config = config.DBSPublisher
     try:
         self.logger.setLevel(self.config.log_level)
     except:
         import logging
         self.logger = logging.getLogger()
         self.logger.setLevel(self.config.log_level)
     self.logger.debug('Configuration loaded')
     server = CouchServer(dburl=self.config.couch_instance, ckey=self.config.opsProxy, cert=self.config.opsProxy)
     self.db = server.connectDatabase(self.config.files_database)
     self.logger.debug('Connected to CouchDB')
     # Set up a factory for loading plugins
     self.factory = WMFactory(self.config.schedAlgoDir, namespace = self.config.schedAlgoDir)
     self.pool = Pool(processes=self.config.publication_pool_size)
Exemplo n.º 41
0
    def __init__(self, config, couchDbName=None):
        WMObject.__init__(self, config)
        WMConnectionBase.__init__(self, "WMCore.WMBS")

        if couchDbName == None:
            self.dbname = getattr(self.config.JobStateMachine, "couchDBName")
        else:
            self.dbname = couchDbName

        self.jobsdatabase = None
        self.fwjrdatabase = None
        self.jsumdatabase = None
        self.statsumdatabase = None

        self.couchdb = CouchServer(self.config.JobStateMachine.couchurl)
        self._connectDatabases()

        try:
            self.dashboardReporter = DashboardReporter(config)
        except Exception as ex:
            logging.error("Error setting up the dashboard reporter: %s",
                          str(ex))
            raise

        self.getCouchDAO = self.daofactory("Jobs.GetCouchID")
        self.setCouchDAO = self.daofactory("Jobs.SetCouchID")
        self.incrementRetryDAO = self.daofactory("Jobs.IncrementRetry")
        self.workflowTaskDAO = self.daofactory("Jobs.GetWorkflowTask")
        self.jobTypeDAO = self.daofactory("Jobs.GetType")
        self.updateLocationDAO = self.daofactory("Jobs.UpdateLocation")
        self.getWorkflowSpecDAO = self.daofactory(
            "Workflow.GetSpecAndNameFromTask")

        self.maxUploadedInputFiles = getattr(self.config.JobStateMachine,
                                             'maxFWJRInputFiles', 1000)
        self.workloadCache = {}
        return
Exemplo n.º 42
0
 def __init__(self, couchURL, statSummaryDB, summaryLevel):
     # set the connection for local couchDB call
     self.couchURL = couchURL
     self.couchURLBase, self.dbName = splitCouchServiceURL(couchURL)
     self.jobCouchDB = CouchServer(self.couchURLBase).connectDatabase(self.dbName + "/jobs", False)
     self.fwjrsCouchDB = CouchServer(self.couchURLBase).connectDatabase(self.dbName + "/fwjrs", False)
     #TODO: remove the hard coded name (wma_summarydb)
     self.summaryStatsDB = CouchServer(self.couchURLBase).connectDatabase(statSummaryDB, False)
     self.summaryLevel = summaryLevel
Exemplo n.º 43
0
 def setup(self, parameters):
     """
     Called at startup
     """
     self.teamName = self.config.Agent.teamName
     # set the connection for local couchDB call
     self.useReqMgrForCompletionCheck = getattr(self.config.TaskArchiver, 'useReqMgrForCompletionCheck', True)
     self.archiveDelayHours = getattr(self.config.TaskArchiver, 'archiveDelayHours', 0)
     self.wmstatsCouchDB = WMStatsWriter(self.config.TaskArchiver.localWMStatsURL, 
                                         "WMStatsAgent")
     
     #TODO: we might need to use local db for Tier0
     self.centralRequestDBReader = RequestDBReader(self.config.AnalyticsDataCollector.centralRequestDBURL, 
                                                   couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)
     
     if self.useReqMgrForCompletionCheck:
         self.deletableState = "announced"
         self.centralRequestDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.centralRequestDBURL, 
                                                       couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)
         self.reqmgr2Svc = ReqMgr(self.config.TaskArchiver.ReqMgr2ServiceURL)
         #TODO: remove this when reqmgr2 replace reqmgr completely (reqmgr2Only)
         self.reqmgrSvc = RequestManager({'endpoint': self.config.TaskArchiver.ReqMgrServiceURL})
     else:
         # Tier0 case
         self.deletableState = "completed"
         # use local for update
         self.centralRequestDBWriter = RequestDBWriter(self.config.AnalyticsDataCollector.localT0RequestDBURL, 
                                                       couchapp=self.config.AnalyticsDataCollector.RequestCouchApp)
     
     jobDBurl = sanitizeURL(self.config.JobStateMachine.couchurl)['url']
     jobDBName = self.config.JobStateMachine.couchDBName
     self.jobCouchdb  = CouchServer(jobDBurl)
     self.jobsdatabase = self.jobCouchdb.connectDatabase("%s/jobs" % jobDBName)
     self.fwjrdatabase = self.jobCouchdb.connectDatabase("%s/fwjrs" % jobDBName)
     
     statSummaryDBName = self.config.JobStateMachine.summaryStatsDBName
     self.statsumdatabase = self.jobCouchdb.connectDatabase(statSummaryDBName)
Exemplo n.º 44
0
    def __init__(self, config):
        BaseWorkerThread.__init__(self)
        self.config = config.Analytics

        try:
            self.logger.setLevel(self.config.log_level)
        except:
            import logging
            self.logger = logging.getLogger()
            self.logger.setLevel(self.config.log_level)
        self.logger.debug('Configuration loaded')
        server = CouchServer(dburl=self.config.couch_instance, ckey=self.config.opsProxy, cert=self.config.opsProxy)
        self.db = server.connectDatabase(self.config.files_database)
        self.logger.debug('Connected to local couchDB')
        config_server = CouchServer(dburl=self.config.config_couch_instance, ckey=self.config.opsProxy, cert=self.config.opsProxy)
        self.config_db = config_server.connectDatabase(self.config.config_database)
        self.amq_auth_file = self.config.amq_auth_file
        monitoring_server = CouchServer(dburl=self.config.couch_user_monitoring_instance, ckey=self.config.opsProxy, cert=self.config.opsProxy)
        self.monitoring_db = monitoring_server.connectDatabase(self.config.user_monitoring_db)
        self.logger.debug('Connected to user_monitoring_db in couchDB')
Exemplo n.º 45
0
    def setUp(self):
        """
        _setUp_

        Create two subscriptions: One that contains a single file and one that
        contains multiple files.
        """
        self.testInit = TestInitCouchApp(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection(destroyAllDatabase=True)
        self.couchUrl = os.environ["COUCHURL"]
        self.couchDBName = "acdc_event_based_t"
        self.testInit.setupCouch(self.couchDBName, "GroupUser", "ACDC")
        self.testInit.setSchema(customModules=["WMCore.WMBS"],
                                useDefault=False)
        couchSever = CouchServer(dburl=self.couchUrl)
        self.couchDB = couchSever.connectDatabase(self.couchDBName)
        self.populateWMBS()
        self.performanceParams = {'timePerEvent': 12,
                                  'memoryRequirement': 2300,
                                  'sizePerEvent': 400}
        self.eventsPerJob = 100

        return
Exemplo n.º 46
0
class Registration():
    def __init__(self, cfg_dict={}, reg_info={}):
        """
        Initialise the regsvc for this component,
        """
        try:
            config_dict = {
                'server': 'https://cmsweb.cern.ch/',
                'database': 'registration',
                'cacheduration': 1,
            }

            config_dict.update(cfg_dict)

            self.server = CouchServer(config_dict['server'])
            self.db = self.server.connectDatabase(config_dict['database'])

            if 'location' not in reg_info.keys():
                raise KeyError('Registration needs a location in its reg_info')
            self.location_hash = str(reg_info['location'].__hash__())
            reg_info['_id'] = self.location_hash
            reg_info['#config_hash'] = hash(str(reg_info))
            push_cfg = True
            if self.db.documentExists(self.location_hash):
                # If the doc exists, check that the configuration hasn't changed
                doc = self.db.document(self.location_hash)
                push_cfg = doc['#config_hash'] != reg_info['#config_hash']
                reg_info['_rev'] = doc['_rev']
            if push_cfg:
                self.db.commitOne(reg_info)
        except:
            # Don't want to raise anything here
            # TODO: but should probably log...
            pass
        self.report()

    def report(self):
        """
        'Ping' the RegSvc with a doc containing the service doc's ID and a
        timestamp, this can be used to provide uptime information.
        """
        try:
            self.db.commitOne({'service': self.location_hash}, timestamp=True)
        except:
            # Don't want to raise anything here
            # TODO: but should probably log...
            pass
Exemplo n.º 47
0
def installCouchApp(couchUrl, couchDBName, couchAppName, basePath=None):
    """
    _installCouchApp_

    Install the given couch app on the given server in the given database.  If
    the database already exists it will be deleted.
    """
    if not basePath:
        basePath = couchAppRoot()
    print("Installing %s into %s" % (couchAppName, couchDBName))

    couchServer = CouchServer(couchUrl)
    couchappConfig = Config()

    couchapppush(couchappConfig, "%s/%s" % (basePath, couchAppName),
                 "%s/%s" % (couchUrl, couchDBName))
    return
Exemplo n.º 48
0
    def __init__(self, config, couchDbName=None):
        WMObject.__init__(self, config)
        WMConnectionBase.__init__(self, "WMCore.WMBS")

        if couchDbName == None:
            self.dbname = getattr(self.config.JobStateMachine, "couchDBName")
        else:
            self.dbname = couchDbName

        self.couchdb = CouchServer(self.config.JobStateMachine.couchurl)
        self._connectDatabases()

        try:
            self.dashboardReporter = DashboardReporter(config)
        except Exception, ex:
            logging.error("Error setting up the \
-                          dashboard reporter: %s" % str(ex))
            raise
Exemplo n.º 49
0
class WMLoggingTest(unittest.TestCase):
    def setUp(self):
        # Make an instance of the server
        self.server = CouchServer(
            os.getenv("COUCHURL", 'http://*****:*****@localhost:5984'))
        testname = self.id().split('.')[-1]
        # Create a database, drop an existing one first
        self.dbname = 'cmscouch_unittest_%s' % testname.lower()

        if self.dbname in self.server.listDatabases():
            self.server.deleteDatabase(self.dbname)

        self.server.createDatabase(self.dbname)
        self.db = self.server.connectDatabase(self.dbname)

    def tearDown(self):
        if self._exc_info()[0] == None:
            # This test has passed, clean up after it
            testname = self.id().split('.')[-1]
            self.server.deleteDatabase(self.dbname)

    def testLog(self):
        """
        Write ten log messages to the database at three different levels
        """
        my_logger = logging.getLogger('MyLogger')
        my_logger.setLevel(logging.DEBUG)
        handler = CouchHandler(self.server.url, self.dbname)
        formatter = logging.Formatter('%(message)s')
        handler.setFormatter(formatter)
        my_logger.addHandler(handler)

        for i in range(10):
            my_logger.debug('This is probably all noise.')
            my_logger.info('Jackdaws love my big sphinx of quartz.')
            my_logger.error('HOLLY CRAP!')
        logs = self.db.allDocs()['rows']
        self.assertEqual(30, len(logs))
Exemplo n.º 50
0
class Source:
    """
    Plugins parent class.
    """
    def __init__(self, config, logger):
        """
        Initialise class members
        """
        self.config = config
        self.logger = logger
        self.asyncServer = CouchServer(self.config.couch_instance)
        self.db = self.asyncServer.connectDatabase(self.config.files_database)
        try:
            query = {'limit': 1, 'descending': True}
            last_pollTime = self.db.loadView('AsyncTransfer', 'lastPollTime',
                                             query)['rows'][0]['key']
            self.since = last_pollTime + 1
        except:
            self.since = 0
        try:
            self.phedexApi = PhEDEx(secure=True, dict={})
        except Exception, e:
            self.logger.exception('PhEDEx object exception: %s' % e)
Exemplo n.º 51
0
    def testRemoveByCollectionName(self):
        """
        _testRemoveByCollectionName_

        Check the function to obliterate all the filesets of a collection
        """
        self.populateCouchDB()
        svc = CouchService(url=self.testInit.couchUrl,
                           database=self.testInit.couchDbName)
        database = CouchServer(self.testInit.couchUrl).connectDatabase(self.testInit.couchDbName)

        results = database.loadView("ACDC", "byCollectionName", keys=["Thunderstruck"])
        self.assertTrue(len(results["rows"]) > 0)
        svc.removeFilesetsByCollectionName("Thunderstruck")
        results = database.loadView("ACDC", "byCollectionName", keys=["Thunderstruck"])
        self.assertEqual(len(results["rows"]), 0)
        results = database.loadView("ACDC", "byCollectionName", keys=["Struckthunder"])
        self.assertTrue(len(results["rows"]) > 0)
        svc.removeFilesetsByCollectionName("Struckthunder")
        results = database.loadView("ACDC", "byCollectionName", keys=["Struckthunder"])
        self.assertEqual(len(results["rows"]), 0)
        return
Exemplo n.º 52
0
class ConfigCache(WMObject):
    """
    _ConfigCache_

    The class that handles the upload and download of configCache
    artifacts from Couch
    """
    def __init__(self,
                 dbURL,
                 couchDBName=None,
                 id=None,
                 rev=None,
                 usePYCurl=False,
                 ckey=None,
                 cert=None,
                 capath=None,
                 detail=True):
        self.dbname = couchDBName
        self.dburl = dbURL
        self.detail = detail
        try:
            self.couchdb = CouchServer(self.dburl,
                                       usePYCurl=usePYCurl,
                                       ckey=ckey,
                                       cert=cert,
                                       capath=capath)
            if self.dbname not in self.couchdb.listDatabases():
                self.createDatabase()

            self.database = self.couchdb.connectDatabase(self.dbname)
        except Exception as ex:
            msg = "Error connecting to couch: %s\n" % str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            raise ConfigCacheException(message=msg)

        # local cache
        self.docs_cache = DocumentCache(self.database, self.detail)

        # UserGroup variables
        self.group = None
        self.owner = None

        # Internal data structure
        self.document = Document()
        self.attachments = {}
        self.document['type'] = "config"
        self.document['description'] = {}
        self.document['description']['config_label'] = None
        self.document['description']['config_desc'] = None

        if id != None:
            self.document['_id'] = id
        self.document['pset_tweak_details'] = None
        self.document['info'] = None
        self.document['config'] = None
        return

    def createDatabase(self):
        """
        _createDatabase_

        """
        database = self.couchdb.createDatabase(self.dbname)
        database.commit()
        return database

    def connectUserGroup(self, groupname, username):
        """
        _connectUserGroup_

        """
        self.group = Group(name=groupname)
        self.group.setCouch(self.dburl, self.dbname)
        self.group.connect()
        self.owner = makeUser(groupname,
                              username,
                              couchUrl=self.dburl,
                              couchDatabase=self.dbname)
        return

    def createUserGroup(self, groupname, username):
        """
        _createUserGroup_

        Create all the userGroup information
        """
        self.createGroup(name=groupname)
        self.createUser(username=username)
        return

    def createGroup(self, name):
        """
        _createGroup_

        Create Group for GroupUser
        """
        self.group = Group(name=name)
        self.group.setCouch(self.dburl, self.dbname)
        self.group.connect()
        self.group.create()
        return

    def setLabel(self, label):
        """
        _setLabel_

        Util to add a descriptive label to the configuration doc
        """
        self.document['description']['config_label'] = label

    def setDescription(self, desc):
        """
        _setDescription_

        Util to add a verbose description string to a configuration doc
        """
        self.document['description']['config_desc'] = desc

    @Decorators.requireGroup
    def createUser(self, username):
        self.owner = makeUser(self.group['name'],
                              username,
                              couchUrl=self.dburl,
                              couchDatabase=self.dbname)
        self.owner.create()
        self.owner.ownThis(self.document)
        return

    @Decorators.requireGroup
    @Decorators.requireUser
    def save(self):
        """
        _save_

        Save yourself!  Save your internal document.
        """
        rawResults = self.database.commit(doc=self.document)

        # We should only be committing one document at a time
        # if not, get the last one.

        try:
            commitResults = rawResults[-1]
            self.document["_rev"] = commitResults.get('rev')
            self.document["_id"] = commitResults.get('id')
        except KeyError as ex:
            msg = "Document returned from couch without ID or Revision\n"
            msg += "Document probably bad\n"
            msg += str(ex)
            logging.error(msg)
            raise ConfigCacheException(message=msg)

        # Now do the attachments
        for attachName in self.attachments:
            self.saveAttachment(name=attachName,
                                attachment=self.attachments[attachName])

        return

    def saveAttachment(self, name, attachment):
        """
        _saveAttachment_

        Save an attachment to the document
        """

        retval = self.database.addAttachment(self.document["_id"],
                                             self.document["_rev"], attachment,
                                             name)

        if retval.get('ok', False) != True:
            # Then we have a problem
            msg = "Adding an attachment to document failed\n"
            msg += str(retval)
            msg += "ID: %s, Rev: %s" % (self.document["_id"],
                                        self.document["_rev"])
            logging.error(msg)
            raise ConfigCacheException(msg)

        self.document["_rev"] = retval['rev']
        self.document["_id"] = retval['id']
        self.attachments[name] = attachment

        return

    def loadDocument(self, configID):
        """
        _loadDocument_

        Load a document from the document cache given its couchID
        """
        self.document = self.docs_cache[configID]

    def loadByID(self, configID):
        """
        _loadByID_

        Load a document from the server given its couchID
        """
        try:
            self.document = self.database.document(id=configID)
            if 'owner' in self.document.keys():
                self.connectUserGroup(
                    groupname=self.document['owner'].get('group', None),
                    username=self.document['owner'].get('user', None))
            if '_attachments' in self.document.keys():
                # Then we need to load the attachments
                for key in self.document['_attachments'].keys():
                    self.loadAttachment(name=key)
        except CouchNotFoundError as ex:
            msg = "Document with id %s not found in couch\n" % (configID)
            msg += str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            raise ConfigCacheException(message=msg)
        except Exception as ex:
            msg = "Error loading document from couch\n"
            msg += str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            raise ConfigCacheException(message=msg)

        return

    def loadAttachment(self, name, overwrite=True):
        """
        _loadAttachment_

        Load an attachment from the database and put it somewhere useful
        """

        attach = self.database.getAttachment(self.document["_id"], name)

        if not overwrite:
            if name in self.attachments.keys():
                logging.info("Attachment already exists, so we're skipping")
                return

        self.attachments[name] = attach

        return

    def loadByView(self, view, value):
        """
        _loadByView_

        Underlying code to load views
        """

        viewRes = self.database.loadView('ConfigCache', view, {}, [value])

        if len(viewRes['rows']) == 0:
            # Then we have a problem
            logging.error("Unable to load using view %s and value %s" %
                          (view, str(value)))

        self.unwrapView(viewRes)
        self.loadByID(self.document["_id"])
        return

    def saveConfigToDisk(self, targetFile):
        """
        _saveConfigToDisk_

        Make sure we can save our config file to disk
        """
        config = self.getConfig()
        if not config:
            return

        # Write to a file
        f = open(targetFile, 'w')
        f.write(config)
        f.close()
        return

    def load(self):
        """
        _load_

        Figure out how to load
        """

        if self.document.get("_id", None) != None:
            # Then we should load by ID
            self.loadByID(self.document["_id"])
            return

        # Otherwise we have to load by view

        if not self.document.get('md5_hash', None) == None:
            # Then we have an md5_hash
            self.loadByView(view='config_by_md5hash',
                            value=self.document['md5_hash'])
        # TODO: Add more views as they become available.

        #elif not self.owner == None:
        # Then we have an owner
        #self.loadByView(view = 'config_by_owner', value = self.owner['name'])

    def unwrapView(self, view):
        """
        _unwrapView_

        Move view information into the main document
        """

        self.document["_id"] = view['rows'][0].get('id')
        self.document["_rev"] = view['rows'][0].get('value').get('_rev')

    def setPSetTweaks(self, PSetTweak):
        """
        _setPSetTweaks_

        Set the PSet tweak details for the config.
        """
        self.document['pset_tweak_details'] = PSetTweak
        return

    def getPSetTweaks(self):
        """
        _getPSetTweaks_

        Retrieve the PSet tweak details.
        """
        return self.document['pset_tweak_details']

    def getOutputModuleInfo(self):
        """
        _getOutputModuleInfo_

        Retrieve the dataset information for the config in the ConfigCache.
        """
        psetTweaks = self.getPSetTweaks()
        if not 'process' in psetTweaks.keys():
            raise ConfigCacheException(
                "Could not find process field in PSet while getting output modules!"
            )
        try:
            outputModuleNames = psetTweaks["process"]["outputModules_"]
        except KeyError as ex:
            msg = "Could not find outputModules_ in psetTweaks['process'] while getting output modules.\n"
            msg += str(ex)
            logging.error(msg)
            raise ConfigCacheException(msg)

        results = {}
        for outputModuleName in outputModuleNames:
            try:
                outModule = psetTweaks["process"][outputModuleName]
            except KeyError:
                msg = "Could not find outputModule %s in psetTweaks['process']" % outputModuleName
                logging.error(msg)
                raise ConfigCacheException(msg)
            dataset = outModule.get("dataset", None)
            if dataset:
                results[outputModuleName] = {
                    "dataTier": outModule["dataset"]["dataTier"],
                    "filterName": outModule["dataset"]["filterName"]
                }
            else:
                results[outputModuleName] = {
                    "dataTier": None,
                    "filterName": None
                }

        return results

    def addConfig(self, newConfig, psetHash=None):
        """
        _addConfig_


        """
        # The newConfig parameter is a URL suitable for passing to urlopen.
        configString = urllib.urlopen(newConfig).read(-1)
        configMD5 = hashlib.md5(configString).hexdigest()

        self.document['md5_hash'] = configMD5
        self.document['pset_hash'] = psetHash
        self.attachments['configFile'] = configString
        return

    def getConfig(self):
        """
        _getConfig_

        Get the currently active config
        """
        return self.attachments.get('configFile', None)

    def getCouchID(self):
        """
        _getCouchID_

        Return the document's couchID
        """

        return self.document["_id"]

    def getCouchRev(self):
        """
        _getCouchRev_

        Return the document's couchRevision
        """

        return self.document["_rev"]

    @Decorators.requireGroup
    @Decorators.requireUser
    def delete(self):
        """
        _delete_

        Deletes the document with the current docid
        """
        if not self.document["_id"]:
            logging.error("Attempted to delete with no couch ID")

        # TODO: Delete without loading first
        try:
            self.database.queueDelete(self.document)
            self.database.commit()
        except Exception as ex:
            msg = "Error in deleting document from couch"
            msg += str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            raise ConfigCacheException(message=msg)

        return

    def getIDFromLabel(self, label):
        """
        _getIDFromLabel_

        Retrieve the ID of a config given it's label.
        """
        results = self.database.loadView("ConfigCache", "config_by_label", {
            "startkey": label,
            "limit": 1
        })

        if results["rows"][0]["key"] == label:
            return results["rows"][0]["value"]

        return None

    def listAllConfigsByLabel(self):
        """
        _listAllConfigsByLabel_

        Retrieve a list of all the configs in the config cache.  This is
        returned in the form of a dictionary that is keyed by label.
        """
        configs = {}
        results = self.database.loadView("ConfigCache", "config_by_label")

        for result in results["rows"]:
            configs[result["key"]] = result["value"]

        return configs

    def __str__(self):
        """
        Make something printable

        """

        return self.document.__str__()

    def validate(self, configID):

        try:
            #TODO: need to change to DataCache
            #self.loadDocument(configID = configID)
            self.loadByID(configID=configID)
        except Exception as ex:
            raise ConfigCacheException(
                "Failure to load ConfigCache while validating workload: %s" %
                str(ex))

        if self.detail:
            duplicateCheck = {}
            try:
                outputModuleInfo = self.getOutputModuleInfo()
            except Exception as ex:
                # Something's gone wrong with trying to open the configCache
                msg = "Error in getting output modules from ConfigCache during workload validation.  Check ConfigCache formatting!"
                raise ConfigCacheException("%s: %s" % (msg, str(ex)))
            for outputModule in outputModuleInfo.values():
                dataTier = outputModule.get('dataTier', None)
                filterName = outputModule.get('filterName', None)
                if not dataTier:
                    raise ConfigCacheException("No DataTier in output module.")

                # Add dataTier to duplicate dictionary
                if not dataTier in duplicateCheck.keys():
                    duplicateCheck[dataTier] = []
                if filterName in duplicateCheck[dataTier]:
                    # Then we've seen this combination before
                    raise ConfigCacheException(
                        "Duplicate dataTier/filterName combination.")
                else:
                    duplicateCheck[dataTier].append(filterName)
            return outputModuleInfo
        else:
            return True
Exemplo n.º 53
0
class WMStatsReader(object):
    # TODO need to get this from reqmgr api
    ACTIVE_STATUS = ["new",
                     "assignment-approved",
                     "assigned",
                     "acquired",
                     "running",
                     "running-open",
                     "running-closed",
                     "failed",
                     "force-complete",
                     "completed",
                     "closed-out",
                     "announced",
                     "aborted",
                     "aborted-completed",
                     "rejected"]

    T0_ACTIVE_STATUS = ["new",
                        "Closed",
                        "Merge",
                        "Harvesting",
                        "Processing Done",
                        "AlcaSkim",
                        "completed"]

    def __init__(self, couchURL, appName="WMStats", reqdbURL=None, reqdbCouchApp="ReqMgr"):
        self._sanitizeURL(couchURL)
        # set the connection for local couchDB call
        self._commonInit(couchURL, appName)
        if reqdbURL:
            self.reqDB = RequestDBReader(reqdbURL, reqdbCouchApp)
        else:
            self.reqDB = None

    def _sanitizeURL(self, couchURL):
        return sanitizeURL(couchURL)['url']

    def _commonInit(self, couchURL, appName="WMStats"):
        """
        setting up comon variables for inherited class.
        inherited class should call this in their init function
        """

        self.couchURL, self.dbName = splitCouchServiceURL(couchURL)
        self.couchServer = CouchServer(self.couchURL)
        self.couchDB = self.couchServer.connectDatabase(self.dbName, False)
        self.couchapp = appName
        self.defaultStale = {"stale": "update_after"}

    def setDefaultStaleOptions(self, options):
        if not options:
            options = {}
        if 'stale' not in options:
            options.update(self.defaultStale)
        return options

    def getLatestJobInfoByRequests(self, requestNames):
        jobInfoByRequestAndAgent = {}

        if len(requestNames) > 0:
            requestAndAgentKey = self._getRequestAndAgent(requestNames)
            jobInfoByRequestAndAgent = self._getLatestJobInfo(requestAndAgentKey)
        return jobInfoByRequestAndAgent

    def _updateRequestInfoWithJobInfo(self, requestInfo):
        if len(requestInfo.keys()) != 0:
            jobInfoByRequestAndAgent = self.getLatestJobInfoByRequests(requestInfo.keys())
            self._combineRequestAndJobData(requestInfo, jobInfoByRequestAndAgent)

    def _getCouchView(self, view, options, keys=None):
        keys = keys or []
        options = self.setDefaultStaleOptions(options)

        if keys and isinstance(keys, str):
            keys = [keys]
        return self.couchDB.loadView(self.couchapp, view, options, keys)

    def _formatCouchData(self, data, key="id"):
        result = {}
        for row in data['rows']:
            if 'error' in row:
                continue
            if "doc" in row:
                result[row[key]] = row["doc"]
            else:
                result[row[key]] = None
        return result

    def _combineRequestAndJobData(self, requestData, jobData):
        """
        update the request data with job info
        requestData['AgentJobInfo'] = {'vocms234.cern.ch:9999': {"_id":"d1d11dfcb30e0ab47db42007cb6fb847",
        "_rev":"1-8abfaa2de822ed081cb8d174e3e2c003",
        "status":{"inWMBS":334,"success":381,"submitted":{"retry":2,"pending":2},"failure":{"exception":3}},
        "agent_team":"testbed-integration","workflow":"amaltaro_OracleUpgrade_TEST_HG1401_140220_090116_6731",
        "timestamp":1394738860,"sites":{"T2_CH_CERN_AI":{"submitted":{"retry":1,"pending":1}},
        "T2_CH_CERN":{"success":6,"submitted":{"retry":1,"pending":1}},
        "T2_DE_DESY":{"failure":{"exception":3},"success":375}},
        "agent":"WMAgentCommissioning",
        "tasks":
           {"/amaltaro_OracleUpgrade_TEST_HG1401_140220_090116_6731/Production":
            {"status":{"failure":{"exception":3},"success":331},
             "sites":{"T2_DE_DESY": {"success":325,"wrappedTotalJobTime":11305908,
                                     "dataset":{},"failure":{"exception":3},
                                     "cmsRunCPUPerformance":{"totalJobCPU":10869688.8,
                                                             "totalEventCPU":10832426.7,
                                                             "totalJobTime":11255865.9},
                                     "inputEvents":0},
                      "T2_CH_CERN":{"success":6,"wrappedTotalJobTime":176573,
                                    "dataset":{},
                                    "cmsRunCPUPerformance":{"totalJobCPU":167324.8,
                                                            "totalEventCPU":166652.1,
                                                            "totalJobTime":174975.7},
                                    "inputEvents":0}},
             "subscription_status":{"updated":1393108089, "finished":2, "total":2,"open":0},
             "jobtype":"Production"},
            "/amaltaro_OracleUpgrade_TEST_HG1401_140220_090116_6731/Production/ProductionMergeRAWSIMoutput/ProductionRAWSIMoutputMergeLogCollect":
             {"jobtype":"LogCollect",
              "subscription_status":{"updated":1392885768,
              "finished":0,
              "total":1,"open":1}},
            "/amaltaro_OracleUpgrade_TEST_HG1401_140220_090116_6731/Production/ProductionMergeRAWSIMoutput":
              {"status":{"success":41,"submitted":{"retry":1,"pending":1}},
                "sites":{"T2_DE_DESY":{"datasetStat":{"totalLumis":973,"events":97300,"size":105698406915},
                                       "success":41,"wrappedTotalJobTime":9190,
                                       "dataset":{"/GluGluToHTohhTo4B_mH-350_mh-125_8TeV-pythia6-tauola/Summer12-OracleUpgrade_TEST_ALAN_HG1401-v1/GEN-SIM":
                                                   {"totalLumis":973,"events":97300,"size":105698406915}},
                                       "cmsRunCPUPerformance":{"totalJobCPU":548.92532,"totalEventCPU":27.449808,"totalJobTime":2909.92125},
                                    "inputEvents":97300},
                         "T2_CH_CERN":{"submitted":{"retry":1,"pending":1}}},
                "subscription_status":{"updated":1392885768,"finished":0,"total":1,"open":1},
                "jobtype":"Merge"},
           "agent_url":"vocms231.cern.ch:9999",
           "type":"agent_request"}}
        """
        if jobData:
            for row in jobData["rows"]:
                # condition checks if documents are deleted between calls.
                # just ignore in that case
                if row["doc"]:
                    jobInfo = requestData[row["doc"]["workflow"]]
                    jobInfo.setdefault("AgentJobInfo", {})
                    jobInfo["AgentJobInfo"][row["doc"]["agent_url"]] = row["doc"]

    def _getRequestAndAgent(self, filterRequest=None):
        """
        returns the [['request_name', 'agent_url'], ....]
        """
        options = {}
        options["reduce"] = True
        options["group"] = True
        result = self._getCouchView("requestAgentUrl", options)

        if filterRequest is None:
            keys = [row['key'] for row in result["rows"]]
        else:
            keys = [row['key'] for row in result["rows"] if row['key'][0] in filterRequest]
        return keys

    def _getLatestJobInfo(self, keys):
        """
        keys is [['request_name', 'agent_url'], ....]
        returns ids
        """
        if len(keys) == 0:
            return []
        options = {"include_docs": True}
        options["reduce"] = False
        result = self._getCouchView("latestRequest", options, keys)
        return result

    def _getAllDocsByIDs(self, ids, include_docs=True):
        """
        keys is [id, ....]
        returns document
        """
        if len(ids) == 0:
            return None
        options = {}
        options["include_docs"] = include_docs
        result = self.couchDB.allDocs(options, ids)

        return result

    def _getAgentInfo(self):
        """
        returns all the agents status on wmstats
        """
        options = {}
        result = self._getCouchView("agentInfo", options)

        return result

    def agentsByTeam(self, filterDrain=False):
        """
        return a dictionary like {team:#agents,...}
        """
        result = self._getAgentInfo()
        response = dict()

        for agentInfo in result["rows"]:
            #filtering empty string
            team = agentInfo['value']['agent_team']
            if not team:
                continue

            response.setdefault(team, 0)
            if filterDrain:
                if not agentInfo['value'].get('drain_mode', False):
                    response[team] += 1
            else:
                response[team] += 1

        return response

    def getServerInstance(self):
        return self.couchServer

    def getDBInstance(self):
        return self.couchDB

    def getRequestDBInstance(self):
        return self.reqDB

    def getHeartbeat(self):
        try:
            return self.couchDB.info()
        except Exception as ex:
            return {'error_message': str(ex)}

    def getRequestByNames(self, requestNames, jobInfoFlag=False):
        """
        To use this function reqDBURL need to be set when wmstats initialized.
        This will be deplicated so please don use this.
        """
        requestInfo = self.reqDB.getRequestByNames(requestNames, True)

        if jobInfoFlag:
            # get request and agent info
            self._updateRequestInfoWithJobInfo(requestInfo)
        return requestInfo

    def getActiveData(self, jobInfoFlag=False):

        return self.getRequestByStatus(WMStatsReader.ACTIVE_STATUS, jobInfoFlag)

    def getT0ActiveData(self, jobInfoFlag=False):

        return self.getRequestByStatus(WMStatsReader.T0_ACTIVE_STATUS, jobInfoFlag)

    def getRequestByStatus(self, statusList, jobInfoFlag=False, limit=None, skip=None,
                           legacyFormat=False):

        """
        To use this function reqDBURL need to be set when wmstats initialized.
        This will be deplicated so please don use this.
        If legacyFormat is True convert data to old wmstats format from current reqmgr format.
        Shouldn't be set to True unless existing code breaks
        """

        requestInfo = self.reqDB.getRequestByStatus(statusList, True, limit, skip)

        if legacyFormat:
            # convert the format to wmstas old format
            for requestName, doc in requestInfo.items():
                requestInfo[requestName] = convertToLegacyFormat(doc)

        if jobInfoFlag:
            # get request and agent info
            self._updateRequestInfoWithJobInfo(requestInfo)
        return requestInfo

    def getRequestSummaryWithJobInfo(self, requestName):
        """
        get request info with job status
        """
        requestInfo = self.reqDB.getRequestByNames(requestName)
        self._updateRequestInfoWithJobInfo(requestInfo)
        return requestInfo

    def getArchivedRequests(self):
        """
        get list of archived workflow in wmstats db.
        """

        options = {"group_level": 1, "reduce": True}

        results = self.couchDB.loadView(self.couchapp, "allWorkflows", options=options)['rows']
        requestNames = [x['key'] for x in results]

        workflowDict = self.reqDB.getStatusAndTypeByRequest(requestNames)
        archivedRequests = []
        for request, value in workflowDict.items():
            if value[0].endswith("-archived"):
                archivedRequests.append(request)

        return archivedRequests

    def isWorkflowCompletedWithLogCollectAndCleanUp(self, requestName):
        """
        check whether workflow  is completed including LogCollect and CleanUp tasks
        TODO: If the parent task all failed and next task are not created at all,
            It can't detect complete status.
            If the one of the task doesn't contain any jobs, it will return False
        """

        requestInfo = self.getRequestSummaryWithJobInfo(requestName)
        reqInfoInstance = RequestInfo(requestInfo[requestName])
        return reqInfoInstance.isWorkflowFinished()

    def getTaskJobSummaryByRequest(self, requestName, sampleSize=1):

        options = {'reduce': True, 'group_level': 5, 'startkey': [requestName],
                   'endkey': [requestName, {}]}
        results = self.couchDB.loadView(self.couchapp, "jobsByStatusWorkflow", options=options)
        jobDetails = {}
        for row in results['rows']:
            # row["key"] = ['workflow', 'task', 'jobstatus', 'exitCode', 'site']
            startKey = row["key"][:4]
            endKey = []
            site = row["key"][4]
            if site:
                startKey.append(site)

            endKey.extend(startKey)
            endKey.append({})
            numOfError = row["value"]

            jobInfo = self.jobDetailByTasks(startKey, endKey, numOfError, sampleSize)
            jobDetails = nestedDictUpdate(jobDetails, jobInfo)
        return jobDetails

    def jobDetailByTasks(self, startKey, endKey, numOfError, limit=1):
        options = {'include_docs': True, 'reduce': False,
                   'startkey': startKey, 'endkey': endKey,
                   'limit': limit}
        result = self.couchDB.loadView(self.couchapp, "jobsByStatusWorkflow", options=options)
        jobInfoDoc = {}
        for row in result['rows']:
            keys = row['key']
            workflow = keys[0]
            task = keys[1]
            jobStatus = keys[2]
            exitCode = keys[3]
            site = keys[4]

            jobInfoDoc.setdefault(workflow, {})
            jobInfoDoc[workflow].setdefault(task, {})
            jobInfoDoc[workflow][task].setdefault(jobStatus, {})
            jobInfoDoc[workflow][task][jobStatus].setdefault(exitCode, {})
            jobInfoDoc[workflow][task][jobStatus][exitCode].setdefault(site, {})
            finalStruct = jobInfoDoc[workflow][task][jobStatus][exitCode][site]
            finalStruct["errorCount"] = numOfError
            finalStruct.setdefault("samples", [])
            finalStruct["samples"].append(row["doc"])

        return jobInfoDoc

    def getAllAgentRequestRevByID(self, agentURL):
        options = {"reduce": False}
        results = self.couchDB.loadView(self.couchapp, "byAgentURL", options=options, keys=[agentURL])
        idRevMap = {}
        for row in results['rows']:
            idRevMap[row['id']] = row['value']['rev']

        return idRevMap
Exemplo n.º 54
0
class WorkQueueBackend(object):
    """
    Represents persistent storage for WorkQueue
    """
    def __init__(self,
                 db_url,
                 db_name='workqueue',
                 inbox_name=None,
                 parentQueue=None,
                 queueUrl=None,
                 logger=None):
        if logger:
            self.logger = logger
        else:
            import logging
            self.logger = logging

        if inbox_name is None:
            inbox_name = "%s_inbox" % db_name

        self.server = CouchServer(db_url)
        self.parentCouchUrlWithAuth = parentQueue
        if parentQueue:
            self.parentCouchUrl = sanitizeURL(parentQueue)['url']
        else:
            self.parentCouchUrl = None
        self.db = self.server.connectDatabase(db_name,
                                              create=False,
                                              size=10000)
        self.hostWithAuth = db_url
        self.inbox = self.server.connectDatabase(inbox_name,
                                                 create=False,
                                                 size=10000)
        self.queueUrl = sanitizeURL(queueUrl
                                    or (db_url + '/' + db_name))['url']
        self.eleKey = 'WMCore.WorkQueue.DataStructs.WorkQueueElement.WorkQueueElement'

    def forceQueueSync(self):
        """Force a blocking replication - used only in tests"""
        self.pullFromParent(continuous=False)
        self.sendToParent(continuous=False)

    def pullFromParent(self, continuous=True, cancel=False):
        """Replicate from parent couch - blocking: used only int test"""
        try:
            if self.parentCouchUrl and self.queueUrl:
                self.server.replicate(source=self.parentCouchUrl,
                                      destination="%s/%s" %
                                      (self.hostWithAuth, self.inbox.name),
                                      filter='WorkQueue/queueFilter',
                                      query_params={
                                          'childUrl': self.queueUrl,
                                          'parentUrl': self.parentCouchUrl
                                      },
                                      continuous=continuous,
                                      cancel=cancel)
        except Exception as ex:
            self.logger.warning('Replication from %s failed: %s' %
                                (self.parentCouchUrl, str(ex)))

    def sendToParent(self, continuous=True, cancel=False):
        """Replicate to parent couch - blocking: used only int test"""
        try:
            if self.parentCouchUrl and self.queueUrl:
                self.server.replicate(source="%s" % self.inbox.name,
                                      destination=self.parentCouchUrlWithAuth,
                                      filter='WorkQueue/queueFilter',
                                      query_params={
                                          'childUrl': self.queueUrl,
                                          'parentUrl': self.parentCouchUrl
                                      },
                                      continuous=continuous,
                                      cancel=cancel)
        except Exception as ex:
            self.logger.warning('Replication to %s failed: %s' %
                                (self.parentCouchUrl, str(ex)))

    def getElementsForSplitting(self):
        """Returns the elements from the inbox that need to be split,
        if WorkflowName specified only return elements to split for that workflow"""
        elements = self.getInboxElements(status='Negotiating')
        specs = {}  # cache as may have multiple elements for same spec
        for ele in elements:
            if ele['RequestName'] not in specs:
                wmspec = WMWorkloadHelper()
                wmspec.load(self.parentCouchUrlWithAuth +
                            "/%s/spec" % ele['RequestName'])
                specs[ele['RequestName']] = wmspec
            ele['WMSpec'] = specs[ele['RequestName']]
        del specs
        return elements

    def insertWMSpec(self, wmspec):
        """
        Insert WMSpec to backend
        """
        # Can't save spec to inbox, it needs to be visible to child queues
        # Can't save empty dict so add dummy variable
        dummy_values = {'name': wmspec.name()}
        # change specUrl in spec before saving (otherwise it points to previous url)
        wmspec.setSpecUrl(self.db['host'] + "/%s/%s/spec" %
                          (self.db.name, wmspec.name()))
        return wmspec.saveCouch(self.hostWithAuth, self.db.name, dummy_values)

    def getWMSpec(self, name):
        """Get the spec"""
        wmspec = WMWorkloadHelper()
        wmspec.load(self.db['host'] + "/%s/%s/spec" % (self.db.name, name))
        return wmspec

    def insertElements(self, units, parent=None):
        """
        Insert element to database

        @param parent is the parent WorkQueueObject these element's belong to.
                                            i.e. a workflow which has been split
        """
        if not units:
            return
        # store spec file separately - assume all elements share same spec
        self.insertWMSpec(units[0]['WMSpec'])
        newUnitsInserted = []
        for unit in units:

            # cast to couch
            if not isinstance(unit, CouchWorkQueueElement):
                unit = CouchWorkQueueElement(self.db, elementParams=dict(unit))

            if parent:
                unit['ParentQueueId'] = parent.id
                unit['TeamName'] = parent['TeamName']
                unit['WMBSUrl'] = parent['WMBSUrl']

            if unit._couch.documentExists(unit.id):
                self.logger.info(
                    'Element "%s" already exists, skip insertion.' % unit.id)
                continue
            else:
                newUnitsInserted.append(unit)
            unit.save()
            unit._couch.commit(all_or_nothing=True)

        return newUnitsInserted

    def createWork(self, spec, **kwargs):
        """Return the Inbox element for this spec.

        This does not persist it to the database.
        """
        kwargs.update({
            'WMSpec': spec,
            'RequestName': spec.name(),
            'StartPolicy': spec.startPolicyParameters(),
            'EndPolicy': spec.endPolicyParameters(),
            'OpenForNewData': False
        })
        unit = CouchWorkQueueElement(self.inbox, elementParams=kwargs)
        unit.id = spec.name()
        return unit

    def getElements(self,
                    status=None,
                    elementIDs=None,
                    returnIdOnly=False,
                    db=None,
                    loadSpec=False,
                    WorkflowName=None,
                    **elementFilters):
        """Return elements that match requirements

        status, elementIDs & filters are 'AND'ed together to filter elements.
        returnIdOnly causes the element not to be loaded and only the id returned
        db is used to specify which database to return from
        loadSpec causes the workflow for each spec to be loaded.
        WorkflowName may be used in the place of RequestName
        """
        key = []
        if not db:
            db = self.db
        if elementFilters.get('RequestName') and not WorkflowName:
            WorkflowName = elementFilters.pop('RequestName')

        if elementIDs:
            if elementFilters or status or returnIdOnly:
                raise ValueError(
                    "Can't specify extra filters (or return id's) when using element id's with getElements()"
                )
            elements = [
                CouchWorkQueueElement(db, i).load() for i in elementIDs
            ]
        else:
            options = {
                'include_docs': True,
                'filter': elementFilters,
                'idOnly': returnIdOnly,
                'reduce': False
            }
            # filter on workflow or status if possible
            filterName = 'elementsByWorkflow'
            if WorkflowName:
                key.append(WorkflowName)
            elif status:
                filterName = 'elementsByStatus'
                key.append(status)
            elif elementFilters.get('SubscriptionId'):
                key.append(elementFilters['SubscriptionId'])
                filterName = 'elementsBySubscription'
            # add given params to filters
            if status:
                options['filter']['Status'] = status
            if WorkflowName:
                options['filter']['RequestName'] = WorkflowName

            view = db.loadList('WorkQueue', 'filter', filterName, options, key)
            view = json.loads(view)
            if returnIdOnly:
                return view
            elements = [
                CouchWorkQueueElement.fromDocument(db, row) for row in view
            ]

        if loadSpec:
            specs = {}  # cache as may have multiple elements for same spec
            for ele in elements:
                if ele['RequestName'] not in specs:
                    wmspec = self.getWMSpec(ele['RequestName'])
                    specs[ele['RequestName']] = wmspec
                ele['WMSpec'] = specs[ele['RequestName']]
            del specs
        return elements

    def getInboxElements(self, *args, **kwargs):
        """
        Return elements from Inbox, supports same semantics as getElements()
        """
        return self.getElements(*args, db=self.inbox, **kwargs)

    def getElementsForWorkflow(self, workflow):
        """Get elements for a workflow"""
        elements = self.db.loadView('WorkQueue', 'elementsByWorkflow', {
            'key': workflow,
            'include_docs': True,
            'reduce': False
        })
        return [
            CouchWorkQueueElement.fromDocument(self.db, x['doc'])
            for x in elements.get('rows', [])
        ]

    def getElementsForParent(self, parent):
        """Get elements with the given parent"""
        elements = self.db.loadView('WorkQueue', 'elementsByParent', {
            'key': parent.id,
            'include_docs': True
        })
        return [
            CouchWorkQueueElement.fromDocument(self.db, x['doc'])
            for x in elements.get('rows', [])
        ]

    def saveElements(self, *elements):
        """Persist elements

        Returns elements successfully saved, user must verify to catch errors
        """
        result = []
        if not elements:
            return result
        for element in elements:
            element.save()
        answer = elements[0]._couch.commit()
        result, failures = formatReply(answer, *elements)
        msg = 'Couch error saving element: "%s", error "%s", reason "%s"'
        for failed in failures:
            self.logger.error(
                msg % (failed['id'], failed['error'], failed['reason']))
        return result

    def _raiseConflictErrorAndLog(self,
                                  conflictIDs,
                                  updatedParams,
                                  dbName="workqueue"):
        errorMsg = "Need to update this element manually from %s\n ids:%s\n, parameters:%s\n" % (
            dbName, conflictIDs, updatedParams)
        self.logger.error(errorMsg)
        raise WorkQueueError(errorMsg)

    def updateElements(self, *elementIds, **updatedParams):
        """Update given element's (identified by id) with new parameters"""
        if not elementIds:
            return
        eleParams = {}
        eleParams[self.eleKey] = updatedParams
        conflictIDs = self.db.updateBulkDocumentsWithConflictHandle(
            elementIds, eleParams)
        if conflictIDs:
            self._raiseConflictErrorAndLog(conflictIDs, updatedParams)
        return

    def updateInboxElements(self, *elementIds, **updatedParams):
        """Update given inbox element's (identified by id) with new parameters"""
        if not elementIds:
            return
        eleParams = {}
        eleParams[self.eleKey] = updatedParams
        conflictIDs = self.inbox.updateBulkDocumentsWithConflictHandle(
            elementIds, eleParams)
        if conflictIDs:
            self._raiseConflictErrorAndLog(conflictIDs, updatedParams,
                                           "workqueue_inbox")
        return

    def deleteElements(self, *elements):
        """Delete elements"""
        if not elements:
            return
        specs = {}
        for i in elements:
            i.delete()
            specs[i['RequestName']] = None
        answer = elements[0]._couch.commit()
        _, failures = formatReply(answer, *elements)
        msg = 'Couch error deleting element: "%s", error "%s", reason "%s"'
        for failed in failures:
            # only count delete as failed if document still exists
            if elements[0]._couch.documentExists(failed['id']):
                self.logger.error(
                    msg % (failed['id'], failed['error'], failed['reason']))
        # delete specs if no longer used
        for wf in specs:
            try:
                if not self.db.loadView('WorkQueue', 'elementsByWorkflow', {
                        'key': wf,
                        'limit': 1,
                        'reduce': False
                })['rows']:
                    self.db.delete_doc(wf)
            except CouchNotFoundError:
                pass

    def availableWork(self,
                      thresholds,
                      siteJobCounts,
                      team=None,
                      wfs=None,
                      excludeWorkflows=None,
                      numElems=9999999):
        """
        Get work which is available to be run

        Assume thresholds is a dictionary; keys are the site name, values are
        the maximum number of running jobs at that site.

        Assumes site_job_counts is a dictionary-of-dictionaries; keys are the site
        name and task priorities.  The value is the number of jobs running at that
        priority.

        It will pull work until it reaches the number of elements configured (numElems).
        Since it's also used for calculating free resources, default it to "infinity"

        Note: this method will be called with no limit of work elements when it's simply
        calculating the resources available (based on what is in LQ), before it gets work
        from GQ
        """
        self.logger.info("Getting up to %d available work from %s", numElems,
                         self.queueUrl)

        excludeWorkflows = excludeWorkflows or []
        elements = []
        sortedElements = []

        # We used to pre-filter sites, looking to see if there are idle job slots
        # We don't do this anymore, as we may over-allocate
        # jobs to sites if the new jobs have a higher priority.

        # If there are no sites, punt early.
        if not thresholds:
            self.logger.error("No thresholds is set: Please check")
            return elements, thresholds, siteJobCounts

        options = {}
        options['include_docs'] = True
        options['descending'] = True
        options['resources'] = thresholds
        if team:
            options['team'] = team
            self.logger.info("setting team to %s" % team)
        if wfs:
            result = []
            for i in xrange(0, len(wfs), 20):
                options['wfs'] = wfs[i:i + 20]
                data = self.db.loadList('WorkQueue', 'workRestrictions',
                                        'availableByPriority', options)
                result.extend(json.loads(data))
        else:
            result = self.db.loadList('WorkQueue', 'workRestrictions',
                                      'availableByPriority', options)
            result = json.loads(result)
            if len(result) == 0:
                self.logger.info(
                    """No available work in WQ or didn't pass workqueue restriction
                                    - check Pileup, site white list, etc""")
            self.logger.debug("Available Work:\n %s \n for resources\n %s" %
                              (result, thresholds))
        # Iterate through the results; apply whitelist / blacklist / data
        # locality restrictions.  Only assign jobs if they are high enough
        # priority.
        for i in result:
            element = CouchWorkQueueElement.fromDocument(self.db, i)
            # filter out exclude list from abvaling
            if element['RequestName'] not in excludeWorkflows:
                sortedElements.append(element)

        # sort elements to get them in priority first and timestamp order
        sortedElements.sort(key=lambda element: element['CreationTime'])
        sortedElements.sort(key=lambda x: x['Priority'], reverse=True)

        for element in sortedElements:
            if numElems <= 0:
                self.logger.info(
                    "Reached the maximum number of elements to be pulled: %d",
                    len(elements))
                break

            if not possibleSites(element):
                self.logger.info("No possible sites for %s with doc id %s",
                                 element['RequestName'], element.id)
                continue

            prio = element['Priority']
            possibleSite = None
            sites = thresholds.keys()
            random.shuffle(sites)
            for site in sites:
                if element.passesSiteRestriction(site):
                    # Count the number of jobs currently running of greater priority
                    curJobCount = sum([
                        x[1] if x[0] >= prio else 0
                        for x in siteJobCounts.get(site, {}).items()
                    ])
                    self.logger.debug(
                        "Job Count: %s, site: %s thresholds: %s" %
                        (curJobCount, site, thresholds[site]))
                    if curJobCount < thresholds[site]:
                        possibleSite = site
                        break

            if possibleSite:
                numElems -= 1
                self.logger.debug("Possible site exists %s" %
                                  str(possibleSite))
                elements.append(element)
                if possibleSite not in siteJobCounts:
                    siteJobCounts[possibleSite] = {}
                siteJobCounts[possibleSite][prio] = siteJobCounts[possibleSite].setdefault(prio, 0) + \
                                                    element['Jobs'] * element.get('blowupFactor', 1.0)
            else:
                self.logger.debug(
                    "No available resources for %s with doc id %s",
                    element['RequestName'], element.id)

        return elements, thresholds, siteJobCounts

    def getActiveData(self):
        """Get data items we have work in the queue for"""
        data = self.db.loadView('WorkQueue', 'activeData', {
            'reduce': True,
            'group': True
        })
        return [{
            'dbs_url': x['key'][0],
            'name': x['key'][1]
        } for x in data.get('rows', [])]

    def getActiveParentData(self):
        """Get data items we have work in the queue for with parent"""
        data = self.db.loadView('WorkQueue', 'activeParentData', {
            'reduce': True,
            'group': True
        })
        return [{
            'dbs_url': x['key'][0],
            'name': x['key'][1]
        } for x in data.get('rows', [])]

    def getActivePileupData(self):
        """Get data items we have work in the queue for with pileup"""
        data = self.db.loadView('WorkQueue', 'activePileupData', {
            'reduce': True,
            'group': True
        })
        return [{
            'dbs_url': x['key'][0],
            'name': x['key'][1]
        } for x in data.get('rows', [])]

    def getElementsForData(self, data):
        """Get active elements for this dbs & data combo"""
        elements = self.db.loadView('WorkQueue', 'elementsByData', {
            'key': data,
            'include_docs': True
        })
        return [
            CouchWorkQueueElement.fromDocument(self.db, x['doc'])
            for x in elements.get('rows', [])
        ]

    def getElementsForParentData(self, data):
        """Get active elements for this data """
        elements = self.db.loadView('WorkQueue', 'elementsByParentData', {
            'key': data,
            'include_docs': True
        })
        return [
            CouchWorkQueueElement.fromDocument(self.db, x['doc'])
            for x in elements.get('rows', [])
        ]

    def getElementsForPileupData(self, data):
        """Get active elements for this data """
        elements = self.db.loadView('WorkQueue', 'elementsByPileupData', {
            'key': data,
            'include_docs': True
        })
        return [
            CouchWorkQueueElement.fromDocument(self.db, x['doc'])
            for x in elements.get('rows', [])
        ]

    def isAvailable(self):
        """Is the server available, i.e. up and not compacting"""
        try:
            compacting = self.db.info()['compact_running']
            if compacting:
                self.logger.info("CouchDB compacting - try again later.")
                return False
        except Exception as ex:
            self.logger.error("CouchDB unavailable: %s" % str(ex))
            return False
        return True

    def getWorkflows(self, includeInbox=False, includeSpecs=False):
        """Returns workflows known to workqueue"""
        result = set([
            x['key'] for x in self.db.loadView(
                'WorkQueue', 'elementsByWorkflow', {'group': True})['rows']
        ])
        if includeInbox:
            result = result | set([
                x['key'] for x in self.inbox.loadView(
                    'WorkQueue', 'elementsByWorkflow', {'group': True})['rows']
            ])
        if includeSpecs:
            result = result | set([
                x['key'] for x in self.db.loadView('WorkQueue',
                                                   'specsByWorkflow')['rows']
            ])
        return list(result)

    def queueLength(self):
        """Return number of available elements"""
        return self.db.loadView('WorkQueue', 'availableByPriority',
                                {'limit': 0})['total_rows']

    def fixConflicts(self):
        """Fix elements in conflict

        Each local queue runs this to resolve its conflicts with global,
        resolution propagates up to global.

        Conflicting elements are merged into one element with others deleted.

        This will fail if elements are modified during the resolution -
        if this happens rerun.
        """
        for db in [self.inbox, self.db]:
            for row in db.loadView('WorkQueue', 'conflicts')['rows']:
                element_id = row['id']
                try:
                    conflicting_elements = [CouchWorkQueueElement.fromDocument(db, db.document(element_id, rev)) \
                                            for rev in row['value']]
                    fixed_elements = fixElementConflicts(*conflicting_elements)
                    if self.saveElements(fixed_elements[0]):
                        self.saveElements(
                            *fixed_elements[1:]
                        )  # delete others (if merged value update accepted)
                except Exception as ex:
                    self.logger.error("Error resolving conflict for %s: %s" %
                                      (element_id, str(ex)))

    def recordTaskActivity(self, taskname, comment=''):
        """Record a task for monitoring"""
        try:
            record = self.db.document('task_activity')
        except CouchNotFoundError:
            record = Document('task_activity')
        record.setdefault('tasks', {})
        record['tasks'].setdefault(taskname, {})
        record['tasks'][taskname]['timestamp'] = time.time()
        record['tasks'][taskname]['comment'] = comment
        try:
            self.db.commitOne(record)
        except Exception as ex:
            self.logger.error("Unable to update task %s freshness: %s" %
                              (taskname, str(ex)))

    def getWMBSInjectStatus(self, request=None):
        """
        This service only provided by global queue except on draining agent
        """
        options = {'group': True, 'reduce': True}
        if request:
            options.update(key=request)
        data = self.db.loadView('WorkQueue', 'wmbsInjectStatusByRequest',
                                options)
        if request:
            if data['rows']:
                injectionStatus = data['rows'][0]['value']
                inboxElement = self.getInboxElements(WorkflowName=request)
                requestOpen = inboxElement[0].get(
                    'OpenForNewData', False) if inboxElement else False
                return injectionStatus and not requestOpen
            else:
                raise WorkQueueNoMatchingElements("%s not found" % request)
        else:
            injectionStatus = dict(
                (x['key'], x['value']) for x in data.get('rows', []))
            finalInjectionStatus = []
            for request in injectionStatus.keys():
                inboxElement = self.getInboxElements(WorkflowName=request)
                requestOpen = inboxElement[0].get(
                    'OpenForNewData', False) if inboxElement else False
                finalInjectionStatus.append(
                    {request: injectionStatus[request] and not requestOpen})

            return finalInjectionStatus

    def getWorkflowNames(self, inboxFlag=False):
        """Get workflow names from workqueue db"""
        if inboxFlag:
            db = self.inbox
        else:
            db = self.db
        data = db.loadView('WorkQueue', 'elementsByWorkflow', {
            'stale': "update_after",
            'reduce': True,
            'group': True
        })
        return [x['key'] for x in data.get('rows', [])]

    def deleteWQElementsByWorkflow(self, workflowNames):
        """
        delete workqueue elements belongs to given workflow names
        it doen't check the status of workflow so need to be careful to use this.
        Pass only workflows which has the end status
        """
        deleted = 0
        dbs = [self.db, self.inbox]
        if not isinstance(workflowNames, list):
            workflowNames = [workflowNames]

        if len(workflowNames) == 0:
            return deleted
        options = {}
        options["stale"] = "update_after"
        options["reduce"] = False

        for couchdb in dbs:
            result = couchdb.loadView("WorkQueue", "elementsByWorkflow",
                                      options, workflowNames)
            ids = []
            for entry in result["rows"]:
                ids.append(entry["id"])
            if ids:
                couchdb.bulkDeleteByIDs(ids)
                deleted += len(ids)
        # delete the workflow with spec from workqueue db
        for wf in workflowNames:
            self.db.delete_doc(wf)
        return deleted
Exemplo n.º 55
0
def main(options):
    url, dbName = splitCouchServiceURL(options.dburl)
    db = CouchServer(url).connectDatabase(dbName)
    reqmgr_requests = generate_reqmgr_requests(options.requests)
    agent_requests = generate_agent_requests(options.requests,
                                             options.iterations)

    if options.add_couchapp:
        installCouchApp(url, dbName, "WMStats", options.couchapp_base)

    if options.add_reqmgr_data:
        for req in reqmgr_requests:
            db.queue(req)
        db.commit()
        print("Added %s reqmgr requests" % len(reqmgr_requests))

    if options.add_agent_data:
        for req in agent_requests:
            db.queue(req)
            jobDocs = generate_jobsummary(req['workflow'])
            for job in jobDocs:
                db.queue(job)
        db.commit()
        print("Added %s agent requests" % len(agent_requests))
        print("Added %s job Docs" % (len(agent_requests) * len(jobDocs)))
Exemplo n.º 56
0
def gatherWMDataMiningStats(wmstatsUrl,
                            reqmgrUrl,
                            wmMiningUrl,
                            mcmUrl,
                            mcmCert,
                            mcmKey,
                            tmpDir,
                            archived=False,
                            log=logging.info):
    server, database = splitCouchServiceURL(wmMiningUrl)
    analyticsServer = CouchServer(server)
    couchdb = analyticsServer.connectDatabase(database)

    WMStats = WMStatsReader(wmstatsUrl,
                            reqdbURL=reqmgrUrl,
                            reqdbCouchApp="ReqMgr")

    reqMgrServer, reqMgrDB = splitCouchServiceURL(reqmgrUrl)

    reqMgr = CouchServer(reqMgrServer).connectDatabase(reqMgrDB, False)

    if archived:
        funcName = "Archived Requests"
    else:
        funcName = "Active Requests"

    log.info("%s: Getting job information from %s and %s. Please wait." %
             (funcName, wmstatsUrl, reqmgrUrl))

    if archived:
        checkStates = [
            'normal-archived', 'rejected-archived', 'aborted-archived'
        ]
        jobInfoFlag = False
    else:
        checkStates = WMStatsReader.ACTIVE_STATUS
        jobInfoFlag = True
    requests = WMStats.getRequestByStatus(checkStates,
                                          jobInfoFlag=jobInfoFlag,
                                          legacyFormat=True)

    requestCollection = RequestInfoCollection(requests)
    result = requestCollection.getJSONData()
    requestsDict = requestCollection.getData()
    log.info("%s: Total %s requests retrieved\n" % (funcName, len(result)))

    report = {}
    nMCMCalls = 0
    with McM(cert=mcmCert, key=mcmKey, url=mcmUrl, tmpDir=tmpDir) as mcm:
        for wf in result.keys():

            # Store a copy of the CouchDB document so we can compare later before updating
            if couchdb.documentExists(wf):
                oldCouchDoc = couchdb.document(wf)
                wfExists = True
            else:
                oldCouchDoc = CouchDoc(id=wf)
                wfExists = False

            newCouchDoc = copy.deepcopy(oldCouchDoc)
            ancientCouchDoc = copy.deepcopy(oldCouchDoc)
            report[wf] = oldCouchDoc
            # FIXME: remove report, only have two instances of couchDoc

            if 'filterEfficiency' not in oldCouchDoc or 'runWhiteList' not in oldCouchDoc:
                runWhiteList = []
                filterEfficiency = None
                try:
                    # log.debug("Looking up %s in ReqMgr" % wf)
                    rmDoc = reqMgr.document(wf)
                    runWhiteList = rmDoc.get('RunWhiteList', [])
                    filterEfficiency = rmDoc.get('FilterEfficiency', None)
                except:
                    pass  # ReqMgr no longer has the workflow
                report[wf].update({
                    'filterEfficiency': filterEfficiency,
                    'runWhiteList': runWhiteList
                })

            if oldCouchDoc.get('mcmTotalEvents', 'Unknown') == 'Unknown' or \
                oldCouchDoc.get('mcmApprovalTime', 'Unknown') == 'Unknown':

                prepID = oldCouchDoc.get('prepID', None)
                if prepID and nMCMCalls <= maxMCMCalls:
                    log.info("Trying to update McM info for %s, PREPID %s" %
                             (wf, prepID))
                    # Get information from McM. Don't call too many times, can take a long time
                    nMCMCalls += 1
                    try:
                        mcmHistory = mcm.getHistory(prepID=prepID)
                        if 'mcmApprovalTime' not in oldCouchDoc:
                            report[wf].update({'mcmApprovalTime': 'NoMcMData'})
                        found = False
                        for entry in mcmHistory:
                            if entry['action'] == 'set status' and entry[
                                    'step'] == 'announced':
                                dateString = entry['updater'][
                                    'submission_date']
                                dt = datetime.strptime(dateString,
                                                       '%Y-%m-%d-%H-%M')
                                report[wf].update({
                                    'mcmApprovalTime':
                                    time.mktime(dt.timetuple())
                                })
                                found = True
                        if not found:
                            log.error(
                                "History found but no approval time for %s" %
                                wf)
                    except McMNoDataError:
                        log.error("Setting NoMcMData for %s" % wf)
                        report[wf].update({'mcmApprovalTime': 'NoMcMData'})
                    except (RuntimeError, IOError):
                        exc_type, dummy_exc_value, dummy_exc_traceback = sys.exc_info(
                        )
                        log.error(
                            "%s getting history from McM for PREP ID %s. May be transient and/or SSO problem."
                            % (exc_type, prepID))
                    except:
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        log.error(
                            "%s getting history from McM for PREP ID %s. Unknown error."
                            % (exc_type, prepID))

                    try:
                        mcmRequest = mcm.getRequest(prepID=prepID)
                        report[wf].update({
                            'mcmTotalEvents':
                            mcmRequest.get('total_events', 'NoMcMData')
                        })
                    except (RuntimeError, IOError):
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        log.error(
                            "%s getting request from McM for PREP ID %s. May be transient and/or SSO problem."
                            % (exc_type, prepID))
                    except:
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        log.error(
                            "%s getting request from McM for PREP ID %s. Unknown error."
                            % (exc_type, prepID))

            # Basic parameters of the workflow
            priority = requests[wf].get('priority', 0)
            requestType = requests[wf].get('request_type', 'Unknown')
            targetLumis = requests[wf].get('input_lumis', 0)
            targetEvents = requests[wf].get('input_events', 0)
            campaign = requests[wf].get('campaign', 'Unknown')
            prep_id = requests[wf].get('prep_id', None)
            outputdatasets = requests[wf].get('outputdatasets', [])
            statuses = requests[wf].get('request_status', [])

            if not statuses:
                log.error("Could not find any status from workflow: %s" %
                          wf)  # Should not happen but it does.

            # Remove a single  task_ from the start of PREP ID if it exists
            if prep_id and prep_id.startswith('task_'):
                prep_id.replace('task_', '', 1)

            # Can be an empty list, full list, empty string, or non-empty string!
            inputdataset = requests[wf].get('inputdataset', "")
            if isinstance(inputdataset, list):
                if inputdataset:
                    inputdataset = inputdataset[0]
                else:
                    inputdataset = ''

            outputTier = 'Unknown'
            try:
                outputTiers = []
                for ds in outputdatasets:
                    if isinstance(ds, list):
                        outputTiers.append(ds[0].split('/')[-1])
                    else:
                        outputTiers.append(ds.split('/')[-1])
            except:
                log.error(
                    "Could not decode outputdatasets: %s" % outputdatasets
                )  # Sometimes is a list of lists, not just a list. Bail
            if inputdataset:
                inputTier = inputdataset.split('/')[-1]
                if inputTier in ['GEN']:
                    outputTier = 'LHE'
                elif inputTier in ['RAW', 'RECO']:
                    outputTier = 'AOD'
                elif inputTier in ['GEN-SIM']:
                    outputTier = 'AODSIM'
                elif 'AODSIM' in outputTiers:
                    outputTier = 'AODSIM'

            else:
                if len(outputTiers) == 1 and 'GEN' in outputTiers:
                    if 'STEP0ATCERN' in wf:
                        outputTier = 'STEP0'
                    else:
                        outputTier = 'FullGen'
                elif 'GEN-SIM' in outputTiers and 'AODSIM' in outputTiers and requestType == 'TaskChain':
                    outputTier = 'RelVal'
                elif 'RECO' in outputTiers and requestType == 'TaskChain':
                    outputTier = 'RelVal'
                elif 'GEN-SIM' in outputTiers:
                    outputTier = 'GEN-SIM'
                elif 'AODSIM' in outputTiers:
                    outputTier = 'AODSIM'
                elif 'RECO' in outputTiers:
                    outputTier = 'AOD'
                elif 'AOD' in outputTiers:
                    outputTier = 'AOD'
                else:
                    outputTier = 'GEN-SIM'

            # Calculate completion ratios for events and lumi sections, take minimum for all datasets
            eventPercent = 200
            lumiPercent = 200
            datasetReports = requestsDict[
                wf].getProgressSummaryByOutputDataset()
            for dataset in datasetReports:
                dsr = datasetReports[dataset].getReport()
                events = dsr.get('events', 0)
                lumis = dsr.get('totalLumis', 0)
                if targetLumis:
                    lumiPercent = min(lumiPercent, lumis / targetLumis * 100)
                if targetEvents:
                    eventPercent = min(eventPercent,
                                       events / targetEvents * 100)
            if eventPercent > 100:
                eventPercent = 0
            if lumiPercent > 100:
                lumiPercent = 0

            # Sum up all jobs across agents to see if we've run the first, last
            successJobs = 0
            totalJobs = 0
            for agent in result[wf]:
                jobs = result[wf][agent]
                successJobs += jobs['sucess']
                totalJobs += jobs['created']
            try:
                if totalJobs and not report[wf].get('firstJobTime', None):
                    report[wf].update({'firstJobTime': int(time.time())})
                if totalJobs and successJobs == totalJobs and not report[
                        wf].get('lastJobTime', None):
                    report[wf].update({'lastJobTime': int(time.time())})
            except:
                pass

            # Figure out current status of workflow and transition times
            finalStatus = None
            newTime = None
            approvedTime = None
            assignedTime = None
            acquireTime = None
            completedTime = None
            closeoutTime = None
            announcedTime = None
            archivedTime = None
            requestDate = None

            for status in statuses:
                finalStatus = status['status']
                if status['status'] == 'new':
                    newTime = status['update_time']
                if status['status'] == 'assignment-approved':
                    approvedTime = status['update_time']
                if status['status'] == 'assigned':
                    assignedTime = status['update_time']
                if status['status'] == 'completed':
                    completedTime = status['update_time']
                if status['status'] == 'acquired':
                    acquireTime = status['update_time']
                if status['status'] == 'closed-out':
                    closeoutTime = status['update_time']
                if status['status'] == 'announced':
                    announcedTime = status['update_time']
                if status['status'] == 'normal-archived':
                    archivedTime = status['update_time']

            # Build or modify the report dictionary for the WF
            report.setdefault(wf, {})

            if approvedTime and not report[wf].get('approvedTime', None):
                report[wf].update({'approvedTime': approvedTime})
            if assignedTime and not report[wf].get('assignedTime', None):
                report[wf].update({'assignedTime': assignedTime})
            if acquireTime and not report[wf].get('acquireTime', None):
                report[wf].update({'acquireTime': acquireTime})
            if closeoutTime and not report[wf].get('closeoutTime', None):
                report[wf].update({'closeoutTime': closeoutTime})
            if announcedTime and not report[wf].get('announcedTime', None):
                report[wf].update({'announcedTime': announcedTime})
            if completedTime and not report[wf].get('completedTime', None):
                report[wf].update({'completedTime': completedTime})
            if newTime and not report[wf].get('newTime', None):
                report[wf].update({'newTime': newTime})
            if archivedTime and not report[wf].get('archivedTime', None):
                report[wf].update({'archivedTime': archivedTime})

            try:
                dt = requests[wf]['request_date']
                requestDate = '%4.4d-%2.2d-%2.2d %2.2d:%2.2d:%2.2d' % tuple(dt)
                report[wf].update({'requestDate': requestDate})
            except:
                pass

            report[wf].update({
                'priority': priority,
                'status': finalStatus,
                'type': requestType
            })
            report[wf].update({
                'totalLumis': targetLumis,
                'totalEvents': targetEvents,
            })
            report[wf].update({
                'campaign': campaign,
                'prepID': prep_id,
                'outputTier': outputTier,
            })
            report[wf].update({
                'outputDatasets': outputdatasets,
                'inputDataset': inputdataset,
            })

            report[wf].setdefault('lumiPercents', {})
            report[wf].setdefault('eventPercents', {})
            lumiProgress = 0
            eventProgress = 0
            for percentage in [
                    1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 98, 99, 100
            ]:
                percent = str(percentage)
                percentReported = report[wf]['lumiPercents'].get(percent, None)
                if not percentReported and lumiPercent >= percentage:
                    report[wf]['lumiPercents'][percent] = int(time.time())
                if lumiPercent >= percentage:
                    lumiProgress = percentage

                percentReported = report[wf]['eventPercents'].get(
                    percent, None)
                if not percentReported and eventPercent >= percentage:
                    report[wf]['eventPercents'][percent] = int(time.time())
                if eventPercent >= percentage:
                    eventProgress = percentage

            report[wf].update({
                'eventProgress': eventProgress,
                'lumiProgress': lumiProgress,
            })

            newCouchDoc.update(report[wf])

            # Queue the updated document for addition if it's changed.
            if ancientCouchDoc != newCouchDoc:
                if wfExists:
                    # log.debug("Workflow updated: %s" % wf)
                    pass
                else:
                    # log.debug("Workflow created: %s" % wf)
                    pass

                try:
                    newCouchDoc['updateTime'] = int(time.time())
                    report[wf]['updateTime'] = int(time.time())
                    dummy = json.dumps(
                        newCouchDoc
                    )  # Make sure it encodes before trying to queue
                    couchdb.queue(newCouchDoc)
                except:
                    log.error("Failed to queue document:%s \n" %
                              pprint.pprint(newCouchDoc))

    log.info("%s: Finished getting job. wait for the next Cycle" % funcName)
    # Commit all changes to CouchDB
    couchdb.commit()
Exemplo n.º 57
0
    def __init__(self, config):
        """
        __init__

        Create all DAO objects that are used by this class.
        """
        WMConnectionBase.__init__(self, "WMCore.WMBS")
        myThread = threading.currentThread()
        self.dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                        logger=myThread.logger,
                                        dbinterface=myThread.dbi)

        self.getOutputMapAction = self.daofactory(
            classname="Jobs.GetOutputMap")
        self.bulkAddToFilesetAction = self.daofactory(
            classname="Fileset.BulkAddByLFN")
        self.bulkParentageAction = self.daofactory(
            classname="Files.AddBulkParentage")
        self.getJobTypeAction = self.daofactory(classname="Jobs.GetType")
        self.getParentInfoAction = self.daofactory(
            classname="Files.GetParentInfo")
        self.setParentageByJob = self.daofactory(
            classname="Files.SetParentageByJob")
        self.setParentageByMergeJob = self.daofactory(
            classname="Files.SetParentageByMergeJob")
        self.setFileRunLumi = self.daofactory(classname="Files.AddRunLumi")
        self.setFileLocation = self.daofactory(
            classname="Files.SetLocationByLFN")
        self.setFileAddChecksum = self.daofactory(
            classname="Files.AddChecksumByLFN")
        self.addFileAction = self.daofactory(classname="Files.Add")
        self.jobCompleteInput = self.daofactory(classname="Jobs.CompleteInput")
        self.setBulkOutcome = self.daofactory(classname="Jobs.SetOutcomeBulk")
        self.getWorkflowSpec = self.daofactory(
            classname="Workflow.GetSpecAndNameFromTask")
        self.getJobInfoByID = self.daofactory(classname="Jobs.LoadFromID")
        self.getFullJobInfo = self.daofactory(
            classname="Jobs.LoadForErrorHandler")
        self.getJobTaskNameAction = self.daofactory(
            classname="Jobs.GetFWJRTaskName")
        self.pnn_to_psn = self.daofactory(
            classname="Locations.GetPNNtoPSNMapping").execute()

        self.dbsStatusAction = self.dbsDaoFactory(
            classname="DBSBufferFiles.SetStatus")
        self.dbsParentStatusAction = self.dbsDaoFactory(
            classname="DBSBufferFiles.GetParentStatus")
        self.dbsChildrenAction = self.dbsDaoFactory(
            classname="DBSBufferFiles.GetChildren")
        self.dbsCreateFiles = self.dbsDaoFactory(
            classname="DBSBufferFiles.Add")
        self.dbsSetLocation = self.dbsDaoFactory(
            classname="DBSBufferFiles.SetLocationByLFN")
        self.dbsInsertLocation = self.dbsDaoFactory(
            classname="DBSBufferFiles.AddLocation")
        self.dbsSetChecksum = self.dbsDaoFactory(
            classname="DBSBufferFiles.AddChecksumByLFN")
        self.dbsSetRunLumi = self.dbsDaoFactory(
            classname="DBSBufferFiles.AddRunLumi")
        self.dbsGetWorkflow = self.dbsDaoFactory(classname="ListWorkflow")

        self.dbsLFNHeritage = self.dbsDaoFactory(
            classname="DBSBufferFiles.BulkHeritageParent")

        self.stateChanger = ChangeState(config)

        # Decide whether or not to attach jobReport to returned value
        self.returnJobReport = getattr(config.JobAccountant,
                                       'returnReportFromWorker', False)

        # Store location for the specs for DBS
        self.specDir = getattr(config.JobAccountant, 'specDir', None)

        # maximum RAW EDM size for Repack output before data is put into Error dataset and skips PromptReco
        self.maxAllowedRepackOutputSize = getattr(
            config.JobAccountant, 'maxAllowedRepackOutputSize',
            12 * 1024 * 1024 * 1024)

        # ACDC service
        self.dataCollection = DataCollectionService(
            url=config.ACDC.couchurl, database=config.ACDC.database)

        jobDBurl = sanitizeURL(config.JobStateMachine.couchurl)['url']
        jobDBName = config.JobStateMachine.couchDBName
        jobCouchdb = CouchServer(jobDBurl)
        self.fwjrCouchDB = jobCouchdb.connectDatabase("%s/fwjrs" % jobDBName)
        self.localWMStats = WMStatsWriter(config.TaskArchiver.localWMStatsURL,
                                          appName="WMStatsAgent")

        # Hold data for later commital
        self.dbsFilesToCreate = []
        self.wmbsFilesToBuild = []
        self.wmbsMergeFilesToBuild = []
        self.fileLocation = None
        self.mergedOutputFiles = []
        self.listOfJobsToSave = []
        self.listOfJobsToFail = []
        self.filesetAssoc = []
        self.parentageBinds = []
        self.parentageBindsForMerge = []
        self.jobsWithSkippedFiles = {}
        self.count = 0
        self.datasetAlgoID = collections.deque(maxlen=1000)
        self.datasetAlgoPaths = collections.deque(maxlen=1000)
        self.dbsLocations = set()
        self.workflowIDs = collections.deque(maxlen=1000)
        self.workflowPaths = collections.deque(maxlen=1000)

        self.phedex = PhEDEx()
        self.locLists = self.phedex.getNodeMap()

        return
Exemplo n.º 58
0
class RequestDBReader():
    def __init__(self, couchURL, couchapp="ReqMgr"):
        couchURL = sanitizeURL(couchURL)['url']
        # set the connection for local couchDB call
        self._commonInit(couchURL, couchapp)

    def _commonInit(self, couchURL, couchapp):
        """
        setting up comon variables for inherited class.
        inherited class should call this in their init function
        """
        if isinstance(couchURL, Database):
            self.couchDB = couchURL
            self.couchURL = self.couchDB['host']
            self.dbName = self.couchDB.name
            self.couchServer = CouchServer(self.couchURL)
        else:
            self.couchURL, self.dbName = splitCouchServiceURL(couchURL)
            self.couchServer = CouchServer(self.couchURL)
            self.couchDB = self.couchServer.connectDatabase(self.dbName, False)
        self.couchapp = couchapp
        self.defaultStale = {"stale": "update_after"}

    def setDefaultStaleOptions(self, options):
        if not options:
            options = {}
        if 'stale' not in options:
            options.update(self.defaultStale)
        return options

    def _setNoStale(self):
        """
        Use this only for the unittest
        """
        self.defaultStale = {}

    def _getCouchView(self, view, options, keys=[]):

        options = self.setDefaultStaleOptions(options)

        if keys and isinstance(keys, basestring):
            keys = [keys]
        return self.couchDB.loadView(self.couchapp, view, options, keys)

    def _filterCouchInfo(self, couchInfo):
        # remove the couch specific information
        for key in ['_rev', '_attachments']:
            if key in couchInfo:
                del couchInfo[key]
        return

    def _formatCouchData(self,
                         data,
                         key="id",
                         detail=True,
                         filterCouch=True,
                         returnDict=False):
        result = {}
        for row in data['rows']:
            if 'error' in row:
                continue
            if "doc" in row:
                if filterCouch:
                    self._filterCouchInfo(row["doc"])
                result[row[key]] = row["doc"]
            else:
                result[row[key]] = row["value"]
        if detail or returnDict:
            return result
        else:
            return result.keys()

    def _getRequestByNames(self, requestNames, detail):
        """
        'status': list of the status
        """
        options = {}
        options["include_docs"] = detail
        result = self.couchDB.allDocs(options, requestNames)
        return result

    def _getRequestByStatus(self, statusList, detail, limit, skip):
        """
        'status': list of the status
        """
        options = {}
        options["include_docs"] = detail

        if limit != None:
            options["limit"] = limit
        if skip != None:
            options["skip"] = skip
        keys = statusList
        return self._getCouchView("bystatus", options, keys)

    def _getRequestByStatusAndStartTime(self, status, detail, endTime):
        """
        'status': is the status of the workflow
        'startTime': unix timestamp for start time
        """
        options = {}
        options["include_docs"] = detail
        options["startkey"] = [status, 0]
        options["endkey"] = [status, endTime]
        options["descending"] = False

        return self._getCouchView("bystatusandtime", options)

    def _getRequestByTeamAndStatus(self, team, status, limit):
        """
        'status': is the status of the workflow
        'startTime': unix timestamp for start time
        """
        options = {}
        if limit:
            options["limit"] = limit
        if team and status:
            options["key"] = [team, status]
        elif team and not status:
            options["startkey"] = [team]
            options["endkey"] = [team, status]  # status = {}

        return self._getCouchView("byteamandstatus", options)

    def _getAllDocsByIDs(self, ids, include_docs=True):
        """
        keys is [id, ....]
        returns document
        """
        if len(ids) == 0:
            return []
        options = {}
        options["include_docs"] = include_docs
        result = self.couchDB.allDocs(options, ids)

        return result

    def getDBInstance(self):
        return self.couchDB

    def getRequestByNames(self, requestNames, detail=True):
        if isinstance(requestNames, basestring):
            requestNames = [requestNames]
        if len(requestNames) == 0:
            return {}
        data = self._getRequestByNames(requestNames, detail=detail)

        requestInfo = self._formatCouchData(data, detail=detail)
        return requestInfo

    def getRequestByStatus(self,
                           statusList,
                           detail=False,
                           limit=None,
                           skip=None):

        data = self._getRequestByStatus(statusList, detail, limit, skip)
        requestInfo = self._formatCouchData(data, detail=detail)

        return requestInfo

    def getRequestByStatusAndStartTime(self, status, detail=False, endTime=0):

        if endTime == 0:
            data = self._getRequestByStatus([status],
                                            detail,
                                            limit=None,
                                            skip=None)
        else:
            data = self._getRequestByStatusAndStartTime(
                status, detail, endTime)

        requestInfo = self._formatCouchData(data, detail=detail)

        return requestInfo

    def getRequestByTeamAndStatus(self,
                                  team,
                                  status,
                                  detail=False,
                                  limit=None):
        """
        'team': team name in which the workflow was assigned to.
        'status': a single status string.
        """
        if team and status:
            data = self._getRequestByTeamAndStatus(team, status, limit)
        elif team and not status:
            data = self._getRequestByTeamAndStatus(team,
                                                   status={},
                                                   limit=limit)
        elif not team and not status:
            data = self._getRequestByTeamAndStatus(team={},
                                                   status={},
                                                   limit=limit)
        else:
            # nothing we can do with status only
            return

        requestInfo = self._formatCouchData(data, detail=detail)
        return requestInfo

    def getRequestByCouchView(self, view, options, keys=[], returnDict=True):
        options.setdefault("include_docs", True)
        data = self._getCouchView(view, options, keys)
        requestInfo = self._formatCouchData(data, returnDict=returnDict)
        return requestInfo

    def getStatusAndTypeByRequest(self, requestNames):
        if isinstance(requestNames, basestring):
            requestNames = [requestNames]
        if len(requestNames) == 0:
            return {}
        data = self._getCouchView("byrequest", {}, requestNames)
        requestInfo = self._formatCouchData(data, returnDict=True)
        return requestInfo
Exemplo n.º 59
0
    def testF_TaskChain(self):
        """
        _TaskChain_
  
        Test the monstrous TaskChain workflow
        This will be a long one
  
        NOTE: This test is so complicated that all I do is
        take code from TaskChain_t and make sure it still
        produces and actual request
          
        """
        couchServer = CouchServer(os.environ["COUCHURL"])
        configDatabase = couchServer.connectDatabase(self.couchDBName)
        generatorDoc = makeGeneratorConfig(configDatabase)
        processorDocs = makeProcessingConfigs(configDatabase)

        userName = '******'
        groupName = 'Li'
        teamName = 'Tang'
        schema = utils.getSchema(userName=userName)
        schema["CouchURL"] = os.environ["COUCHURL"]
        schema["CouchDBName"] = self.couchDBName
        schema["CouchWorkloadDBName"] = self.couchDBName
        schema["SiteWhitelist"] = ["T1_CH_CERN", "T1_US_FNAL"]
        schema["TaskChain"] = 5
        chains = {
            "Task1": {
                "TaskName": "GenSim",
                "ConfigCacheID": generatorDoc,
                "SplittingAlgo": "EventBased",
                "EventsPerJob": 250,
                "RequestNumEvents": 10000,
                "PrimaryDataset": "RelValTTBar"
            },
            "Task2": {
                "TaskName": "DigiHLT",
                "InputTask": "GenSim",
                "InputFromOutputModule": "writeGENSIM",
                "ConfigCacheID": processorDocs['DigiHLT'],
                "SplittingAlgo": "FileBased"
            },
            "Task3": {
                "TaskName": "Reco",
                "InputTask": "DigiHLT",
                "InputFromOutputModule": "writeRAWDIGI",
                "ConfigCacheID": processorDocs['Reco'],
                "SplittingAlgo": "FileBased"
            },
            "Task4": {
                "TaskName": "ALCAReco",
                "InputTask": "Reco",
                "InputFromOutputModule": "writeALCA",
                "ConfigCacheID": processorDocs['ALCAReco'],
                "SplittingAlgo": "FileBased"
            },
            "Task5": {
                "TaskName": "Skims",
                "InputTask": "Reco",
                "InputFromOutputModule": "writeRECO",
                "ConfigCacheID": processorDocs['Skims'],
                "SplittingAlgo": "FileBased",
                "FilesPerJob": 10
            }
        }
        schema.update(chains)
        args = utils.getAndSetupSchema(self,
                                       userName=userName,
                                       groupName=groupName,
                                       teamName=teamName)
        schema.update(args)

        # this is necessary and after all updates to the schema are made,
        # otherwise this item will get overwritten
        schema['RequestType'] = "TaskChain"
        schema["CouchDBName"] = self.couchDBName
        schema["CouchURL"] = os.environ.get("COUCHURL")
        schema["CouchWorkloadDBName"] = self.couchDBName

        result = self.jsonSender.put('request', schema)

        requestName = result[0]['RequestName']
        result = self.jsonSender.get('request/%s' % requestName)
        request = result[0]
        self.assertEqual(request['CMSSWVersion'], schema['CMSSWVersion'])
        self.assertEqual(request['Group'], groupName)
        self.assertEqual(request['Requestor'], userName)

        workload = self.loadWorkload(requestName)
        self.assertEqual(workload.data.request.schema.Task1["EventsPerJob"],
                         250)