示例#1
0
 def __init__(self, config):
     """
     Cache this agent's queue url and construct the workqueue backends
     and helper services.

     :param config: WMAgent configuration; must provide Agent.hostName,
         WorkloadSummary.couchurl and WorkQueueManager.couchurl.
     """
     # queue url used in WorkQueueManager
     self.thisAgentUrl = "http://" + config.Agent.hostName + ":5984"
     # central (global) workqueue backend
     self.globalBackend = WorkQueueBackend(config.WorkloadSummary.couchurl)
     # this agent's (local) workqueue backend
     self.localBackend = WorkQueueBackend(config.WorkQueueManager.couchurl)
     self.dbsUtil = DBSBufferUtil()  # DBSBuffer database helper
     self.condorAPI = PyCondorAPI()  # HTCondor query helper
示例#2
0
 def __init__(self, config):
     """
     Cache this agent's queue url and construct the workqueue backends
     and helper services.

     :param config: WMAgent configuration; must provide Agent.hostName,
         WorkloadSummary.couchurl and WorkQueueManager.couchurl.
     """
     # queue url used in WorkQueueManager
     self.thisAgentUrl = "http://" + config.Agent.hostName + ":5984"
     # central (global) workqueue backend
     self.globalBackend = WorkQueueBackend(config.WorkloadSummary.couchurl)
     # this agent's (local) workqueue backend
     self.localBackend = WorkQueueBackend(config.WorkQueueManager.couchurl)
     self.dbsUtil = DBSBufferUtil()  # DBSBuffer database helper
     self.condorAPI = PyCondorAPI()  # HTCondor query helper
示例#3
0
def main():
    """
    Print a workqueue element summary for a single request, covering the
    global workqueue plus the local workqueue and workqueue_inbox databases.
    """
    # fall back to the standard agent config location when not set
    if 'WMAGENT_CONFIG' not in os.environ:
        os.environ['WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py'
    config = loadConfigurationFile(os.environ["WMAGENT_CONFIG"])

    if len(sys.argv) != 2:
        print("You must provide a request name")
        sys.exit(1)

    reqName = sys.argv[1]

    # (couch url, database name) for each of the three queues to inspect
    queueSpecs = ((config.WorkloadSummary.couchurl, "workqueue"),
                  (config.WorkQueueManager.couchurl, "workqueue"),
                  (config.WorkQueueManager.couchurl, "workqueue_inbox"))

    for couchUrl, dbName in queueSpecs:
        wqBackend = WorkQueueBackend(couchUrl, db_name=dbName)
        wqDocs = wqBackend.getElements(RequestName=reqName)
        createElementsSummary(reqName, wqDocs, wqBackend.queueUrl)

    sys.exit(0)
示例#4
0
    def setUp(self):
        """
        Create the couch test databases (inbox + queue) and a
        WorkQueueBackend pointing at them, plus a ReReco spec for tests.
        """
        self.testInit = TestInit('CouchWorkQueueTest')
        self.testInit.setLogging()
        self.testInit.setupCouch('wq_backend_test_inbox', 'WorkQueue')
        self.testInit.setupCouch('wq_backend_test', 'WorkQueue')
        self.couch_db = self.testInit.couch.couchServer.connectDatabase('wq_backend_test')
        self.backend = WorkQueueBackend(db_url = self.testInit.couchUrl,
                                        db_name = 'wq_backend_test',
                                        inbox_name = 'wq_backend_test_inbox')

        self.processingSpec = rerecoWorkload('testProcessing', rerecoArgs)
def getProblematicRequests():
    """
    _getProblematicRequests_

    Return the inbox elements that are open for new data but have no
    child (workqueue) elements associated with them.
    """
    badWorkflows = []
    backend = WorkQueueBackend('https://cmsweb.cern.ch/couchdb')
    workflowsToCheck = backend.getInboxElements(OpenForNewData=True)
    for element in workflowsToCheck:
        childrenElements = backend.getElementsForParent(element)
        # an open inbox element with no children is problematic
        if not childrenElements:
            badWorkflows.append(element)
    return badWorkflows
示例#6
0
def getProblematicRequests():
    """
    _getProblematicRequests_

    Return the inbox elements that are open for new data but have no
    child (workqueue) elements associated with them.
    """
    badWorkflows = []
    backend = WorkQueueBackend('https://cmsweb.cern.ch/couchdb')
    workflowsToCheck = backend.getInboxElements(OpenForNewData=True)
    for element in workflowsToCheck:
        childrenElements = backend.getElementsForParent(element)
        # an open inbox element with no children is problematic
        if not childrenElements:
            badWorkflows.append(element)
    return badWorkflows
示例#7
0
def checkGlobalWQStatus(config, status):
    """
    Given a WorkQueueElement status, query central workqueue database for
    all elements in a given status and that were acquired by this agent.
    """
    # this agent's couch endpoint, as recorded in ChildQueueUrl
    agentUrl = "http://" + socket.gethostname() + ":5984"

    backend = WorkQueueBackend(config.WorkloadSummary.couchurl)
    elements = backend.getElements(status=status, ChildQueueUrl=agentUrl)

    for element in elements:
        # element update time is a unix timestamp; render it human-readable
        lastUpdate = time.strftime("%a, %d %b %Y %H:%M:%S %Z",
                                   time.localtime(float(element.updatetime)))
        print("id: %s\tRequestName: %s\tStatus: %s\t\tUpdatedIn: %s" % (
            element.id, element['RequestName'], element['Status'], lastUpdate))
    print("Elements matching the criteria (%s, %s) are: %d" % (status, agentUrl, len(elements)))
    return
示例#8
0
 def setUp(self):
     """
     Create the couch test databases (inbox, queue and parent queue) and
     a WorkQueueBackend wired to them; also build a ReReco spec.
     """
     self.testInit = TestInit('CouchWorkQueueTest')
     self.testInit.setLogging()
     self.testInit.setupCouch('wq_backend_test_inbox', 'WorkQueue')
     self.testInit.setupCouch('wq_backend_test', 'WorkQueue')
     self.testInit.setupCouch('wq_backend_test_parent', 'WorkQueue')
     self.couch_db = self.testInit.couch.couchServer.connectDatabase(
         'wq_backend_test')
     self.backend = WorkQueueBackend(
         db_url=self.testInit.couchUrl,
         db_name='wq_backend_test',
         inbox_name='wq_backend_test_inbox',
         parentQueue='%s/%s' %
         (self.testInit.couchUrl, 'wq_backend_test_parent'))
     # the ReReco workload needs a config cache document to exist first
     rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
     self.processingSpec = rerecoWorkload('testProcessing', rerecoArgs)
示例#9
0
def checkGlobalWQStatus(config, status):
    """
    Given a WorkQueueElement status, query central workqueue database for
    all elements in a given status and that were acquired by this agent.

    :param config: WMAgent configuration object
    :param status: WorkQueueElement status string to filter on
    """
    # this agent's couch endpoint, as recorded in ChildQueueUrl
    agentUrl = "http://" + socket.gethostname() + ":5984"

    backend = WorkQueueBackend(config.WorkloadSummary.couchurl)
    elements = backend.getElements(status=status, ChildQueueUrl=agentUrl)

    for elem in elements:
        # element update time is a unix timestamp; render it human-readable
        updatedIn = time.strftime("%a, %d %b %Y %H:%M:%S %Z", time.localtime(float(elem.updatetime)))
        print("id: %s\tRequestName: %s\tStatus: %s\t\tUpdatedIn: %s" % (
        elem.id, elem['RequestName'], elem['Status'], updatedIn))
    print("Elements matching the criteria (%s, %s) are: %d" % (status, agentUrl, len(elements)))
    return
示例#10
0
def killWorkflowAgent(WorkflowName):
    """
    Cancel work for a given workflow - delete in wmbs, delete from workqueue db, set canceled in inbox

    :param WorkflowName: name of the workflow whose work should be canceled
    """
    # get configuration file path; dict.has_key was removed in Python 3,
    # so use the "in" membership test instead
    if "WMAGENT_CONFIG" not in os.environ:
        os.environ[
            'WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py'

    # load config
    wmConfig = loadConfigurationFile(os.environ['WMAGENT_CONFIG'])
    wqManager = wmConfig.section_('WorkQueueManager')

    couchUrl = wqManager.couchurl
    dbname = wqManager.dbname
    inboxDatabase = wqManager.inboxDatabase
    parentQueueCouchUrl = wqManager.queueParams['ParentQueueCouchUrl']

    # Creates backend
    backend = WorkQueueBackend(couchUrl, dbname, inboxDatabase,
                               parentQueueCouchUrl)

    args = {}
    args['RequestName'] = WorkflowName
    elements = backend.getElements(**args)

    # take wf from args in case no elements exist for workflow (i.e. work was negotiating)
    requestNames = set([x['RequestName'] for x in elements]) | set([WorkflowName])
    if not requestNames:
        print('Workflow is not at the backend')

    inbox_elements = []
    for wf in requestNames:
        inbox_elements.extend(backend.getInboxElements(WorkflowName=wf))

    print("Canceling work for workflow: %s" % (requestNames))
    for workflow in requestNames:
        try:
            connectToDB()
            jobDumpConfig = wmConfig
            bossAirConfig = wmConfig
            killWorkflow(workflow, jobDumpConfig, bossAirConfig)
        except Exception as ex:
            # best-effort: report the failure and continue with other workflows
            print('Aborting %s wmbs subscription failed: %s' % (workflow,
                                                                str(ex)))
示例#11
0
def checkLocalWQStatus(config, status):
    """
    Given a WorkQueueElement status, query local workqueue and workqueue_inbox
    database for all elements in a given status and that were acquired by this agent.
    """
    backend = WorkQueueBackend(config.WorkQueueManager.couchurl)

    for db in ("workqueue", "workqueue_inbox"):
        # the inbox database is queried through a dedicated method
        fetch = backend.getElements if db == "workqueue" else backend.getInboxElements
        elements = fetch(status=status)

        for element in elements:
            # element update time is a unix timestamp; render it human-readable
            lastUpdate = time.strftime("%a, %d %b %Y %H:%M:%S %Z",
                                       time.localtime(float(element.updatetime)))
            print("id: %s\tRequestName: %s\tStatus: %s\t\tUpdatedIn: %s" % (
                element.id, element['RequestName'], element['Status'], lastUpdate))
        print("Elements matching the criteria (%s, %s) are: %d" % (status, db, len(elements)))
    return
示例#12
0
def checkLocalWQStatus(config, status):
    """
    Given a WorkQueueElement status, query local workqueue and workqueue_inbox
    database for all elements in a given status and that were acquired by this agent.

    :param config: WMAgent configuration object
    :param status: WorkQueueElement status string to filter on
    """
    backend = WorkQueueBackend(config.WorkQueueManager.couchurl)

    for db in ("workqueue", "workqueue_inbox"):
        # the inbox database is queried through a dedicated method
        if db == "workqueue":
            elements = backend.getElements(status=status)
        else:
            elements = backend.getInboxElements(status=status)

        for elem in elements:
            # element update time is a unix timestamp; render it human-readable
            updatedIn = time.strftime("%a, %d %b %Y %H:%M:%S %Z", time.localtime(float(elem.updatetime)))
            print("id: %s\tRequestName: %s\tStatus: %s\t\tUpdatedIn: %s" % (
            elem.id, elem['RequestName'], elem['Status'], updatedIn))
        print("Elements matching the criteria (%s, %s) are: %d" % (status, db, len(elements)))
    return
def killWorkflowAgent(WorkflowName):
    """
    Cancel work for a given workflow - delete in wmbs, delete from workqueue db, set canceled in inbox

    :param WorkflowName: name of the workflow whose work should be canceled
    """
    # get configuration file path; dict.has_key was removed in Python 3,
    # so use the "in" membership test instead
    if "WMAGENT_CONFIG" not in os.environ:
        os.environ["WMAGENT_CONFIG"] = "/data/srv/wmagent/current/config/wmagent/config.py"

    # load config
    wmConfig = loadConfigurationFile(os.environ["WMAGENT_CONFIG"])
    wqManager = wmConfig.section_("WorkQueueManager")

    couchUrl = wqManager.couchurl
    dbname = wqManager.dbname
    inboxDatabase = wqManager.inboxDatabase
    parentQueueCouchUrl = wqManager.queueParams["ParentQueueCouchUrl"]

    # Creates backend
    backend = WorkQueueBackend(couchUrl, dbname, inboxDatabase, parentQueueCouchUrl)

    args = {}
    args["RequestName"] = WorkflowName
    elements = backend.getElements(**args)

    # take wf from args in case no elements exist for workflow (i.e. work was negotiating)
    requestNames = set([x["RequestName"] for x in elements]) | set([WorkflowName])
    if not requestNames:
        print("Workflow is not at the backend")

    inbox_elements = []
    for wf in requestNames:
        inbox_elements.extend(backend.getInboxElements(WorkflowName=wf))

    print("Canceling work for workflow: %s" % (requestNames))
    for workflow in requestNames:
        try:
            connectToDB()
            jobDumpConfig = wmConfig
            bossAirConfig = wmConfig
            killWorkflow(workflow, jobDumpConfig, bossAirConfig)
        except Exception as ex:
            # best-effort: report the failure and continue with other workflows
            print("Aborting %s wmbs subscription failed: %s" % (workflow, str(ex)))
def main():
    """
    It will either delete docs in couchdb for the workflow you
    have provided or it will loop over the final (or almost final)
    states and ask for your permission to delete them.

    Reads a text file with one workflow name per line and aligns the
    priority of 'Available' workqueue elements with the request priority.
    """
    args = sys.argv[1:]
    if not len(args) == 1:
        print("usage: python syncPrioReqMgrxGQ.py <text_file_with_the_workflow_names>")
        sys.exit(0)
    inputFile = args[0]
    with open(inputFile) as f:
        listWorkflows = [x.rstrip('\n') for x in f.readlines()]

    if 'WMAGENT_CONFIG' not in os.environ:
        os.environ[
            'WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py'

    config = loadConfigurationFile(os.environ["WMAGENT_CONFIG"])

    wfDBReader = RequestDBReader(
        config.AnalyticsDataCollector.centralRequestDBURL,
        couchapp=config.AnalyticsDataCollector.RequestCouchApp)

    wqBackend = WorkQueueBackend(config.WorkloadSummary.couchurl)

    workflowsDict = wfDBReader.getRequestByNames(listWorkflows)

    # dict.iteritems() was removed in Python 3; items() works in both
    for wf, details in workflowsDict.items():
        print("wf: %s and prio: %s" % (wf, details['RequestPriority']))
        wqDocs = wqBackend.getElements(WorkflowName=wf)
        # only 'Available' elements whose priority diverges from the request
        docIds = [
            elem._id for elem in wqDocs if elem['Status'] == 'Available'
            and elem['Priority'] != details['RequestPriority']
        ]
        if docIds:
            print("Changing the priority of the following available docs: %s" % docIds)
            wqBackend.updateElements(*docIds,
                                     Priority=details['RequestPriority'])
        else:
            print(" there is nothing to update for this workflow.")
def main():
    """
    Print a workqueue element summary for a single request, covering the
    global workqueue plus the local workqueue and workqueue_inbox databases.
    """
    if 'WMAGENT_CONFIG' not in os.environ:
        os.environ['WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py'
    config = loadConfigurationFile(os.environ["WMAGENT_CONFIG"])

    if len(sys.argv) != 2:
        print("You must provide a request name")
        sys.exit(1)

    reqName = sys.argv[1]

    # (couch url, database name) for each of the three queues to inspect
    queueSpecs = ((config.WorkloadSummary.couchurl, "workqueue"),
                  (config.WorkQueueManager.couchurl, "workqueue"),
                  (config.WorkQueueManager.couchurl, "workqueue_inbox"))

    for couchUrl, dbName in queueSpecs:
        wqBackend = WorkQueueBackend(couchUrl, db_name=dbName)
        wqDocs = wqBackend.getElements(RequestName=reqName)
        createElementsSummary(reqName, wqDocs, wqBackend.queueUrl)

    sys.exit(0)
def main():
    """
    Summarize local workqueue/workqueue_inbox elements, break the local
    workqueue elements down by status, then summarize 'Available'
    elements in the global workqueue.
    """
    if 'WMAGENT_CONFIG' not in os.environ:
        os.environ['WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py'
    config = loadConfigurationFile(os.environ["WMAGENT_CONFIG"])

    # Get local workqueue and workqueue_inbox docs
    localWQBackend = WorkQueueBackend(config.WorkQueueManager.couchurl, db_name="workqueue")
    localWQInboxDB = WorkQueueBackend(config.WorkQueueManager.couchurl, db_name="workqueue_inbox")
    wqDocIDs = localWQBackend.getElements()
    wqInboxDocIDs = localWQInboxDB.getElements()

    # Build and print a summary of these elements
    logging.info("************* LOCAL workqueue elements summary ************")
    # NOTE(review): the inbox summary's foundStatus is immediately
    # overwritten below, so only the workqueue statuses drive the loop
    # that follows -- confirm this is intended.
    foundStatus = createElementsSummary(wqInboxDocIDs, 'workqueue_inbox')
    foundStatus = createElementsSummary(wqDocIDs, 'workqueue')

    # Now investigate docs in the workqueue database
    for status in foundStatus:
        logging.info("\n************* workqueue elements summary by status: %s ************", status)
        elemByStatus = [x for x in wqDocIDs if x['Status'] == status]
        byStatusSummary(elemByStatus, localWQInboxDB=localWQInboxDB)

    # time to look up at central global queue
    logging.info("\n************* GLOBAL workqueue elements summary ************")
    globalWQBackend = WorkQueueBackend(config.WorkloadSummary.couchurl, db_name="workqueue")
    gqDocIDs = globalWQBackend.getElements(status='Available')
    _ = createElementsSummary(gqDocIDs, 'workqueue')
    #logging.info("Found %d 'Available' docs in global workqueue database", len(gqDocIDs))
    byStatusSummary(gqDocIDs)

    sys.exit(0)
 def setUp(self):
     """
     Create the couch test databases (inbox, queue and parent queue) and
     a WorkQueueBackend wired to them; also build a ReReco spec.
     """
     self.testInit = TestInit('CouchWorkQueueTest')
     self.testInit.setLogging()
     self.testInit.setupCouch('wq_backend_test_inbox', 'WorkQueue')
     self.testInit.setupCouch('wq_backend_test', 'WorkQueue')
     self.testInit.setupCouch('wq_backend_test_parent', 'WorkQueue')
     self.couch_db = self.testInit.couch.couchServer.connectDatabase('wq_backend_test')
     self.backend = WorkQueueBackend(db_url=self.testInit.couchUrl,
                                     db_name='wq_backend_test',
                                     inbox_name='wq_backend_test_inbox',
                                     parentQueue='%s/%s' % (self.testInit.couchUrl, 'wq_backend_test_parent'))
     # the ReReco workload needs a config cache document to exist first
     rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
     self.processingSpec = rerecoWorkload('testProcessing', rerecoArgs)
示例#18
0
    def __init__(self, logger=None, dbi=None, **params):
        """
        Set up the local queue backend and, when configured, a handle on
        the parent (global) queue.

        :param logger: logger passed through to WorkQueueBase
        :param dbi: database interface passed through to WorkQueueBase
        :param params: queue configuration; recognised keys include
            'Config', 'CouchUrl' (mandatory, defaults to $COUCHURL),
            'DbName', 'InboxDbName', 'ParentQueueCouchUrl' and 'QueueURL'.
        :raises RuntimeError: if no CouchUrl can be determined
        :raises WorkQueueError: if ParentQueueCouchUrl cannot be parsed
        """
        WorkQueueBase.__init__(self, logger, dbi)
        self.parent_queue = None
        self.params = params

        # config argument (within params) shall be reference to
        # Configuration instance (will later be checked for presence of "Alert")
        self.config = params.get("Config", None)
        self.params.setdefault('CouchUrl', os.environ.get('COUCHURL'))
        if not self.params.get('CouchUrl'):
            # old "raise Class, msg" syntax is invalid in Python 3
            raise RuntimeError('CouchUrl config value mandatory')
        self.params.setdefault('DbName', 'workqueue')
        self.params.setdefault('InboxDbName', self.params['DbName'] + '_inbox')
        self.params.setdefault('ParentQueueCouchUrl',
                               None)  # We get work from here

        self.backend = WorkQueueBackend(self.params['CouchUrl'],
                                        self.params['DbName'],
                                        self.params['InboxDbName'],
                                        self.params['ParentQueueCouchUrl'],
                                        self.params.get('QueueURL'),
                                        logger=self.logger)
        if self.params.get('ParentQueueCouchUrl'):
            try:
                # split "<couch url>/<db name>" into its two components
                self.parent_queue = WorkQueueBackend(
                    self.params['ParentQueueCouchUrl'].rsplit('/', 1)[0],
                    self.params['ParentQueueCouchUrl'].rsplit('/', 1)[1])
            except IndexError as ex:
                # Probable cause: Someone didn't put the global WorkQueue name in
                # the ParentCouchUrl
                msg = "Parsing failure for ParentQueueCouchUrl - probably missing dbname in input\n"
                msg += "Exception: %s\n" % str(ex)
                msg += str("ParentQueueCouchUrl: %s\n" %
                           self.params['ParentQueueCouchUrl'])
                self.logger.error(msg)
                raise WorkQueueError(msg)
            self.params['ParentQueueCouchUrl'] = self.parent_queue.queueUrl
def main():
    """
    Summarize local workqueue/workqueue_inbox elements, break the local
    workqueue elements down by status, then summarize 'Available'
    elements in the global workqueue.
    """
    if 'WMAGENT_CONFIG' not in os.environ:
        os.environ[
            'WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py'
    config = loadConfigurationFile(os.environ["WMAGENT_CONFIG"])

    # Get local workqueue and workqueue_inbox docs
    localWQBackend = WorkQueueBackend(config.WorkQueueManager.couchurl,
                                      db_name="workqueue")
    localWQInboxDB = WorkQueueBackend(config.WorkQueueManager.couchurl,
                                      db_name="workqueue_inbox")
    wqDocIDs = localWQBackend.getElements()
    wqInboxDocIDs = localWQInboxDB.getElements()

    # Build and print a summary of these elements
    logging.info("************* LOCAL workqueue elements summary ************")
    # NOTE(review): the inbox summary's foundStatus is immediately
    # overwritten below, so only the workqueue statuses drive the loop
    # that follows -- confirm this is intended.
    foundStatus = createElementsSummary(wqInboxDocIDs, 'workqueue_inbox')
    foundStatus = createElementsSummary(wqDocIDs, 'workqueue')

    # Now investigate docs in the workqueue database
    for status in foundStatus:
        logging.info(
            "\n************* workqueue elements summary by status: %s ************",
            status)
        elemByStatus = [x for x in wqDocIDs if x['Status'] == status]
        byStatusSummary(elemByStatus, localWQInboxDB=localWQInboxDB)

    # time to look up at central global queue
    logging.info(
        "\n************* GLOBAL workqueue elements summary ************")
    globalWQBackend = WorkQueueBackend(config.WorkloadSummary.couchurl,
                                       db_name="workqueue")
    gqDocIDs = globalWQBackend.getElements(status='Available')
    _ = createElementsSummary(gqDocIDs, 'workqueue')
    #logging.info("Found %d 'Available' docs in global workqueue database", len(gqDocIDs))
    byStatusSummary(gqDocIDs)

    sys.exit(0)
示例#20
0
def main():
    """
    It will either delete docs in couchdb for the workflow you
    have provided or it will loop over the final (or almost final)
    states and ask for your permission to delete them.
    """
    wfName = sys.argv[1] if len(sys.argv) == 2 else []

    if 'WMAGENT_CONFIG' not in os.environ:
        os.environ['WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py'

    config = loadConfigurationFile(os.environ["WMAGENT_CONFIG"])

    # Instantiating central services (couch stuff)
#    print "Central Couch URL  : %s" % config.WorkloadSummary.couchurl
#    print "Central ReqMgr URL  : %s\n" % config.AnalyticsDataCollector.centralRequestDBURL

    wfDBReader = RequestDBReader(config.AnalyticsDataCollector.centralRequestDBURL, 
                                 couchapp = config.AnalyticsDataCollector.RequestCouchApp)

    # Central services
    wqBackend = WorkQueueBackend(config.WorkloadSummary.couchurl)
    wqInboxDB = Database('workqueue_inbox', config.WorkloadSummary.couchurl)

    # Local services
    localWQBackend = WorkQueueBackend(config.WorkQueueManager.couchurl, db_name = "workqueue_inbox")
    localWQInboxDB = Database('workqueue', config.WorkQueueManager.couchurl)

    statusList = ["failed", "epic-FAILED", "completed", "closed-out",
                  "announced", "aborted", "aborted-completed", "rejected",
                  "normal-archived", "aborted-archived", "rejected-archived"]

    for stat in final_status:
        # retrieve list of workflows in each status
        if not wfName:
#            options = {'include_docs': False}
            date_range = {'startkey': [2015,5,15,0,0,0], 'endkey': [2015,5,26,0,0,0]}
#            finalWfs = wfDBReader.getRequestByCouchView("bydate", options, date_range)
            tempWfs = wfDBReader.getRequestByCouchView("bydate", date_range)
            #print "Found %d wfs in status: %s" %(len(finalWfs), stat)
            finalWfs = []
            for wf, content in tempWfs.iteritems():
                if content['RequestStatus'] in statusList:
                  finalWfs.append(wf)
            print "Found %d wfs in not in active state" % len(finalWfs)
        else:
            finalWfs = [wfName]
            tempWfs = wfDBReader.getRequestByNames(wfName, True)
            print "Checking %s with status '%s'." % (wfName, tempWfs[wfName]['RequestStatus'])

        wqDocs, wqInboxDocs = [], []
        localWQDocs, localWQInboxDocs = [], []
        for counter, wf in enumerate(finalWfs):
            if counter % 100 == 0:
                print "%d wfs queried ..." % counter
            # check whether there are workqueue docs
            wqDocIDs = wqBackend.getElements(WorkflowName = wf)
            if wqDocIDs:
                print "Found %d workqueue docs for %s, status %s" % (len(wqDocIDs), wf, tempWfs[wf]['RequestStatus'])
                print wqDocIDs
                wqDocs.append(wqDocIDs)

            # check whether there are workqueue_inbox docs
            if wqInboxDB.documentExists(wf):
                print "Found workqueue_inbox doc for %s, status %s" % (wf, tempWfs[wf]['RequestStatus'])
                # then retrieve the document
                wqInboxDoc = wqInboxDB.document(wf)
                wqInboxDocs.append(wqInboxDoc)

            # check local queue
            wqDocIDs = localWQBackend.getElements(WorkflowName = wf)
            if wqDocIDs:
                print "Found %d local workqueue docs for %s, status %s" % (len(wqDocIDs), wf, tempWfs[wf]['RequestStatus'])
                print wqDocIDs
                localWQDocs.append(wqDocIDs)
            if localWQInboxDB.documentExists(wf):
                print "Found local workqueue_inbox doc for %s, status %s" % (wf, tempWfs[wf]['RequestStatus'])
                wqInboxDoc = localWQInboxDB.document(wf)
                print wqInboxDoc
                localWQInboxDocs.append(wqInboxDoc)

    # TODO TODO TODO for the moment only deletes for a specific workflow
    if wfName:
        var = raw_input("\nCan we delete all these documents (Y/N)? ")
        if var == "Y":
            # deletes workqueue_inbox doc
            if wqInboxDoc:
                print "Deleting workqueue_inbox id %s and %s" % (wqInboxDoc['_id'], wqInboxDoc['_rev'])
                wqInboxDB.delete_doc(wqInboxDoc['_id'], wqInboxDoc['_rev'])

            # deletes workqueue docs
            if wqDocIDs:
                print "Deleting workqueue docs %s" % wqDocIDs
                wqBackend.deleteElements(*[x for x in wqDocIDs if x['RequestName'] in wfName])
        else:
            print "You are the boss, aborting it ...\n"
示例#21
0
class DrainStatusAPI(object):
    """
    Provides methods for querying dbs and condor for drain statistics
    """
    def __init__(self, config):
        """
        :param config: WMAgent configuration; must provide Agent.hostName,
            WorkloadSummary.couchurl and WorkQueueManager.couchurl.
        """
        # queue url used in WorkQueueManager
        self.thisAgentUrl = "http://" + config.Agent.hostName + ":5984"
        # central (global) and local workqueue backends
        self.globalBackend = WorkQueueBackend(config.WorkloadSummary.couchurl)
        self.localBackend = WorkQueueBackend(config.WorkQueueManager.couchurl)
        self.dbsUtil = DBSBufferUtil()
        self.condorAPI = PyCondorAPI()

    def collectDrainInfo(self):
        """
        Call methods to check the drain status

        :return: dict with a 'workflows_completed' flag and, when that
            flag is set, the additional upload/condor/workqueue statistics
        """
        results = {}
        results['workflows_completed'] = self.checkWorkflows()

        # if workflows are completed, collect additional drain statistics
        if results['workflows_completed']:
            results['upload_status'] = self.checkFileUploadStatus()
            results['condor_status'] = self.checkCondorStates()
            results['local_wq_status'] = self.checkLocalWQStatus(dbname="workqueue")
            results['local_wqinbox_status'] = self.checkLocalWQStatus(dbname="workqueue_inbox")
            results['global_wq_status'] = self.checkGlobalWQStatus()

        return results

    def checkWorkflows(self):
        """
        Check to see if all workflows have a 'completed' status
        """
        results = self.dbsUtil.isAllWorkflowCompleted()
        return results

    def checkCondorStates(self):
        """
        Check idle and running jobs in Condor

        :return: dict mapping 'idle'/'running' to a job count, or to an
            error string when the schedd query fails
        """
        results = {}
        # [JobStatus constraint value, human-readable label]
        queries = [["1", "idle"], ["2", "running"]]

        for query in queries:
            jobs = self.condorAPI.getCondorJobs("JobStatus=="+query[0], [])
            # if there is an error, report it instead of the length of an empty list
            if jobs is None:
                results[query[1]] = "unknown (schedd query error)"
            else:
                results[query[1]] = len(jobs)

        return results

    def checkFileUploadStatus(self):
        """
        Check file upload status:
            Blocks open in DBS
            Files not uploaded in DBS
            Files not uploaded to Phedex
        """
        results = {}
        results['dbs_open_blocks'] = self.dbsUtil.countOpenBlocks()
        results['dbs_notuploaded'] = self.dbsUtil.countFilesByStatus(status="NOTUPLOADED")
        results['phedex_notuploaded'] = self.dbsUtil.countPhedexNotUploaded()
        return results

    def checkLocalWQStatus(self, dbname):
        """
        Query local WorkQueue workqueue/workqueue_inbox database to see whether
        there are any active elements in this agent.

        :param dbname: either "workqueue" or "workqueue_inbox"
        :return: dict mapping each active status to its element count
        """
        results = {}

        for st in ('Available', 'Negotiating', 'Acquired', 'Running'):
            # the inbox database is queried through a dedicated method
            if dbname == "workqueue":
                elements = self.localBackend.getElements(status=st, returnIdOnly=True)
            else:
                elements = self.localBackend.getInboxElements(status=st, returnIdOnly=True)
            results[st] = len(elements)
        return results

    def checkGlobalWQStatus(self):
        """
        Query Global WorkQueue workqueue database to see whether there are
        any active elements set to this agent.

        :return: dict mapping each active status to its element count
        """
        results = {}

        for st in ("Acquired", "Running"):
            elements = self.globalBackend.getElements(status=st, returnIdOnly=True,
                                                      ChildQueueUrl=self.thisAgentUrl)
            results[st] = len(elements)
        return results
示例#22
0
class WorkQueueBackendTest(unittest.TestCase):
    """
    Unit tests for WorkQueueBackend covering element priority ordering,
    duplicate insertion handling and couch replication status checks.
    """

    def setUp(self):
        # create couch test databases and a backend wired to them
        self.testInit = TestInit('CouchWorkQueueTest')
        self.testInit.setLogging()
        self.testInit.setupCouch('wq_backend_test_inbox', 'WorkQueue')
        self.testInit.setupCouch('wq_backend_test', 'WorkQueue')
        self.testInit.setupCouch('wq_backend_test_parent', 'WorkQueue')
        self.couch_db = self.testInit.couch.couchServer.connectDatabase('wq_backend_test')
        self.backend = WorkQueueBackend(db_url = self.testInit.couchUrl,
                                        db_name = 'wq_backend_test',
                                        inbox_name = 'wq_backend_test_inbox',
                                        parentQueue = '%s/%s' % (self.testInit.couchUrl, 'wq_backend_test_parent'))

        self.processingSpec = rerecoWorkload('testProcessing', rerecoArgs)


    def tearDown(self):
        """
        _tearDown_

        """
        self.testInit.tearDownCouch()

    def testPriority(self):
        """Element priority and ordering handled correctly"""
        element = WorkQueueElement(RequestName = 'backend_test',
                                   WMSpec = self.processingSpec,
                                   Status = 'Available',
                                   Jobs = 10, Priority = 1)
        highprielement = WorkQueueElement(RequestName = 'backend_test_high',
                                          WMSpec = self.processingSpec,
                                          Status = 'Available', Jobs = 10,
                                          Priority = 100)
        element2 = WorkQueueElement(RequestName = 'backend_test_2',
                                    WMSpec = self.processingSpec,
                                    Status = 'Available',
                                    Jobs = 10, Priority = 1)
        lowprielement = WorkQueueElement(RequestName = 'backend_test_low',
                                         WMSpec = self.processingSpec,
                                         Status = 'Available',
                                         Jobs = 10, Priority = 0.1)
        self.backend.insertElements([element])
        self.backend.availableWork({'place' : 1000}, {})
        # timestamp in elements have second coarseness, 2nd element must
        # have a higher timestamp to force it after the 1st
        time.sleep(1)
        self.backend.insertElements([lowprielement, element2, highprielement])
        self.backend.availableWork({'place' : 1000}, {})
        work = self.backend.availableWork({'place' : 1000}, {})
        # order should be high to low, with the standard elements in the order
        # they were queued
        self.assertEqual([x['RequestName'] for x in work[0]],
                         ['backend_test_high', 'backend_test', 'backend_test_2', 'backend_test_low'])


    def testDuplicateInsertion(self):
        """Try to insert elements multiple times"""
        element1 = CouchWorkQueueElement(self.couch_db,
                                         elementParams = {'RequestName' : 'backend_test',
                                                          'WMSpec' : self.processingSpec,
                                                          'Status' : 'Available',
                                                          'Jobs' : 10,
                                                          'Inputs' : {self.processingSpec.listInputDatasets()[0] + '#1' : []}})
        element2 = CouchWorkQueueElement(self.couch_db,
                                         elementParams = {'RequestName' : 'backend_test',
                                                          'WMSpec' : self.processingSpec,
                                                          'Status' : 'Available',
                                                          'Jobs' : 20,
                                                          'Inputs' : {self.processingSpec.listInputDatasets()[0] + '#2' : []}})
        self.backend.insertElements([element1, element2])
        # inserting the very same elements again must not create duplicates
        self.backend.insertElements([element1, element2])
        # check no duplicates and no conflicts
        self.assertEqual(len(self.backend.db.allDocs()['rows']), 4) # design doc + workflow + 2 elements
        self.assertEqual(self.backend.db.loadView('WorkQueue', 'conflicts')['total_rows'], 0)

    def testReplicationStatus(self):
        """
        _testReplicationStatus_

        Check that we can catch replication errors,
        the checkReplicationStatus returns True if there is no error.
        """
        self.backend.pullFromParent(continuous = True)
        self.backend.sendToParent(continuous = True)
        self.assertTrue(self.backend.checkReplicationStatus())
        # cancelling continuous replication should make the check fail
        self.backend.pullFromParent(continuous = True, cancel = True)
        self.backend.sendToParent(continuous = True, cancel = True)
        self.assertFalse(self.backend.checkReplicationStatus())
        self.backend.pullFromParent(continuous = True)
        self.backend.sendToParent(continuous = True)
        self.assertTrue(self.backend.checkReplicationStatus())
示例#23
0
For this specific case, it looks for StoreResults elements sitting
in the Available status. It then queries DBS phys03 instance, fetches
the input data location and updates the GQE with the new location.

Run it from the agent, with the agent environment loaded
"""
from __future__ import print_function

import sys

from WMCore.Services.CRIC.CRIC import CRIC
from WMCore.Services.DBS.DBSReader import DBSReader
from WMCore.WorkQueue.WorkQueueBackend import WorkQueueBackend

# module-level backend pointing at the central (global) workqueue couch instance
backend = WorkQueueBackend('https://cmsweb.cern.ch/couchdb')


def isDataset(inputData):
    """Return True when *inputData* names a dataset, False for a block.

    Block names carry a '#<suffix>' in their last path component;
    dataset names do not.
    """
    lastComponent = inputData.split('/')[-1]
    return '#' not in lastComponent


def getProblematicRequests():
    """
    _getProblematicRequests_
    """
    elements = backend.getElements(status="Available", TaskName="StoreResults")
    print("Found %d StoreResults GQE elements in Available status" %
示例#24
0
def main():
    """
    Reset 'Acquired' local inbox elements back to 'Available' and delete
    'Available' local workqueue elements for the request given on the
    command line.

    NOTE: the script is guarded by an early sys.exit(10) because, as it
    stands, it may create couch document conflicts.
    """
    if 'WMAGENT_CONFIG' not in os.environ:
        os.environ[
            'WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py'
    config = loadConfigurationFile(os.environ["WMAGENT_CONFIG"])

    print("Work in progress! It might create document conflicts as it is!")
    sys.exit(10)

    if len(sys.argv) != 2:
        print("You must provide a request name")
        sys.exit(1)
    reqName = sys.argv[1]
    childQueue = config.WorkQueueManager.queueParams['QueueURL']

    logger = setupLogger()
    localWQBackend = WorkQueueBackend(config.WorkQueueManager.couchurl,
                                      db_name="workqueue",
                                      logger=logger)
    localElems = localWQBackend.getElements(WorkflowName=reqName)
    localInboxElems = localWQBackend.getInboxElements(WorkflowName=reqName)

    docsToUpdate = []
    logger.info(
        "** Local workqueue_inbox elements for workflow %s and agent %s",
        reqName, childQueue)
    for elem in localInboxElems:
        if elem['Status'] == "Acquired":
            logger.info("Element id: %s has status: %s", elem.id,
                        elem['Status'])
            # make the element available again and detach it from this agent
            elem['Status'] = 'Available'
            elem['ChildQueueUrl'] = None
            docsToUpdate.append(elem)
    if docsToUpdate:
        var = raw_input(
            "Found %d inbox elements to update, shall we proceed (Y/N): " %
            len(docsToUpdate))
        if var == "Y":
            resp = localWQBackend.saveElements(*docsToUpdate)
            logger.info("    update response: %s", resp)

    docsToUpdate = []
    logger.info("** Local workqueue elements for workflow %s and agent %s",
                reqName, childQueue)
    for elem in localElems:
        if elem['Status'] == "Available":
            logger.info("Element id: %s has status: %s", elem.id,
                        elem['Status'])
            # BUG FIX: keep the element object itself (not elem._id); the
            # deletion loop below calls elem.delete() and reads elem._couch,
            # neither of which exists on a plain id string.
            docsToUpdate.append(elem)
    if docsToUpdate:
        var = raw_input(
            "Found %d elements to delete, shall we proceed (Y/N): " %
            len(docsToUpdate))
        if var == "Y":
            for elem in docsToUpdate:
                elem.delete()
            # a single commit flushes all the queued deletions to couch
            resp = docsToUpdate[0]._couch.commit()
            logger.info("    deletion response: %s", resp)

    print("Done!")

    sys.exit(0)
def main():
    """
    It will either delete docs in couchdb for the workflow you
    have provided or it will loop over the final (or almost final)
    states and ask for your permission to delete them.
    """
    wfName = sys.argv[1] if len(sys.argv) == 2 else []

    if 'WMAGENT_CONFIG' not in os.environ:
        os.environ[
            'WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py'

    config = loadConfigurationFile(os.environ["WMAGENT_CONFIG"])

    # Instantiating central services (couch stuff)
    #    print "Central Couch URL  : %s" % config.WorkloadSummary.couchurl
    #    print "Central ReqMgr URL  : %s\n" % config.AnalyticsDataCollector.centralRequestDBURL

    wfDBReader = RequestDBReader(
        config.AnalyticsDataCollector.centralRequestDBURL,
        couchapp=config.AnalyticsDataCollector.RequestCouchApp)

    # Central services
    wqBackend = WorkQueueBackend(config.WorkloadSummary.couchurl)
    wqInboxDB = Database('workqueue_inbox', config.WorkloadSummary.couchurl)

    # Local services
    localWQBackend = WorkQueueBackend(config.WorkQueueManager.couchurl,
                                      db_name="workqueue_inbox")
    localWQInboxDB = Database('workqueue', config.WorkQueueManager.couchurl)

    statusList = [
        "failed", "epic-FAILED", "completed", "closed-out", "announced",
        "aborted", "aborted-completed", "rejected", "normal-archived",
        "aborted-archived", "rejected-archived"
    ]

    for stat in final_status:
        # retrieve list of workflows in each status
        if not wfName:
            #            options = {'include_docs': False}
            date_range = {
                'startkey': [2015, 5, 15, 0, 0, 0],
                'endkey': [2015, 5, 26, 0, 0, 0]
            }
            #            finalWfs = wfDBReader.getRequestByCouchView("bydate", options, date_range)
            tempWfs = wfDBReader.getRequestByCouchView("bydate", date_range)
            #print "Found %d wfs in status: %s" %(len(finalWfs), stat)
            finalWfs = []
            for wf, content in tempWfs.iteritems():
                if content['RequestStatus'] in statusList:
                    finalWfs.append(wf)
            print "Found %d wfs in not in active state" % len(finalWfs)
        else:
            finalWfs = [wfName]
            tempWfs = wfDBReader.getRequestByNames(wfName, True)
            print "Checking %s with status '%s'." % (
                wfName, tempWfs[wfName]['RequestStatus'])

        wqDocs, wqInboxDocs = [], []
        localWQDocs, localWQInboxDocs = [], []
        for counter, wf in enumerate(finalWfs):
            if counter % 100 == 0:
                print "%d wfs queried ..." % counter
            # check whether there are workqueue docs
            wqDocIDs = wqBackend.getElements(WorkflowName=wf)
            if wqDocIDs:
                print "Found %d workqueue docs for %s, status %s" % (
                    len(wqDocIDs), wf, tempWfs[wf]['RequestStatus'])
                print wqDocIDs
                wqDocs.append(wqDocIDs)

            # check whether there are workqueue_inbox docs
            if wqInboxDB.documentExists(wf):
                print "Found workqueue_inbox doc for %s, status %s" % (
                    wf, tempWfs[wf]['RequestStatus'])
                # then retrieve the document
                wqInboxDoc = wqInboxDB.document(wf)
                wqInboxDocs.append(wqInboxDoc)

            # check local queue
            wqDocIDs = localWQBackend.getElements(WorkflowName=wf)
            if wqDocIDs:
                print "Found %d local workqueue docs for %s, status %s" % (
                    len(wqDocIDs), wf, tempWfs[wf]['RequestStatus'])
                print wqDocIDs
                localWQDocs.append(wqDocIDs)
            if localWQInboxDB.documentExists(wf):
                print "Found local workqueue_inbox doc for %s, status %s" % (
                    wf, tempWfs[wf]['RequestStatus'])
                wqInboxDoc = localWQInboxDB.document(wf)
                print wqInboxDoc
                localWQInboxDocs.append(wqInboxDoc)

    # TODO TODO TODO for the moment only deletes for a specific workflow
    if wfName:
        var = raw_input("\nCan we delete all these documents (Y/N)? ")
        if var == "Y":
            # deletes workqueue_inbox doc
            if wqInboxDoc:
                print "Deleting workqueue_inbox id %s and %s" % (
                    wqInboxDoc['_id'], wqInboxDoc['_rev'])
                wqInboxDB.delete_doc(wqInboxDoc['_id'], wqInboxDoc['_rev'])

            # deletes workqueue docs
            if wqDocIDs:
                print "Deleting workqueue docs %s" % wqDocIDs
                wqBackend.deleteElements(
                    *[x for x in wqDocIDs if x['RequestName'] in wfName])
        else:
            print "You are the boss, aborting it ...\n"
示例#26
0
class WorkQueueBackendTest(unittest.TestCase):
    """Unit tests for WorkQueueBackend element insertion, prioritisation
    and duplicate handling against a live couch instance."""

    def setUp(self):
        # stand up three couch databases: the queue itself, its inbox,
        # and a parent queue to link the backend against
        self.testInit = TestInit('CouchWorkQueueTest')
        self.testInit.setLogging()
        self.testInit.setupCouch('wq_backend_test_inbox', 'WorkQueue')
        self.testInit.setupCouch('wq_backend_test', 'WorkQueue')
        self.testInit.setupCouch('wq_backend_test_parent', 'WorkQueue')
        self.couch_db = self.testInit.couch.couchServer.connectDatabase(
            'wq_backend_test')
        self.backend = WorkQueueBackend(
            db_url=self.testInit.couchUrl,
            db_name='wq_backend_test',
            inbox_name='wq_backend_test_inbox',
            parentQueue='%s/%s' %
            (self.testInit.couchUrl, 'wq_backend_test_parent'))
        # build a ReReco workload to serve as the WMSpec of test elements
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        self.processingSpec = rerecoWorkload('testProcessing', rerecoArgs)

    def tearDown(self):
        """
        _tearDown_

        """
        self.testInit.tearDownCouch()

    def testPriority(self):
        """Element priority and ordering handled correctly"""
        element = WorkQueueElement(RequestName='backend_test',
                                   WMSpec=self.processingSpec,
                                   Status='Available',
                                   Jobs=10,
                                   Priority=1)
        highprielement = WorkQueueElement(RequestName='backend_test_high',
                                          WMSpec=self.processingSpec,
                                          Status='Available',
                                          Jobs=10,
                                          Priority=100)
        element2 = WorkQueueElement(RequestName='backend_test_2',
                                    WMSpec=self.processingSpec,
                                    Status='Available',
                                    Jobs=10,
                                    Priority=1)
        element3 = WorkQueueElement(RequestName='backend_test_3',
                                    WMSpec=self.processingSpec,
                                    Status='Available',
                                    Jobs=10,
                                    Priority=1)
        lowprielement = WorkQueueElement(RequestName='backend_test_low',
                                         WMSpec=self.processingSpec,
                                         Status='Available',
                                         Jobs=10,
                                         Priority=0.1)
        self.backend.insertElements([element])
        self.backend.availableWork({'place': 1000}, {})
        # timestamp in elements have second coarseness, 2nd element must
        # have a higher timestamp to force it after the 1st
        time.sleep(1)
        self.backend.insertElements([lowprielement, element2, highprielement])
        self.backend.availableWork({'place': 1000}, {})
        time.sleep(1)
        self.backend.insertElements([element3])
        work = self.backend.availableWork({'place': 1000}, {})
        # order should be high to low, with the standard elements in the order
        # they were queueud
        self.assertEqual([x['RequestName'] for x in work[0]], [
            'backend_test_high', 'backend_test', 'backend_test_2',
            'backend_test_3', 'backend_test_low'
        ])

    def testDuplicateInsertion(self):
        """Try to insert elements multiple times"""
        # two elements of the same request, differing only in input block
        element1 = CouchWorkQueueElement(
            self.couch_db,
            elementParams={
                'RequestName': 'backend_test',
                'WMSpec': self.processingSpec,
                'Status': 'Available',
                'Jobs': 10,
                'Inputs': {
                    self.processingSpec.listInputDatasets()[0] + '#1': []
                }
            })
        element2 = CouchWorkQueueElement(
            self.couch_db,
            elementParams={
                'RequestName': 'backend_test',
                'WMSpec': self.processingSpec,
                'Status': 'Available',
                'Jobs': 20,
                'Inputs': {
                    self.processingSpec.listInputDatasets()[0] + '#2': []
                }
            })
        # inserting the same elements twice must be a no-op the 2nd time
        self.backend.insertElements([element1, element2])
        self.backend.insertElements([element1, element2])
        # check no duplicates and no conflicts
        self.assertEqual(len(self.backend.db.allDocs()['rows']),
                         4)  # design doc + workflow + 2 elements
        self.assertEqual(
            self.backend.db.loadView('WorkQueue', 'conflicts')['total_rows'],
            0)
示例#27
0
    def __init__(self, logger = None, dbi = None, **params):
        """
        Initialise the queue backends, parameter defaults and helper
        services (PhEDEx, SiteDB, data-location mapper, alerts).

        :param logger: logger to use (WorkQueueBase default otherwise)
        :param dbi: database interface handed to WorkQueueBase
        :param params: queue configuration; unset keys get defaults below
        """

        WorkQueueBase.__init__(self, logger, dbi)
        self.parent_queue = None
        self.params = params

        # config argument (within params) shall be reference to
        # Configuration instance (will later be checked for presence of "Alert")
        self.config = params.get("Config", None)
        self.params.setdefault('CouchUrl', os.environ.get('COUCHURL'))
        if not self.params.get('CouchUrl'):
            raise RuntimeError, 'CouchUrl config value mandatory'
        self.params.setdefault('DbName', 'workqueue')
        self.params.setdefault('InboxDbName', self.params['DbName'] + '_inbox')
        self.params.setdefault('ParentQueueCouchUrl', None) # We get work from here

        self.backend = WorkQueueBackend(self.params['CouchUrl'], self.params['DbName'],
                                        self.params['InboxDbName'],
                                        self.params['ParentQueueCouchUrl'], self.params.get('QueueURL'),
                                        logger = self.logger)
        # parent queue url is '<couch url>/<db name>': split into the parts
        if self.params.get('ParentQueueCouchUrl'):
            self.parent_queue = WorkQueueBackend(self.params['ParentQueueCouchUrl'].rsplit('/', 1)[0],
                                                 self.params['ParentQueueCouchUrl'].rsplit('/', 1)[1])

        self.params.setdefault("GlobalDBS",
                               "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
        self.params.setdefault('QueueDepth', 2) # when less than this locally
        self.params.setdefault('LocationRefreshInterval', 600)
        self.params.setdefault('FullLocationRefreshInterval', 7200)
        self.params.setdefault('TrackLocationOrSubscription', 'subscription')
        self.params.setdefault('ReleaseIncompleteBlocks', False)
        self.params.setdefault('ReleaseRequireSubscribed', True)
        self.params.setdefault('PhEDExEndpoint', None)
        self.params.setdefault('PopulateFilesets', True)
        self.params.setdefault('LocalQueueFlag', True)

        self.params.setdefault('JobDumpConfig', None)
        self.params.setdefault('BossAirConfig', None)

        self.params['QueueURL'] = self.backend.queueUrl # url this queue is visible on
                                    # backend took previous QueueURL and sanitized it
        self.params.setdefault('WMBSUrl', None) # this will only be set on local Queue
        self.params.setdefault('Teams', [''])
        self.params.setdefault('DrainMode', False)
        # CacheDir is where wmspec sandboxes are unpacked; mandatory when
        # this queue injects work into WMBS (PopulateFilesets)
        if self.params.get('CacheDir'):
            try:
                os.makedirs(self.params['CacheDir'])
            except OSError:
                pass
        elif self.params.get('PopulateFilesets'):
            raise RuntimeError, 'CacheDir mandatory for local queue'

        self.params.setdefault('SplittingMapping', {})
        self.params['SplittingMapping'].setdefault('DatasetBlock',
                                                   {'name': 'Block',
                                                    'args': {}}
                                                  )
        self.params['SplittingMapping'].setdefault('MonteCarlo',
                                                   {'name': 'MonteCarlo',
                                                    'args':{}}
                                                   )
        self.params['SplittingMapping'].setdefault('Dataset',
                                                   {'name': 'Dataset',
                                                    'args': {}}
                                                  )
        self.params['SplittingMapping'].setdefault('Block',
                                                   {'name': 'Block',
                                                    'args': {}}
                                                  )
        self.params['SplittingMapping'].setdefault('ResubmitBlock',
                                                   {'name': 'ResubmitBlock',
                                                    'args': {}}
                                                  )

        self.params.setdefault('EndPolicySettings', {})

        assert(self.params['TrackLocationOrSubscription'] in ('subscription',
                                                              'location'))
        # Can only release blocks on location
        if self.params['TrackLocationOrSubscription'] == 'location':
            if self.params['SplittingMapping']['DatasetBlock']['name'] != 'Block':
                raise RuntimeError, 'Only blocks can be released on location'

        # allow injection of pre-built services (useful for testing)
        if self.params.get('PhEDEx'):
            self.phedexService = self.params['PhEDEx']
        else:
            phedexArgs = {}
            if self.params.get('PhEDExEndpoint'):
                phedexArgs['endpoint'] = self.params['PhEDExEndpoint']
            self.phedexService = PhEDEx(phedexArgs)

        if self.params.get('SiteDB'):
            self.SiteDB = self.params['SiteDB']
        else:
            self.SiteDB = SiteDB()

        # accept a comma-separated string as well as a list of team names
        if type(self.params['Teams']) in types.StringTypes:
            self.params['Teams'] = [x.strip() for x in \
                                    self.params['Teams'].split(',')]

        # NOTE(review): requireBlocksSubscribed derives from
        # ReleaseIncompleteBlocks, not ReleaseRequireSubscribed -- confirm
        self.dataLocationMapper = WorkQueueDataLocationMapper(self.logger, self.backend,
                                                              phedex = self.phedexService,
                                                              sitedb = self.SiteDB,
                                                              locationFrom = self.params['TrackLocationOrSubscription'],
                                                              incompleteBlocks = self.params['ReleaseIncompleteBlocks'],
                                                              requireBlocksSubscribed = not self.params['ReleaseIncompleteBlocks'],
                                                              fullRefreshInterval = self.params['FullLocationRefreshInterval'],
                                                              updateIntervalCoarseness = self.params['LocationRefreshInterval'])

        # initialize alerts sending client (self.sendAlert() method)
        # usage: self.sendAlert(levelNum, msg = msg) ; level - integer 1 .. 10
        #    1 - 4 - lower levels ; 5 - 10 higher levels
        preAlert, self.alertSender = \
            alertAPI.setUpAlertsMessaging(self, compName = "WorkQueueManager")
        self.sendAlert = alertAPI.getSendAlert(sender = self.alertSender,
                                               preAlert = preAlert)

        self.logger.debug("WorkQueue created successfully")
示例#28
0
class WorkQueue(WorkQueueBase):
    """
    _WorkQueue_

    WorkQueue object - interface to WorkQueue functionality.
    """
    def __init__(self, logger = None, dbi = None, **params):
        """
        Initialise the queue backends, parameter defaults and helper
        services (PhEDEx, SiteDB, data-location mapper, alerts).

        :param logger: logger to use (WorkQueueBase default otherwise)
        :param dbi: database interface handed to WorkQueueBase
        :param params: queue configuration; unset keys get defaults below
        """

        WorkQueueBase.__init__(self, logger, dbi)
        self.parent_queue = None
        self.params = params

        # config argument (within params) shall be reference to
        # Configuration instance (will later be checked for presence of "Alert")
        self.config = params.get("Config", None)
        self.params.setdefault('CouchUrl', os.environ.get('COUCHURL'))
        if not self.params.get('CouchUrl'):
            raise RuntimeError, 'CouchUrl config value mandatory'
        self.params.setdefault('DbName', 'workqueue')
        self.params.setdefault('InboxDbName', self.params['DbName'] + '_inbox')
        self.params.setdefault('ParentQueueCouchUrl', None) # We get work from here

        self.backend = WorkQueueBackend(self.params['CouchUrl'], self.params['DbName'],
                                        self.params['InboxDbName'],
                                        self.params['ParentQueueCouchUrl'], self.params.get('QueueURL'),
                                        logger = self.logger)
        # parent queue url is '<couch url>/<db name>': split into the parts
        if self.params.get('ParentQueueCouchUrl'):
            self.parent_queue = WorkQueueBackend(self.params['ParentQueueCouchUrl'].rsplit('/', 1)[0],
                                                 self.params['ParentQueueCouchUrl'].rsplit('/', 1)[1])

        self.params.setdefault("GlobalDBS",
                               "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
        self.params.setdefault('QueueDepth', 2) # when less than this locally
        self.params.setdefault('LocationRefreshInterval', 600)
        self.params.setdefault('FullLocationRefreshInterval', 7200)
        self.params.setdefault('TrackLocationOrSubscription', 'subscription')
        self.params.setdefault('ReleaseIncompleteBlocks', False)
        self.params.setdefault('ReleaseRequireSubscribed', True)
        self.params.setdefault('PhEDExEndpoint', None)
        self.params.setdefault('PopulateFilesets', True)
        self.params.setdefault('LocalQueueFlag', True)

        self.params.setdefault('JobDumpConfig', None)
        self.params.setdefault('BossAirConfig', None)

        self.params['QueueURL'] = self.backend.queueUrl # url this queue is visible on
                                    # backend took previous QueueURL and sanitized it
        self.params.setdefault('WMBSUrl', None) # this will only be set on local Queue
        self.params.setdefault('Teams', [''])
        self.params.setdefault('DrainMode', False)
        # CacheDir is where wmspec sandboxes are unpacked; mandatory when
        # this queue injects work into WMBS (PopulateFilesets)
        if self.params.get('CacheDir'):
            try:
                os.makedirs(self.params['CacheDir'])
            except OSError:
                pass
        elif self.params.get('PopulateFilesets'):
            raise RuntimeError, 'CacheDir mandatory for local queue'

        self.params.setdefault('SplittingMapping', {})
        self.params['SplittingMapping'].setdefault('DatasetBlock',
                                                   {'name': 'Block',
                                                    'args': {}}
                                                  )
        self.params['SplittingMapping'].setdefault('MonteCarlo',
                                                   {'name': 'MonteCarlo',
                                                    'args':{}}
                                                   )
        self.params['SplittingMapping'].setdefault('Dataset',
                                                   {'name': 'Dataset',
                                                    'args': {}}
                                                  )
        self.params['SplittingMapping'].setdefault('Block',
                                                   {'name': 'Block',
                                                    'args': {}}
                                                  )
        self.params['SplittingMapping'].setdefault('ResubmitBlock',
                                                   {'name': 'ResubmitBlock',
                                                    'args': {}}
                                                  )

        self.params.setdefault('EndPolicySettings', {})

        assert(self.params['TrackLocationOrSubscription'] in ('subscription',
                                                              'location'))
        # Can only release blocks on location
        if self.params['TrackLocationOrSubscription'] == 'location':
            if self.params['SplittingMapping']['DatasetBlock']['name'] != 'Block':
                raise RuntimeError, 'Only blocks can be released on location'

        # allow injection of pre-built services (useful for testing)
        if self.params.get('PhEDEx'):
            self.phedexService = self.params['PhEDEx']
        else:
            phedexArgs = {}
            if self.params.get('PhEDExEndpoint'):
                phedexArgs['endpoint'] = self.params['PhEDExEndpoint']
            self.phedexService = PhEDEx(phedexArgs)

        if self.params.get('SiteDB'):
            self.SiteDB = self.params['SiteDB']
        else:
            self.SiteDB = SiteDB()

        # accept a comma-separated string as well as a list of team names
        if type(self.params['Teams']) in types.StringTypes:
            self.params['Teams'] = [x.strip() for x in \
                                    self.params['Teams'].split(',')]

        # NOTE(review): requireBlocksSubscribed derives from
        # ReleaseIncompleteBlocks, not ReleaseRequireSubscribed -- confirm
        self.dataLocationMapper = WorkQueueDataLocationMapper(self.logger, self.backend,
                                                              phedex = self.phedexService,
                                                              sitedb = self.SiteDB,
                                                              locationFrom = self.params['TrackLocationOrSubscription'],
                                                              incompleteBlocks = self.params['ReleaseIncompleteBlocks'],
                                                              requireBlocksSubscribed = not self.params['ReleaseIncompleteBlocks'],
                                                              fullRefreshInterval = self.params['FullLocationRefreshInterval'],
                                                              updateIntervalCoarseness = self.params['LocationRefreshInterval'])

        # initialize alerts sending client (self.sendAlert() method)
        # usage: self.sendAlert(levelNum, msg = msg) ; level - integer 1 .. 10
        #    1 - 4 - lower levels ; 5 - 10 higher levels
        preAlert, self.alertSender = \
            alertAPI.setUpAlertsMessaging(self, compName = "WorkQueueManager")
        self.sendAlert = alertAPI.getSendAlert(sender = self.alertSender,
                                               preAlert = preAlert)

        self.logger.debug("WorkQueue created successfully")

    def __len__(self):
        """Returns number of Available elements in queue"""
        # delegated to the couch backend; counts elements, not jobs
        return self.backend.queueLength()

    def __del__(self):
        """
        Unregister itself with Alert Receiver.
        The registration happened in the constructor when initializing.

        """
        # alertSender is set up in __init__ via alertAPI.setUpAlertsMessaging
        if self.alertSender:
            self.alertSender.unregister()

    def setStatus(self, status, elementIDs = None, SubscriptionId = None, WorkflowName = None):
        """
        _setStatus_, throws an exception if no elements are updated

        :param status: new status string to set on the matching elements
        :param elementIDs: single id or iterable of element ids (optional)
        :param SubscriptionId: restrict update to this subscription (optional)
        :param WorkflowName: restrict update to this request name (optional)
        :returns: the saved elements
        :raises WorkQueueNoMatchingElements: when nothing matches the filters
        """
        # normalize: accept a scalar id, but treat a bare string as a scalar
        # too (strings are iterable, so force them into a one-element list)
        try:
            if not elementIDs:
                elementIDs = []
            iter(elementIDs)
            if type(elementIDs) in types.StringTypes:
                raise TypeError
        except TypeError:
            elementIDs = [elementIDs]

        if status == 'Canceled': # Cancel needs special actions
            return self.cancelWork(elementIDs, SubscriptionId, WorkflowName)

        # build optional filters for the backend query
        args = {}
        if SubscriptionId:
            args['SubscriptionId'] = SubscriptionId
        if WorkflowName:
            args['RequestName'] = WorkflowName

        affected = self.backend.getElements(elementIDs = elementIDs, **args)
        if not affected:
            raise WorkQueueNoMatchingElements, "No matching elements"

        for x in affected:
            x['Status'] = status
        elements = self.backend.saveElements(*affected)

        return elements

    def setPriority(self, newpriority, *workflowNames):
        """
        Update priority for a workflow, throw exception if no elements affected
        """
        self.logger.info("Priority change request to %s for %s" % (newpriority, str(workflowNames)))
        affected = []
        for wf in workflowNames:
            affected.extend(self.backend.getElements(returnIdOnly = True, RequestName = wf))

        self.backend.updateElements(*affected, Priority = newpriority)

        if not affected:
            raise RuntimeError, "Priority not changed: No matching elements"

    def resetWork(self, ids):
        """Put work back in Available state, from here either another queue
         or wmbs can pick it up.

         If work was Acquired by a child queue, the next status update will
         cancel the work in the child.

         Note: That the same child queue is free to pick the work up again,
          there is no permanent blacklist of queues.

        :param ids: single element id or iterable of element ids
        :returns: result of the backend bulk update
        """
        self.logger.info("Resetting elements %s" % str(ids))
        # accept a scalar id as well as a list
        # NOTE(review): a bare string passes iter() and would be splatted
        # char-by-char below -- callers appear to always pass lists; confirm
        try:
            iter(ids)
        except TypeError:
            ids = [ids]

        # clearing ChildQueueUrl/WMBSUrl detaches the work from any agent
        return self.backend.updateElements(*ids, Status = 'Available',
                                           ChildQueueUrl = None, WMBSUrl = None)

    def getWork(self, siteJobs):
        """ 
        Get available work from the queue, inject into wmbs & mark as running

        siteJob is dict format of {site: estimateJobSlot}
        of the resources to get work for.

        :param siteJobs: dict mapping site name -> estimated free job slots
        :returns: list of matched (and, if PopulateFilesets, injected) elements
        """
        results = []
        if not self.backend.isAvailable():
            self.logger.warning('Backend busy or down: skipping fetching of work')
            return results
        matches, _ = self.backend.availableWork(siteJobs)

        if not matches:
            return results

        # cache wmspecs for lifetime of function call, likely we will have multiple elements for same spec.
        #TODO: Check to see if we can skip spec loading - need to persist some more details to element
        wmspecCache = {}
        for match in matches:
            blockName, dbsBlock = None, None
            if self.params['PopulateFilesets']:
                # 'in' replaces the deprecated dict.has_key(); same semantics
                if match['RequestName'] not in wmspecCache:
                    wmspec = self.backend.getWMSpec(match['RequestName'])
                    wmspecCache[match['RequestName']] = wmspec
                else:
                    wmspec = wmspecCache[match['RequestName']]

                if match['Inputs']:
                    blockName, dbsBlock = self._getDBSBlock(match, wmspec)

                # create the wmbs subscription and attach it to the element
                match['Subscription'] = self._wmbsPreparation(match,
                                                              wmspec,
                                                              blockName,
                                                              dbsBlock)

            results.append(match)

        del wmspecCache # remove cache explicitly
        self.logger.info('Injected %s units into WMBS' % len(results))
        return results

    def _getDBSBlock(self, match, wmspec):
        """Get DBS info for this block

        For ACDC (resubmission) elements the file list comes from the ACDC
        data collection service; otherwise it is looked up in DBS, with
        parentage information when the task requires it.

        :param match: workqueue element being prepared
        :param wmspec: loaded workload spec for the element's request
        :returns: tuple of (blockName, block info dict)
        """
        blockName = match['Inputs'].keys()[0] #TODO: Allow more than one

        if match['ACDC']:
            acdcInfo = match['ACDC']
            acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])
            # NOTE(review): 'collection' is fetched but never used below
            collection = acdc.getDataCollection(acdcInfo['collection'])
            # ACDC block names encode the chunk offset and size
            splitedBlockName = ACDCBlock.splitBlockName(blockName)
            fileLists = acdc.getChunkFiles(acdcInfo['collection'],
                                           acdcInfo['fileset'],
                                           splitedBlockName['Offset'],
                                           splitedBlockName['NumOfFiles'],
                                           user = wmspec.getOwner().get("name"),
                                           group = wmspec.getOwner().get("group"))
            block = {}
            block["Files"] = fileLists
            return blockName, block
        else:
            dbs = get_dbs(match['Dbs'])
            if wmspec.getTask(match['TaskName']).parentProcessingFlag():
                dbsBlockDict = dbs.getFileBlockWithParents(blockName)
            else:
                dbsBlockDict = dbs.getFileBlock(blockName)
        return blockName, dbsBlockDict[blockName]

    def _wmbsPreparation(self, match, wmspec, blockName, dbsBlock):
        """Inject data into wmbs and create subscription.

        Side effects: sets SubscriptionId, NumOfFilesAdded and Status
        ('Running') on *match* and persists it via the backend.

        :param match: workqueue element being injected
        :param wmspec: loaded workload spec
        :param blockName: input block name (may be None)
        :param dbsBlock: block info dict from _getDBSBlock (may be None)
        :returns: the created wmbs subscription
        """
        # local import to avoid pulling wmbs dependencies at module load
        from WMCore.WorkQueue.WMBSHelper import WMBSHelper
        self.logger.info("Adding WMBS subscription for %s" % match['RequestName'])

        mask = match['Mask']
        wmbsHelper = WMBSHelper(wmspec, blockName, mask, self.params['CacheDir'])

        sub, match['NumOfFilesAdded'] = wmbsHelper.createSubscriptionAndAddFiles(block = dbsBlock)
        self.logger.info("Created top level subscription %s for %s with %s files" % (sub['id'],
                                                                                     match['RequestName'],
                                                                                     match['NumOfFilesAdded']))

        match['SubscriptionId'] = sub['id']
        match['Status'] = 'Running'
        self.backend.saveElements(match)

        return sub

    def _assignToChildQueue(self, queue, *elements):
        """Assign work from parent to queue"""
        # stamp every element with the negotiation state and queue endpoints
        stamp = {'Status': 'Negotiating',
                 'ChildQueueUrl': queue,
                 'ParentQueueUrl': self.params['ParentQueueCouchUrl'],
                 'WMBSUrl': self.params["WMBSUrl"]}
        for element in elements:
            for key, value in stamp.items():
                element[key] = value

        work = self.parent_queue.saveElements(*elements)
        uniqueNames = set('"%s"' % item['RequestName'] for item in work)
        self.logger.info('Acquired work for request(s): %s' % ', '.join(uniqueNames))
        return work

    def doneWork(self, elementIDs = None, SubscriptionId = None, WorkflowName = None):
        """Mark matching work elements as 'Done'.

        Thin wrapper around setStatus(); elements may be selected by id,
        wmbs subscription id or workflow name.
        """
        selectors = dict(elementIDs = elementIDs,
                         SubscriptionId = SubscriptionId,
                         WorkflowName = WorkflowName)
        return self.setStatus('Done', **selectors)

    def cancelWork(self, elementIDs = None, SubscriptionId = None, WorkflowName = None, elements = None):
        """Cancel work - delete in wmbs, delete from workqueue db, set canceled in inbox
           Elements may be directly provided or determined from series of filter arguments

        :param elementIDs: optional list of workqueue element ids
        :param SubscriptionId: optional wmbs subscription id filter
        :param WorkflowName: optional request/workflow name filter
        :param elements: optional pre-fetched elements (skips the db lookup)
        :returns: list of ids of the canceled elements
        :raises RuntimeError: if an element has no parent in the inbox
        """
        if not elements:
            args = {}
            if SubscriptionId:
                args['SubscriptionId'] = SubscriptionId
            if WorkflowName:
                args['RequestName'] = WorkflowName
            elements = self.backend.getElements(elementIDs = elementIDs, **args)

        # only cancel in global if work has not been passed to a child queue
        if not self.params['LocalQueueFlag']:
            elements = [x for x in elements if not x['ChildQueueUrl']]

        requestNames = set([x['RequestName'] for x in elements])

        if not requestNames:
            return []

        # if we can talk to wmbs kill the jobs
        if self.params['PopulateFilesets']:
            from WMCore.WorkQueue.WMBSHelper import killWorkflow

            self.logger.debug("""Canceling work in wmbs, workflows: %s""" % (requestNames))
            for workflow in requestNames:
                try:
                    # killWorkflow expects dbi/logger on the current thread
                    myThread = threading.currentThread()
                    myThread.dbi = self.conn.dbi
                    myThread.logger = self.logger
                    killWorkflow(workflow, self.params["JobDumpConfig"],
                                 self.params["BossAirConfig"])
                except RuntimeError:
                    #TODO: Check this logic and improve if possible
                    if SubscriptionId:
                        self.logger.info("""Cancel update: Only some subscription's canceled.
                                    This might be due to a child subscriptions: %s"""
                                    % elementIDs)

        # update parent elements to canceled
        for wf in requestNames:
            inbox_elements = self.backend.getInboxElements(WorkflowName = wf, returnIdOnly = True)
            if not inbox_elements:
                # py3-compatible raise (was the py2-only "raise E, msg" form)
                raise RuntimeError("Cant find parent for %s" % wf)
            self.backend.updateInboxElements(*inbox_elements, Status = 'Canceled')
        # delete elements - no longer need them
        self.backend.deleteElements(*elements)

        return [x.id for x in elements]


    def deleteWorkflows(self, *requests):
        """Delete requests if finished.

        Each id in requests is looked up in the inbox; the element is
        deleted only when it is in an end state, otherwise an error is logged.

        :param requests: inbox element ids to delete
        :raises RuntimeError: if a lookup doesn't return exactly one element
        """
        for reqId in requests:
            matched = self.backend.getInboxElements(elementIDs = [reqId])
            if len(matched) != 1:
                # py3-compatible raise; also report the requested id instead of
                # matched[0]['RequestName'], which raised IndexError when the
                # lookup returned no elements at all
                raise RuntimeError('Invalid number of requests for %s' % reqId)
            request = matched[0]

            if request.inEndState():
                self.logger.info('Deleting request "%s" as it is %s' % (request.id, request['Status']))
                self.backend.deleteElements(request)
            else:
                self.logger.error('Not deleting "%s" as it is %s' % (request.id, request['Status']))

    def queueWork(self, wmspecUrl, request = None, team = None):
        """
        Take and queue work from a WMSpec.

        If request name is provided but doesn't match WMSpec name
        an error is raised.

        If team is provided work will only be available to queue's
        belonging to that team.

        Duplicate specs will be ignored.

        :param wmspecUrl: url of the WMSpec to load
        :param request: optional request name, validated against the spec name
        :param team: optional team name restricting who may acquire the work
        :returns: number of work elements queued (1 for an ignored duplicate)
        :raises WorkQueueWMSpecError: on invalid or mismatched request name
        """
        self.logger.info('queueWork() begin queueing "%s"' % wmspecUrl)
        wmspec = WMWorkloadHelper()
        wmspec.load(wmspecUrl)

        # check we haven't already got this work
        try:
            self.backend.getInboxElements(elementIDs = [wmspec.name()])
        except CouchNotFoundError:
            pass
        else:
            self.logger.warning('queueWork(): Ignoring duplicate spec "%s"' % wmspec.name())
            return 1

        if request:
            try:
                Lexicon.requestName(request)
            except Exception as ex: # can throw many errors e.g. AttributeError, AssertionError etc.
                # ("except E, ex" was py2-only syntax; "as" works on py2.6+/py3)
                error = WorkQueueWMSpecError(wmspec, "Request name validation error: %s" % str(ex))
                raise error
            if request != wmspec.name():
                raise WorkQueueWMSpecError(wmspec, 'Request & workflow name mismatch %s vs %s' % (request, wmspec.name()))

        # Do splitting before we save inbound work to verify the wmspec
        # if the spec fails it won't enter the queue
        inbound = self.backend.createWork(wmspec, TeamName = team, WMBSUrl = self.params["WMBSUrl"])

        # either we have already split the work or we do that now
        work = self.backend.getElementsForWorkflow(wmspec.name())
        if work:
            self.logger.info('Request "%s" already split - Resuming' % str(wmspec.name()))
        else:
            work = self._splitWork(wmspec, None, inbound['Inputs'], inbound['Mask'])
            self.backend.insertElements(work, parent = inbound) # if this fails, rerunning will pick up here

        self.backend.insertElements([inbound]) # save inbound work to signal we have completed queueing
        return len(work)
class DrainStatusAPI(object):
    """
    Provides methods for querying dbs and condor for drain statistics
    """

    def __init__(self, config):
        # queue url used in WorkQueueManager
        self.thisAgentUrl = "http://" + config.Agent.hostName + ":5984"
        # backends for the global and the agent-local workqueue couch dbs
        self.globalBackend = WorkQueueBackend(config.WorkloadSummary.couchurl)
        self.localBackend = WorkQueueBackend(config.WorkQueueManager.couchurl)
        self.dbsUtil = DBSBufferUtil()
        self.condorAPI = PyCondorAPI()
        # condor job states reported by checkCondorStates()
        self.condorStates = ("Running", "Idle")

    def collectDrainInfo(self):
        """
        Gather the drain report: workflow completion first, then the
        remaining statistics only once all workflows are done.
        """
        report = {'workflows_completed': self.checkWorkflows()}

        # if workflows are completed, collect additional drain statistics
        if report['workflows_completed']:
            report['upload_status'] = self.checkFileUploadStatus()
            report['condor_status'] = self.checkCondorStates()
            report['local_wq_status'] = self.checkLocalWQStatus(dbname="workqueue")
            report['local_wqinbox_status'] = self.checkLocalWQStatus(dbname="workqueue_inbox")
            report['global_wq_status'] = self.checkGlobalWQStatus()

        return report

    def checkWorkflows(self):
        """
        Ask DBSBuffer whether every workflow has a 'completed' status.
        """
        return self.dbsUtil.isAllWorkflowCompleted()

    def checkCondorStates(self):
        """
        Report the number of Running/Idle jobs in Condor; each state maps
        to None when the condor query returned nothing (error indicator).
        """
        jobs = self.condorAPI.getCondorJobsSummary()
        if not jobs:
            # error talking to condor - report None rather than a bogus zero
            return {state.lower(): None for state in self.condorStates}
        return {state.lower(): int(jobs[0].get(state)) for state in self.condorStates}

    def checkFileUploadStatus(self):
        """
        Check file upload status:
            Blocks open in DBS
            Files not uploaded in DBS
            Files not uploaded to Phedex
        """
        return {'dbs_open_blocks': self.dbsUtil.countOpenBlocks(),
                'dbs_notuploaded': self.dbsUtil.countFilesByStatus(status="NOTUPLOADED"),
                'phedex_notuploaded': self.dbsUtil.countPhedexNotUploaded()}

    def checkLocalWQStatus(self, dbname):
        """
        Query local WorkQueue workqueue/workqueue_inbox database to see whether
        there are any active elements in this agent.
        """
        # pick the query for the requested database once, outside the loop
        if dbname == "workqueue":
            fetchElements = self.localBackend.getElements
        else:
            fetchElements = self.localBackend.getInboxElements

        return {status: len(fetchElements(status=status, returnIdOnly=True))
                for status in ('Available', 'Negotiating', 'Acquired', 'Running')}

    def checkGlobalWQStatus(self):
        """
        Query Global WorkQueue workqueue database to see whether there are
        any active elements set to this agent.
        """
        return {status: len(self.globalBackend.getElements(status=status,
                                                           returnIdOnly=True,
                                                           ChildQueueUrl=self.thisAgentUrl))
                for status in ("Acquired", "Running")}