def main():
    usage = "usage: %prog [options] workflow"
    parser = OptionParser(usage=usage)
    parser.add_option("-f","--file", dest="fileName", default=None,
                        help="Input file")
    parser.add_option("-v","--verbose",action="store_true", dest="verbose", default=False,
                        help="Show detailed info")
    parser.add_option("--test",action="store_true", dest="test", default=False,
                        help="Only test and console output (doesn't make the actual calls)")
    (options, args) = parser.parse_args()

    if len(args) != 1 and options.fileName is None:
        parser.error("Provide the workflow name or a file")
        sys.exit(1)
    if options.fileName is None:
        workflows = [args[0]]
    else:
        workflows = [l.strip() for l in open(options.fileName) if l.strip()]
    
    datasets = []
    i = 0

    print "Getting output from workflows"
    for wf in workflows:
        if options.verbose:
            print wf
        try:
            ds = rqmgr.outputdatasetsWorkflow(url, wf)
            datasets += ds
        except:
            print wf, "skipped"
    reqs = makeDeletionRequests(url, datasets, options.verbose, options.test)
    print "Deletion request made:"
    print '\n'.join(reqs)
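# All of these examples lean on the same reqMgrClient/rqmgr helper. A minimal
# sketch of what outputdatasetsWorkflow might look like, assuming the ReqMgr2
# REST endpoint /reqmgr2/data/request?name=... and an X509 proxy for auth
# (both assumptions; the real client may differ):
import os, json, httplib

def outputdatasetsWorkflow(url, workflow):
    """Sketch: fetch the output datasets of a workflow from ReqMgr2."""
    proxy = os.getenv('X509_USER_PROXY')
    conn = httplib.HTTPSConnection(url, cert_file=proxy, key_file=proxy)
    conn.request('GET', '/reqmgr2/data/request?name=' + workflow,
                 headers={'Accept': 'application/json'})
    data = json.loads(conn.getresponse().read())
    #ReqMgr2 wraps the request document as result -> [{name: {...}}]
    return data['result'][0][workflow]['OutputDatasets']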
Example #2
def closeOutRedigiWorkflows(url, workflows):
    """
    Closes out a list of redigi workflows
    """
    noSiteWorkflows = []
    for workflow in workflows:
        closeOutWorkflow = True
        inputDataset = reqMgrClient.getInputDataSet(url, workflow)
        datasets = reqMgrClient.outputdatasetsWorkflow(url, workflow)
        for dataset in datasets:
            closeOutDataset = False
            percentage = percentageCompletion(url, workflow, dataset)
            phedexSubscription = phedexClient.hasCustodialSubscription(dataset)
            duplicate = None
            # if dataset has subscription and more than 95% events we check
            # duplicates
            if phedexSubscription and percentage >= float(0.95):    
                duplicate = dbs3Client.duplicateRunLumi(dataset)
                #if not duplicate events, dataset is ready
                if not duplicate:
                    closeOutDataset = True
                else:
                    closeOutDataset = False
            #validate when percentage is ok but has no phedex subscription
            if percentage >= float(0.95) and not phedexSubscription:
                noSiteWorkflows.append(workflow)
            #if at least one dataset is not ready wf cannot be closed out
            closeOutWorkflow = closeOutWorkflow and closeOutDataset
            print '| %80s | %100s | %4s | %5s| %3s | %5s|%5s| ' % (workflow, dataset,str(int(percentage*100)),
                                                    str(phedexSubscription), 100, duplicate, closeOutDataset)
        #workflow can only be closed out if all datasets are ready
        if closeOutWorkflow:
            reqMgrClient.closeOutWorkflowCascade(url, workflow)
    print '-'*180
    return noSiteWorkflows
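# closeOutRedigiWorkflows assumes a percentageCompletion helper. A rough
# sketch built only from the getInputEvents/getOutputEvents calls that appear
# later in this listing (the real helper may correct for filter efficiency):
def percentageCompletion(url, workflow, dataset, verbose=False):
    """Sketch: fraction of the expected events already in the dataset."""
    inputEvents = int(reqMgrClient.getInputEvents(url, workflow))
    outputEvents = int(reqMgrClient.getOutputEvents(url, workflow, dataset))
    if inputEvents == 0:
        return 0.0
    if verbose:
        print dataset, ':', outputEvents, '/', inputEvents
    return outputEvents / float(inputEvents)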
Example #3
def main():
    url='cmsweb.cern.ch'
    
    #Create option parser
    usage = "\n       python %prog [-f FILE_NAME | WORKFLOW_NAME ...]\n"
    parser = OptionParser(usage=usage)
    parser.add_option('-f', '--file', help='Text file with a list of workflows', dest='file')
    parser.add_option('-i', '--invalidate', action='store_true', default=False,
                      help='Also invalidate output datasets on DBS', dest='invalidate')
    (options, args) = parser.parse_args()
    
    if options.file:
        wfs = [l.strip() for l in open(options.file) if l.strip()]
    elif args:
        wfs = args
    else:
        parser.error("Provide the workflow of a file of workflows")
        sys.exit(1)
    
    for wf in wfs:
        print "Rejecting workflow: " + wf
        reqMgrClient.rejectWorkflow(url, wf)
        print "Rejected"
        if options.invalidate:
            print "Invalidating datasets"
            datasets = reqMgrClient.outputdatasetsWorkflow(url, wf)
            for ds in datasets:
                print ds
                dbs3.setDatasetStatus(ds, 'INVALID', files=True)
Example #4
def classifyCompletedRequests(url, requests):
    """
    Sorts completed requests by type.
    Returns a dict containing a list for each
    type of workflow.
    """
    workflows={'ReDigi':[],'MonteCarloFromGEN':[],'MonteCarlo':[] , 'ReReco':[], 'LHEStepZero':[]}
    for request in requests:
        name=request['id']
        #if a wrong or weird name
        if len(request['key'])<3:
            print request
            continue
        status=request['key'][1]
        #only completed requests
        if status=='completed':
            requestType=request['key'][2]
            #sort by type
            if requestType=='MonteCarlo':
                #MonteCarlos whose datasets end with /GEN
                #are Step0
                datasets = reqMgrClient.outputdatasetsWorkflow(url, name)
                m = re.search('.*/GEN$', datasets[0])
                if m:
                    workflows['LHEStepZero'].append(name)
                else:
                    workflows[requestType].append(name)
            elif requestType in ['MonteCarloFromGEN', 'LHEStepZero', 'ReDigi', 'ReReco']:
                workflows[requestType].append(name)
    return workflows
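# Illustration of the expected input shape: CouchDB-view style rows whose key
# is [name, status, type]. The workflow name below is hypothetical.
requests = [
    {'id': 'jdoe_Run2012_ReReco_v1',
     'key': ['jdoe_Run2012_ReReco_v1', 'completed', 'ReReco']},
]
sorted_wfs = classifyCompletedRequests('cmsweb.cern.ch', requests)
print sorted_wfs['ReReco']   #-> ['jdoe_Run2012_ReReco_v1']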
Example #5
def main():
    """
    Read the text file, for each workflow try:
    First abort it, then clone it.
    """
    args=sys.argv[1:]
    if not len(args)==3:
        print "usage:abortAndClone file.txt user group"
        sys.exit(0)
    filename = args[0]
    user = args[1]
    group = args[2]

    #reading workflow list
    workflows = [wf.strip() for wf in open(filename).readlines() if wf.strip()]
    for workflow in workflows:
        #abort workflow
        print "Aborting workflow: " + workflow
        reqMgrClient.abortWorkflow(url, workflow)
        print "Aborted. Now cloning workflow..."
            
        #invalidates datasets
        print "Invalidating datasets"
        
        datasets = reqMgrClient.outputdatasetsWorkflow(url, workflow)
        for dataset in datasets:
            print dataset
            dbs3.setStatusDBS3(dbs3_url, dataset, 'INVALID', None)

        #clone workflow
        clone = resubmit.cloneWorkflow(workflow, user, group)
        print "Cloned workflow: ",   clone
    sys.exit(0)
Example #6
def getMissingEvents(workflow):
    """
    Gets the missing events for the workflow
    """
    inputEvents = reqMgrClient.getInputEvents(url, workflow)
    dataset = reqMgrClient.outputdatasetsWorkflow(url, workflow).pop()
    outputEvents = reqMgrClient.getOutputEvents(url, workflow, dataset)
    return int(inputEvents) - int(outputEvents)
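# Hypothetical driver tying getMissingEvents to the extendWorkflow helper
# shown further down in this listing (workflow/user names are made up):
wf = 'jdoe_task_EXO-Summer15-00123'
missing = getMissingEvents(wf)
if missing > 0:
    #extend the workflow by exactly the events it is short of
    extendWorkflow(wf, 'jdoe', 'DATAOPS', events=missing)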
Example #7
def main():
    """
    Read the text file, for each workflow try:
    First reject it, then clone it.
    """
    usage = "\n       python %prog [options] [WORKFLOW_NAME] [USER GROUP]\n"\
            "WORKFLOW_NAME: if the list file is provided this should be empty\n"\
            "USER: the user for creating the clone, if empty it will\n"\
            "      use the OS user running the script\n"\
            "GROUP: the group for creating the clone, if empty it will\n"\
            "      use 'DATAOPS' by default"

    parser = OptionParser(usage=usage)
    parser.add_option('-f', '--file', help='Text file of workflows to Reject and Clone', dest='file')
    (options, args) = parser.parse_args()
    
    # Check the arguments, get info from them
    if options.file:
        wfs = [l.strip() for l in open(options.file) if l.strip()]
        if len(args) == 2:
            user = args[0]
            group = args[1]
        elif len(args) == 0:
            #get os username by default
            uinfo = pwd.getpwuid(os.getuid())
            user = uinfo.pw_name
            #group by default DATAOPS
            group = 'DATAOPS'
    else:
        if len(args) == 3:
            user = args[1]
            group = args[2]
        elif len(args) == 1:
            #get os username by default
            uinfo = pwd.getpwuid(os.getuid())
            user = uinfo.pw_name
            #group by default DATAOPS
            group = 'DATAOPS'
        else:
            parser.error("Provide the workflow of a file of workflows")
            sys.exit(1)
        #name of workflow
        wfs = [args[0]]

    for wf in wfs:
        #abort workflow
        print "Rejecting workflow: " + wf
        reqMgrClient.rejectWorkflow(url, wf)
        #invalidates datasets
        print "Invalidating datasets"
        datasets = reqMgrClient.outputdatasetsWorkflow(url, wf)
        for ds in datasets:
            print ds
            dbs3.setDatasetStatus(ds, 'INVALID', files=True)

        #clone workflow
        clone = resubmit.cloneWorkflow(wf, user, group)
    sys.exit(0)
Example #8
def main():
    """
    Read the text file, for each workflow try:
    First abort it, then clone it.
    """
    usage = "\n       python %prog [options] [WORKFLOW_NAME] [USER GROUP]\n"\
            "WORKFLOW_NAME: if the list file is provided this should be empty\n"\
            "USER: the user for creating the clone, if empty it will\n"\
            "      use the OS user running the script\n"\
            "GROUP: the group for creating the clone, if empty it will\n"\
            "      use 'DATAOPS' by default"

    parser = OptionParser(usage=usage)
    parser.add_option('-f', '--file', help='Text file of workflows to Abort and Clone', dest='file')
    (options, args) = parser.parse_args()
    
    # Check the arguments, get info from them
    if options.file:
        wfs = [l.strip() for l in open(options.file) if l.strip()]
        if len(args) == 2:
            user = args[0]
            group = args[1]
        elif len(args) == 0:
            #get os username by default
            uinfo = pwd.getpwuid(os.getuid())
            user = uinfo.pw_name
            #group by default DATAOPS
            group = 'DATAOPS'
    else:
        if len(args) == 3:
            user = args[1]
            group = args[2]
        elif len(args) == 1:
            #get os username by default
            uinfo = pwd.getpwuid(os.getuid())
            user = uinfo.pw_name
            #group by default DATAOPS
            group = 'DATAOPS'
        else:
            parser.error("Provide the workflow of a file of workflows")
            sys.exit(1)
        #name of workflow
        wfs = [args[0]]

    for wf in wfs:
        #abort workflow
        print "Aborting workflow: " + wf
        reqMgrClient.abortWorkflow(url, wf)
        #invalidates datasets
        print "Invalidating datasets"
        datasets = reqMgrClient.outputdatasetsWorkflow(url, wf)
        for ds in datasets:
            print ds
            dbs3.setDatasetStatus(ds, 'INVALID', files=True)

        #clone workflow
        clone = resubmit.cloneWorkflow(wf, user, group)
    sys.exit(0)
Example #9
def main():
    usage = "\n       python %prog [options] [WORKFLOW_NAME]\n" \
            "WORKFLOW_NAME: if the list file is provided this should be empty\n"

    parser = OptionParser(usage=usage)
    parser.add_option('-f', '--file', help='Text file of workflows to Reject and Clone', dest='file')
    parser.add_option('-c', '--clone', help='Are the workflows going to be cloned? The default value is False',action="store_true", dest='clone', default=False)
    parser.add_option('-i', '--invalidate', help='Invalidate datasets? The default value is False',action="store_true", dest='invalidate', default=False)
    parser.add_option("-u", "--user", dest="user",help="The user for creating the clone, if empty it will use the OS user running the script")
    parser.add_option("-g", "--group", dest="group", default='DATAOPS',help="The group for creating the clone, if empty it will, use 'DATAOPS' by default")
    (options, args) = parser.parse_args()

    # Check the arguments, get info from them
    if options.file:
        try:
            workflows = [l.strip() for l in open(options.file) if l.strip()]
        except:
            parser.error("Provide a valid file of workflows")
            sys.exit(1)
    elif len(args) >0:
        # name of workflow
        workflows = [args[0]]
    else:
        parser.error("Provide the workflow of a file of workflows")
        sys.exit(1)

    if not options.user:
        # get os username by default
        uinfo = pwd.getpwuid(os.getuid())
        user = uinfo.pw_name
    else:
        user = options.user

    for workflow in workflows:
        try:
            workflowInfo = reqMgrClient.Workflow(workflow)
        except:
            print("The workflow name: "+ workflow+" is  not valid.")
            continue
        # invalidates workflow
        print("Invalidating the workflow: "+ workflow)
        reqMgrClient.invalidateWorkflow(url,workflow,workflowInfo.status)

        # invalidates datasets
        if options.invalidate:
            print("Invalidating datasets")
            datasets = reqMgrClient.outputdatasetsWorkflow(url, workflow)
            for dataset in datasets:
                print(dataset)
                dbs3.setDatasetStatus(dataset, 'INVALID', files=True)

        # clones workflow
        if options.clone:
            print("Cloning workflow: "+ workflow)
            cloned = resubmit.cloneWorkflow(workflow, user, options.group)
    sys.exit(0)
Example #10
def classifyCompletedRequests(url, requests):
    """
    Sorts completed requests by type.
    Returns a dict containing a list for each
    type of workflow.
    """
    workflows={'ReDigi':[],'MonteCarloFromGEN':[],'MonteCarlo':[] , 'ReReco':[], 'LHEStepZero':[], 'StoreResults':[],
                'TaskChain':[]}
    for request in requests:

        name=request['id']
        #if a wrong or weird name
        if len(request['key'])<3:
            print request
            continue
        
        #discard RelVals
        if 'RVCMSSW' in name:
            continue
        
        status=request['key'][1]
        #only completed requests
        if status=='completed':
            requestType=request['key'][2]
            #sort by type
            if requestType=='MonteCarlo':
                #MonteCarlos whose datasets end with /GEN
                #are Step0
                try:
                    datasets = reqMgrClient.outputdatasetsWorkflow(url, name)
                    m = re.search('.*/GEN$', datasets[0])
                    if m:
                        workflows['LHEStepZero'].append(name)
                    else:
                        workflows[requestType].append(name)
                    #TODO identify MonteCarlo with two output
                except Exception as e:
                    print "Error on wf", name
                    continue
            elif requestType=='TaskChain':
                #only taskchains with MC or ReDigi subType
                subType = reqMgrClient.getWorkflowSubType(url, name)
                if subType in ['MC','ReDigi']:
                    workflows[requestType].append(name)
            elif requestType in ['MonteCarloFromGEN', 'LHEStepZero', 'ReDigi', 'ReReco', 'StoreResults']:
                workflows[requestType].append(name)
            
    return workflows
Example #11
def extendWorkflow(workflow,
                   user,
                   group,
                   verbose=False,
                   events=None,
                   firstlumi=None):

    if events is None:
        events = getMissingEvents(workflow)
    events = int(events)

    if firstlumi is None:
        #get the last lumi of the dataset
        dataset = reqMgrClient.outputdatasetsWorkflow(url, workflow).pop()

        lastLumi = dbs3Client.getMaxLumi(dataset)
        firstlumi = lastLumi
    firstlumi = int(firstlumi)

    # Get info about the workflow to be cloned
    cache = reqMgrClient.getWorkflowInfo(url, workflow)

    schema = modifySchema(cache, workflow, user, group, events, firstlumi,
                          None)
    if verbose:
        pprint(schema)
    print 'Submitting workflow'
    # Submit cloned workflow to ReqMgr
    response = reqMgrClient.submitWorkflow(url, schema)
    if verbose:
        print "RESPONSE", response

    #find the workflow name in response
    m = re.search("details\/(.*)\'", response)
    if m:
        newWorkflow = m.group(1)
        print 'Cloned workflow: ' + newWorkflow
        print 'Extended with', events, 'events'
        print response

        # Move the request to Assignment-approved
        print 'Approve request response:'
        data = reqMgrClient.setWorkflowApproved(url, newWorkflow)
        print data
    else:
        print response
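# The regex above assumes the ReqMgr reply embeds a details link. A made-up
# reply, for illustration only:
response = "ok: <a href='/reqmgr/view/details/jdoe_EXT_Run2012_v1'>details</a>"
m = re.search("details\/(.*)\'", response)
print m.group(1)   #-> jdoe_EXT_Run2012_v1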
Example #12
def closeOutStep0Requests(url, workflows):
    """
    Closes out MonteCarlo Step0 requests
    """
    noSiteWorkflows = []
    for workflow in workflows:
        datasets = reqMgrClient.outputdatasetsWorkflow(url, workflow)
        status = reqMgrClient.getWorkflowStatus(url, workflow)
        #if not completed skip
        if status != 'completed':
            continue
        closeOutWorkflow = True
        #skip montecarlos on a special queue
        if reqMgrClient.getRequestTeam(url, workflow) == 'analysis':
            continue
        for dataset in datasets:
            closeOutDataset = False
            percentage = percentageCompletion(url, workflow, dataset)
            phedexSubscription = phedexClient.getCustodialMoveSubscriptionSite(dataset)
            transPerc = 0
            closedBlocks = None
            duplicate = None
            correctLumis = None
            # if dataset has subscription and enough events we check
            # duplicates, transfer percentage, closed blocks and lumis
            if phedexSubscription and percentage >= float(0.95):
                transPerc = phedexClient.getTransferPercentage(url, dataset, phedexSubscription)
                duplicate = dbs3Client.duplicateLumi(dataset)
                correctLumis = checkCorrectLumisEventGEN(dataset)
                #TODO validate closed blocks
                if not duplicate and correctLumis:
                    closeOutDataset = True
                else:
                    closeOutDataset = False
            #validate when percentage is ok but has no phedex subscription
            if percentage >= float(0.95) and not phedexSubscription:
                noSiteWorkflows.append(workflow)
            #if at least one dataset is not ready wf cannot be closed out
            closeOutWorkflow = closeOutWorkflow and closeOutDataset
            print '| %80s | %100s | %4s | %5s| %3s | %5s| %5s| ' % (workflow, dataset,str(int(percentage*100)),
                        str(phedexSubscription), str(correctLumis), duplicate, closeOutDataset)
        #workflow can only be closed out if all datasets are ready
        if closeOutWorkflow:
            reqMgrClient.closeOutWorkflowCascade(url, workflow)
    print '-'*180
    return noSiteWorkflows
Example #13
def closeOutMonterCarloRequests(url, workflows):
    """
    Closes out either MonteCarlo or MonteCarloFromGEN
    workflows
    """
    noSiteWorkflows = []
    for workflow in workflows:
        datasets = reqMgrClient.outputdatasetsWorkflow(url, workflow)
        closeOutWorkflow = True
        #skip montecarlos on a special queue
        if reqMgrClient.getRequestTeam(url, workflow) == 'analysis':
            continue
        for dataset in datasets:
            closePercentage = 0.95
            # validation for SMS montecarlos
            if 'SMS' in dataset:
                closePercentage= 1.00
            percentage = percentageCompletion(url, workflow, dataset)
            phedexSubscription = phedexClient.getCustodialMoveSubscriptionSite(dataset)
            transPerc = 0
            closedBlocks = None
            duplicate = None
            # if dataset has subscription and enough events we check
            # duplicates, transfer percentage and closed blocks
            if phedexSubscription and percentage >= float(closePercentage):
                transPerc = phedexClient.getTransferPercentage(url, dataset, phedexSubscription)
                duplicate = dbs3Client.duplicateLumi(dataset)
                if not duplicate:
                    closeOutDataset = True
                else:
                    closeOutDataset = False
            else:
                closeOutDataset = False
            #validate when percentage is ok but has no phedex subscription
            if percentage >= float(closePercentage) and not phedexSubscription:
                noSiteWorkflows.append(workflow)
            #if at least one dataset is not ready wf cannot be closed out
            closeOutWorkflow = closeOutWorkflow and closeOutDataset
            print '| %80s | %100s | %4s | %5s| %3s | %5s| %5s|' % (workflow, dataset,str(int(percentage*100)),
                        str(phedexSubscription), str(int(transPerc*100)), duplicate, closeOutDataset)
        #workflow can only be closed out if all datasets are ready
        if closeOutWorkflow:
            reqMgrClient.closeOutWorkflowCascade(url, workflow)
    #separation line
    print '-'*180
    return noSiteWorkflows
Example #14
def getDatasetVersion(url, workflow, era, procstring):
    versionNum = 1
    outputs = reqMgrClient.outputdatasetsWorkflow(url, workflow)
    for output in outputs:
        bits = output.split('/')
        outputCheck = '/'+bits[1]+'/'+era+'-'+procstring+'*/'+bits[len(bits)-1]

        datasets = getDatasets(outputCheck)
        for dataset in datasets:
            datasetName = dataset['dataset']
            matchObj = re.match(r".*-v(\d+)/.*", datasetName)
            if matchObj:
                currentVersionNum = int(matchObj.group(1))
                #bump past the highest version already in DBS
                if versionNum <= currentVersionNum:
                    versionNum = currentVersionNum + 1

    return versionNum
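# For example (hypothetical names): if DBS already holds matching datasets
# tagged -v1 and -v2 for this era and processing string, the next clone
# should be versioned v3:
version = getDatasetVersion('cmsweb.cern.ch', 'jdoe_Run2012_ReReco_v1',
                            'Run2012A', '13Jul2012')
print 'next free version:', version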
Example #15
def main():
    args=sys.argv[1:]
    if not len(args)==1:
        print "usage:WorkflowPercentage.py workflowname"
        sys.exit(0)
    workflow=args[0]

    url='cmsweb.cern.ch'

    #retrieve the output datasets
    outputDataSets=reqMgrClient.outputdatasetsWorkflow(url, workflow)    

    for dataset in outputDataSets:
        perc = percentageCompletion(url, workflow, dataset, verbose=True)
        print dataset,"match:",perc,"%"

    sys.exit(0)
Example #16
def main():
    usage = "usage: %prog [options] workflow"
    parser = OptionParser(usage=usage)
    parser.add_option("-f",
                      "--file",
                      dest="fileName",
                      default=None,
                      help="Input file")
    parser.add_option("-v",
                      "--verbose",
                      action="store_true",
                      dest="verbose",
                      default=False,
                      help="Show detailed info")
    parser.add_option(
        "--test",
        action="store_true",
        dest="test",
        default=False,
        help="Only test and console output (doesn't make the actual calls)")
    (options, args) = parser.parse_args()

    if len(args) != 1 and options.fileName is None:
        parser.error("Provide the workflow name or a file")
        sys.exit(1)
    if options.fileName is None:
        workflows = [args[0]]
    else:
        workflows = [l.strip() for l in open(options.fileName) if l.strip()]

    datasets = []
    i = 0

    print "Getting output from workflows"
    for wf in workflows:
        if options.verbose:
            print wf
        try:
            ds = rqmgr.outputdatasetsWorkflow(url, wf)
            datasets += ds
        except:
            print wf, "skipped"
    reqs = makeDeletionRequests(url, datasets, options.verbose, options.test)
    print "Deletion request made:"
    print '\n'.join(reqs)
Example #17
def duplicateLumisWorkflow(url, workflow, verbose=False):
    """
    Shows whether the workflow has duplicate lumis
    """
    datasets = reqMgrClient.outputdatasetsWorkflow(url, workflow)
    duplicate = False
    print 'workflow:',workflow
    #check each dataset
    for dataset in datasets:
        print 'dataset :', dataset		
        #if dbs3Client.duplicateLumi(dataset, verbose):
        if dbs3Client.duplicateRunLumi(dataset, verbose, skipInvalid=True):
            duplicate = True
            #fast check, one dataset duplicated
            if not verbose:
                print 'Has duplicated lumis'
                return True
    if not duplicate:
        print "No duplicate found"
    return duplicate
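# Typical use as a pre-closeout check, with a hypothetical workflow name:
if duplicateLumisWorkflow('cmsweb.cern.ch', 'jdoe_Run2012_ReReco_v1'):
    print 'clean up the duplicate lumis before closing out'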
Example #19
def extendWorkflow(workflow, user, group, verbose=False, events=None, firstlumi=None):
    
    if events is None:
        events = getMissingEvents(workflow)
    events = int(events)
    
    if firstlumi is None:
        #get the last lumi of the dataset
        dataset = reqMgrClient.outputdatasetsWorkflow(url, workflow).pop()
          
        lastLumi = dbs3Client.getMaxLumi(dataset)
        firstlumi = lastLumi
    firstlumi = int(firstlumi)
    
    # Get info about the workflow to be cloned
    helper = reqMgrClient.retrieveSchema(workflow)
    schema = modifySchema(helper, workflow, user, group, events, firstlumi)
    schema['OriginalRequestName'] = workflow
    if verbose:
        pprint(schema)
    print 'Submitting workflow'
    # Submit cloned workflow to ReqMgr
    response = reqMgrClient.submitWorkflow(url,schema)
    if verbose:
        print "RESPONSE", response
    
    #find the workflow name in response
    m = re.search("details\/(.*)\'",response)
    if m:
        newWorkflow = m.group(1)
        print 'Cloned workflow: '+newWorkflow
        print 'Extended with', events, 'events'
        print response
        
        # Move the request to Assignment-approved
        print 'Approve request response:'
        data = reqMgrClient.setWorkflowApproved(url, newWorkflow)
        print data
    else:
        print response
Example #20
def closeOutReRecoWorkflows(url, workflows):
    """
    Closeout ReReco workflows
    """
    noSiteWorkflows = []
    for workflow in workflows:
        if 'RelVal' in workflow:
            continue
        if 'TEST' in workflow:
            continue        
        datasets = reqMgrClient.outputdatasetsWorkflow(url, workflow)
        inputDataset = reqMgrClient.getInputDataSet(url, workflow)
        closeOutWorkflow = True
        #check if dataset is ready
        for dataset in datasets:
            duplicate = False
            closeOutDataset = False
            percentage = percentageCompletion(url, workflow, dataset)
            phedexSubscription = phedexClient.hasCustodialSubscription(dataset)
            #dataset can be closed out only with 100% of events
            if percentage == 1 and phedexSubscription and not duplicate:
                closeOutDataset = True
            
            #validate when percentage is ok but has no phedex subscription
            if percentage == 1 and not phedexSubscription:
                noSiteWorkflows.append(workflow)

            #if at least one dataset is not ready wf cannot be closed out
            closeOutWorkflow = closeOutWorkflow and closeOutDataset
            print '| %80s | %100s | %4s | %5s| %3s | %5s|%5s| ' % (workflow, dataset,str(int(percentage*100)),
                                                    str(phedexSubscription), 100, duplicate, closeOutDataset)
        #workflow can only be closed out if all datasets are ready
        if closeOutWorkflow:
            reqMgrClient.closeOutWorkflowCascade(url, workflow)
    print '-'*180
    return noSiteWorkflows
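# The closeout examples assume a hasCustodialSubscription helper. A rough
# sketch against the public PhEDEx data service (an assumption; the real
# phedexClient may query differently):
import json, urllib2

def hasCustodialSubscription(dataset):
    """Sketch: True if the dataset has any custodial PhEDEx subscription."""
    query = ('https://cmsweb.cern.ch/phedex/datasvc/json/prod/subscriptions'
             '?custodial=y&dataset=' + dataset)
    data = json.loads(urllib2.urlopen(query).read())
    return len(data['phedex']['dataset']) > 0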
Example #21
def main():
    url = 'cmsweb.cern.ch'

    #Create option parser
    usage = "\n       python %prog [-f FILE_NAME | WORKFLOW_NAME ...]\n"
    parser = OptionParser(usage=usage)
    parser.add_option('-f',
                      '--file',
                      help='Text file with a list of workflows',
                      dest='file')
    parser.add_option('-i',
                      '--invalidate',
                      action='store_true',
                      default=False,
                      help='Also invalidate output datasets on DBS',
                      dest='invalidate')
    (options, args) = parser.parse_args()

    if options.file:
        wfs = [l.strip() for l in open(options.file) if l.strip()]
    elif args:
        wfs = args
    else:
        parser.error("Provide the workflow of a file of workflows")
        sys.exit(1)

    for wf in wfs:
        print "Aborting workflow: " + wf
        reqMgrClient.abortWorkflow(url, wf)
        print "Aborted"

        if options.invalidate:
            print "Invalidating datasets"
            datasets = reqMgrClient.outputdatasetsWorkflow(url, wf)
            for ds in datasets:
                print ds
                dbs3.setDatasetStatus(ds, 'INVALID', files=True)

    sys.exit(0)
Example #22
def modifySchema(helper, workflow, user, group, events):
    """
    Adapts schema to right parameters
    """
    result = {}
    # Add AcquisitionEra, ProcessingString and ProcessingVersion
    result["ProcessingString"] = helper.getProcessingString()
    result["ProcessingVersion"] = helper.getProcessingVersion()
    result["AcquisitionEra"] = helper.getAcquisitionEra()
    
    for key, value in helper.data.request.schema.dictionary_().items():
        #previous versions of tags
        if key == 'ProcConfigCacheID':
            result['ConfigCacheID'] = value
        elif key == 'RequestSizeEvents':
            result['RequestSizeEvents'] = value
        #requestor info
        elif key == 'Requestor':
            result['Requestor'] = user
        elif key == 'Group':
            result['Group'] = group
        #prepend EXT_ to mark it as an extension
        elif key == 'RequestString':
            result['RequestString'] = 'EXT_'+str(value)
        #if empty
        elif key in ["RunWhitelist", "RunBlacklist", "BlockWhitelist", "BlockBlacklist"] and not value:
            result[key]=[]
        #skip empty entries
        elif not value:
            continue
        elif value != None:
            result[key] = value

    #extend workflow so it will safely start outside of the boundary
    RequestNumEvents = int(result['RequestNumEvents'])
    FirstEvent = int(result['FirstEvent'])
    FirstLumi = int(result['FirstLumi'])
    EventsPerLumi = int(result['EventsPerLumi'])
    FilterEfficiency = float(result['FilterEfficiency'])

    #FirstEvent_NEW > FirstEvent + RequestNumEvents
    #the first event needs to be outside the range
    result['FirstEvent'] = FirstEvent + RequestNumEvents + DELTA_EVENTS

    #FirstLumi_NEW > FirstLumi + RequestNumEvents/events_per_job/filterEff
    # same for the first lumi, needs to be after the last lumi
    """
    result['FirstLumi'] = int(FirstLumi
                            + math.ceil( RequestNumEvents / float(EventsPerLumi) / FilterEfficiency )
                            + DELTA_LUMIS / FilterEfficiency )
    """
    #get the last lumi of the dataset
    dataset = reqMgrClient.outputdatasetsWorkflow(url, workflow).pop()
    LastLumi = dbs3Client.getMaxLumi(dataset)

    result['FirstLumi'] = LastLumi + DELTA_LUMIS
    #only the desired events    
    result['RequestNumEvents'] = events

    if 'LumisPerJob' not in result and result['RequestType']=='MonteCarlo':
        #seek for lumis per job on helper
        splitting = helper.listJobSplittingParametersByTask()
        lumisPerJob = 300
        for k, v in splitting.items():
            if k.endswith('/Production'):
                if 'lumis_per_job' in v:
                    lumisPerJob = v['lumis_per_job']
        result['LumisPerJob'] = lumisPerJob

    #TODO do this always?
    if 'EventsPerJob' not in result and result['RequestType']=='MonteCarlo':
        #seek for events per job on helper
        splitting = helper.listJobSplittingParametersByTask()
        eventsPerJob = 120000
        for k, v in splitting.items():
            if k.endswith('/Production'):
                if 'events_per_job' in v:
                    eventsPerJob = v['events_per_job']
        result['EventsPerJob'] = eventsPerJob
   
    if 'MergedLFNBase' not in result:
        result['MergedLFNBase'] = helper.getMergedLFNBase()
    return result
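# Worked numbers for the boundary math above, with hypothetical deltas
# (DELTA_EVENTS and DELTA_LUMIS are module-level constants in the real script):
DELTA_EVENTS, DELTA_LUMIS = 100, 50
FirstEvent, RequestNumEvents = 1, 1000000
lastLumi = 4500                       #e.g. from dbs3Client.getMaxLumi(dataset)
print FirstEvent + RequestNumEvents + DELTA_EVENTS   #new FirstEvent: 1000101
print lastLumi + DELTA_LUMIS                         #new FirstLumi:  4550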
Example #23
def main():
    usage = (
        "\n       python %prog [options] [WORKFLOW_NAME]\n"
        "WORKFLOW_NAME: if the list file is provided this should be empty\n"
    )

    parser = OptionParser(usage=usage)
    parser.add_option("-f", "--file", help="Text file of workflows to Reject and Clone", dest="file")
    parser.add_option(
        "-c",
        "--clone",
        help="Are the workflows going to be cloned? The default value is False",
        action="store_true",
        dest="clone",
        default=False,
    )
    parser.add_option(
        "-i",
        "--invalidate",
        help="Invalidate datasets? The default value is False",
        action="store_true",
        dest="invalidate",
        default=False,
    )
    parser.add_option(
        "-u",
        "--user",
        dest="user",
        help="The user for creating the clone, if empty it will use the OS user running the script",
    )
    parser.add_option(
        "-g",
        "--group",
        dest="group",
        default="DATAOPS",
        help="The group for creating the clone, if empty it will, use 'DATAOPS' by default",
    )
    parser.add_option(
        "-m",
        "--memory",
        dest="memory",
        help="Set max memory for the clone. At assignment, this will be used to calculate maxRSS = memory*1024",
    )

    (options, args) = parser.parse_args()

    # Check the arguments, get info from them
    if options.file:
        try:
            workflows = [l.strip() for l in open(options.file) if l.strip()]
        except:
            parser.error("Provide a valid file of workflows")
            sys.exit(1)
    elif len(args) > 0:
        # name of workflow
        workflows = [args[0]]
    else:
        parser.error("Provide the workflow of a file of workflows")
        sys.exit(1)

    if not options.user:
        # get os username by default
        uinfo = pwd.getpwuid(os.getuid())
        user = uinfo.pw_name
    else:
        user = options.user

    for workflow in workflows:
        try:
            workflowInfo = reqMgrClient.Workflow(workflow)
        except:
            print("The workflow name: " + workflow + " is  not valid.")
            continue
        # invalidates workflow
        print("Invalidating the workflow: " + workflow)
        reqMgrClient.invalidateWorkflow(url, workflow, workflowInfo.status)

        # invalidates datasets
        if options.invalidate:
            print("Invalidating datasets")
            datasets = reqMgrClient.outputdatasetsWorkflow(url, workflow)
            for dataset in datasets:
                print(dataset)
                dbs3.setDatasetStatus(dataset, "INVALID", files=True)

        # clones workflow
        if options.clone:
            print("Cloning workflow: " + workflow)
            if options.memory:
                mem = float(options.memory)
            else:
                mem = workflowInfo.info["Memory"]
            cloned = resubmit.cloneWorkflow(workflow, user, options.group, memory=mem)
    sys.exit(0)
Example #24
def main():
    
    usage = "python %prog [OPTIONS]"
    parser = OptionParser(usage)
    parser.add_option("-a", "--doall",dest="doall", action="store_true" , default=False, 
                      help="It will analyze all datasets of the workflow from the beginning. If this option is true,"\
                        " you should provide a workflow name or a list of them in the --file option.")
    parser.add_option("-f", "--file",dest="file", 
                      help="Input file with the contents of duplicateEvents.py (a list of lumis and files)."\
                      " If you are using the --doall option, it should contain a list of workflows instead")
    
    options, args = parser.parse_args()
    workflows = None
    #if we're not doing all, input should be treated as a list of lumis and files
    if not options.doall and options.file:
        lines = [l.strip() for l in open(options.file)]
        graphs = buildGraphs(lines)
    # if do all and input file
    elif options.doall and options.file:
        workflows = [l.strip() for l in open(options.file)]
    elif options.doall and not options.file:
        workflows = args
    else:
        parser.error("You should provide an input file with the output of duplicateEvents")

    # get the output datasets of the workflows and create the graph
    if workflows:
        datasets = []
        for wf in workflows:
            datasets += reqMgrClient.outputdatasetsWorkflow(url, wf)
        
        graphs = {}
        #analyze each dataset
        for dataset in datasets:
            dup, lumis = dbs.duplicateRunLumi(dataset, verbose="dict", skipInvalid=True)
            #print lumis
            graphs[dataset] = buildGraph(lumis)
            
    
    for dataset, graph in graphs.items():
        #look for datasetname
        print "Getting events per file"
        events = getFileEvents(dataset, graph.keys())
        try:
            #first algorithm that assumes bipartition        
            files = colorBipartiteGraph(graph, events)
        except Exception as e:
            #second, algorithm
            #files = deleteMaxDegreeFirst(graph, events)
            files = deleteSmallestVertexFirst(graph, events)
        
        total = dbs.getEventCountDataSet(dataset)
        invalid = dbs.getEventCountDataSetFileList(dataset, files)
    
        print 'total events %s'%total
        print 'invalidated files %s'%len(files)
        print 'invalidated events %s'%invalid
        if total:
            print '%s%%'%(float(total-invalid)/total*100.0)
        for f in sorted(files):
            print f
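# buildGraph is not shown in this listing. A minimal sketch of what it might
# do, assuming duplicateRunLumi's dict output maps each duplicated (run, lumi)
# to the files containing it:
from collections import defaultdict

def buildGraph(lumis):
    """Sketch: adjacency map linking files that share a duplicated lumi."""
    graph = defaultdict(set)
    for runLumi, files in lumis.items():
        for f1 in files:
            for f2 in files:
                if f1 != f2:
                    graph[f1].add(f2)
    return dict((f, sorted(neighbors)) for f, neighbors in graph.items())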
Example #25
def main():
    usage = "\n       python %prog [options] [WORKFLOW_NAME]\n" \
            "WORKFLOW_NAME: if the list file is provided this should be empty\n"

    parser = OptionParser(usage=usage)
    parser.add_option('-f',
                      '--file',
                      help='Text file of workflows to Reject and Clone',
                      dest='file')
    parser.add_option(
        '-c',
        '--clone',
        help='Are the workflows going to be cloned? The default value is False',
        action="store_true",
        dest='clone',
        default=False)
    parser.add_option('-i',
                      '--invalidate',
                      help='Invalidate datasets? The default value is False',
                      action="store_true",
                      dest='invalidate',
                      default=False)
    parser.add_option(
        "-u",
        "--user",
        dest="user",
        help=
        "The user for creating the clone, if empty it will use the OS user running the script"
    )
    parser.add_option(
        "-g",
        "--group",
        dest="group",
        default='DATAOPS',
        help=
        "The group for creating the clone, if empty it will, use 'DATAOPS' by default"
    )
    parser.add_option(
        "-m",
        "--memory",
        dest="memory",
        help=
        "Set max memory for the clone. At assignment, this will be used to calculate maxRSS = memory*1024"
    )

    (options, args) = parser.parse_args()

    # Check the arguments, get info from them
    if options.file:
        try:
            workflows = [l.strip() for l in open(options.file) if l.strip()]
        except:
            parser.error("Provide a valid file of workflows")
            sys.exit(1)
    elif len(args) > 0:
        # name of workflow
        workflows = [args[0]]
    else:
        parser.error("Provide the workflow of a file of workflows")
        sys.exit(1)

    if not options.user:
        # get os username by default
        uinfo = pwd.getpwuid(os.getuid())
        user = uinfo.pw_name
    else:
        user = options.user

    for workflow in workflows:
        try:
            workflowInfo = reqMgrClient.Workflow(workflow)
        except:
            print("The workflow name: " + workflow + " is  not valid.")
            continue
        # invalidates workflow
        print("Invalidating the workflow: " + workflow)
        reqMgrClient.invalidateWorkflow(url, workflow, workflowInfo.status)

        # invalidates datasets
        if options.invalidate:
            print("Invalidating datasets")
            datasets = reqMgrClient.outputdatasetsWorkflow(url, workflow)
            for dataset in datasets:
                print(dataset)
                dbs3.setDatasetStatus(dataset, 'INVALID', files=True)

        # clones workflow
        if options.clone:
            print("Cloning workflow: " + workflow)
            if options.memory:
                mem = float(options.memory)
            else:
                mem = workflowInfo.info["Memory"]
            cloned = resubmit.cloneWorkflow(workflow,
                                            user,
                                            options.group,
                                            memory=mem)
    sys.exit(0)
Example #26
def modifySchema(helper, workflow, user, group, events):
    """
    Adapts schema to right parameters
    """
    result = {}
    #pprint.pprint(helper.data.request.schema.dictionary_())
    for key, value in helper.data.request.schema.dictionary_().items():
        #previous versions of tags
        if key == 'ProcConfigCacheID':
            result['ConfigCacheID'] = value
        elif key == 'RequestSizeEvents':
            result['RequestSizeEvents'] = value
        #requestor info
        elif key == 'Requestor':
            result['Requestor'] = user
        elif key == 'Group':
            result['Group'] = group
        #if empty
        elif key in ["RunWhitelist", "RunBlacklist", "BlockWhitelist", "BlockBlacklist"] and not value:
            result[key]=[]
        #replace old DBS2 URL
        elif value == "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet":
            result[key] = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
        #copy the right LFN base
        elif key == 'MergedLFNBase':
            result['MergedLFNBase'] = helper.getMergedLFNBase()
        #TODO deleting timeout so they will move to running-close as soon as they can
        #elif key == 'OpenRunningTimeout':
            #delete entry
        #    continue
        #skip empty entries
        elif not value:
            continue
        elif value != None:
            result[key] = value

    #extend workflow so it will safely start outside of the boundary
    RequestNumEvents = int(result['RequestNumEvents'])
    FirstEvent = int(result['FirstEvent'])
    FirstLumi = int(result['FirstLumi'])
    EventsPerLumi = int(result['EventsPerLumi'])
    FilterEfficiency = float(result['FilterEfficiency'])

    #FirstEvent_NEW > FirstEvent + RequestNumEvents
    #the first event needs to be outside the range
    result['FirstEvent'] = FirstEvent + RequestNumEvents + DELTA_EVENTS

    #FirstLumi_NEW > FirstLumi + RequestNumEvents/events_per_job/filterEff
    # same for the first lumi, needs to be after the last lumi
    #get the last lumi of the dataset
    dataset = reqMgrClient.outputdatasetsWorkflow(url, workflow).pop()
    LastLumi = dbs3Client.getMaxLumi(dataset)

    result['FirstLumi'] = LastLumi + DELTA_LUMIS
    #only the desired events    
    result['RequestNumEvents'] = events
    
    #prepend EXT_ to mark it as an extension
    result["RequestString"] = 'EXT_'+result["RequestString"]

    #check MonteCarlo
    if result['RequestType']=='MonteCarlo':
        #check assigning parameters
        #seek for events per job on helper
        splitting = helper.listJobSplittingParametersByTask()
        eventsPerJob = 120000
        eventsPerLumi = 100000
        for k, v in splitting.items():
            print k,":",v
            if k.endswith('/Production'):
                if 'events_per_job' in v:
                    eventsPerJob = v['events_per_job']
                elif 'events_per_lumi' in v:
                    eventsPerLumi = v['events_per_lumi']
        result['EventsPerJob'] = eventsPerJob
        #result['EventsPerLumi'] = eventsPerLumi

    #Merged LFN   
    if 'MergedLFNBase' not in result:
        result['MergedLFNBase'] = helper.getMergedLFNBase()
    
    #update information from reqMgr    
    # Add AcquisitionEra, ProcessingString and ProcessingVersion
    result["ProcessingString"] = helper.getProcessingString()
    result["AcquisitionEra"] = helper.getAcquisitionEra()
    #try to parse the processing version as an integer; if that fails, default to 1
    try:
        result["ProcessingVersion"] = int(helper.getProcessingVersion())
    except ValueError:
        result["ProcessingVersion"] = 1

    return result