Exemplo n.º 1
0
def batchor( url ):
    """Create and announce batch configurations for newly approved RelVal workflows.

    Pulls 'assignment-approved' TaskChain requests submitted by the configured
    relval users, groups them by campaign (regular vs heavy-ion RelVals),
    builds an assignment configuration for each campaign not already batched,
    registers it locally (batchInfo/campaignInfo) and centrally (ReqMgr2),
    emails an announcement for brand-new campaigns, and retires campaigns
    whose workflows are all past the active statuses.

    :param url: base URL of the request manager, passed through to the
        getWorkflows*/createCampaignConfig helper functions.
    """
    UC = unifiedConfiguration()
    SI = global_SI()
    CI = campaignInfo()
    BI = batchInfo()
    ## get all workflows in assignment-approved with SubRequestType = relval
    all_wfs = []
    if UC.get("user_relval"):
        # Only requests made by the configured relval users are considered.
        users = ','.join(UC.get("user_relval"))
        wfs = getWorkflows(url, 'assignment-approved', details=False, user=users, rtype='TaskChain')
        if wfs:
            # then there is likely work to be done
            all_wfs = getWorkflowsByName(url, wfs, details=True)

    # Split regular RelVals from heavy-ion RelVals; requests without a
    # SubRequestType field are dropped entirely.
    wfs = filter( lambda r :r['SubRequestType'] == 'RelVal' if 'SubRequestType' in r else False, all_wfs)
    ## need a special treatment for those
    hi_wfs = filter( lambda r :r['SubRequestType'] == 'HIRelVal' if 'SubRequestType' in r else False, all_wfs)

    # Collect the PrepIDs of the pending workflows, keyed by campaign name.
    by_campaign = defaultdict(set)
    by_hi_campaign = defaultdict(set)
    for wf in wfs:
        print "Relval:",wf['RequestName'], wf['Campaign']
        by_campaign[wf['Campaign']].add( wf['PrepID'] )


    for wf in hi_wfs:
        print "HI Relval:",wf['RequestName'], wf['Campaign']
        by_hi_campaign[wf['Campaign']].add( wf['PrepID'] )
        
    # Baseline assignment parameters applied to every new relval campaign;
    # routing rules and the HI branch specialize a deep copy of this.
    default_setup = {
        "go" :True,
        "parameters" : {
            "SiteWhitelist": [ "T1_US_FNAL" ],
            "MergedLFNBase": "/store/relval",
            "Team" : "relval",
            "NonCustodialGroup" : "RelVal"
            },
        "custodial_override" : "notape",
        "phedex_group" : "RelVal",
        "lumisize" : -1,
        "fractionpass" : 0.0,
        "maxcopies" : 1
        }
    default_hi_setup = copy.deepcopy( default_setup )

    add_on = {}
    relval_routing = UC.get('relval_routing')
    def pick_one_site( p):
        ## modify the parameters on the spot to have only one site
        # Only acts when the whitelist has more than one entry; picks a
        # random site that is also currently ready according to site info.
        # NOTE(review): if no whitelisted site is in SI.sites_ready,
        # random.choice raises on the empty list — presumably never happens
        # in practice; verify.
        if "parameters" in p and "SiteWhitelist" in p["parameters"] and len(p["parameters"]["SiteWhitelist"])>1:
            choose_from = list(set(p["parameters"]["SiteWhitelist"]) & set(SI.sites_ready))
            picked = random.choice( choose_from )
            print "picked",picked,"from",choose_from
            p["parameters"]["SiteWhitelist"] = [picked]
            
    batches = BI.all()
    for campaign in by_campaign:
        # Campaigns already batched are left untouched.
        if campaign in batches: continue
        ## get a bunch of information
        setup  = copy.deepcopy( default_setup )

        # Keyword-based routing: any routing key contained in the campaign
        # name deep-merges its parameters into the default setup.
        for key in relval_routing:
            if key in campaign:
                ## augment with the routing information
                augment_with = relval_routing[key]
                print "Modifying the batch configuration because of keyword",key
                print "with",augment_with
                setup = deep_update( setup, augment_with )

        pick_one_site( setup )
        add_on[campaign] = setup
        sendLog('batchor','Adding the relval campaigns %s with parameters \n%s'%( campaign, json.dumps( setup, indent=2)),level='critical')
        BI.update( campaign, by_campaign[campaign])
        # now update it in central CouchDB
        setup['name'] = campaign
        wmcoreCamp = parseMongoCampaigns(setup)[0]
        res = createCampaignConfig(wmcoreCamp)
        print "Campaign %s correctly created in ReqMgr2: %s" % (wmcoreCamp['CampaignName'], res)

    # Same flow for heavy-ion campaigns, but the site is forced to CERN.
    for campaign in by_hi_campaign:
        if campaign in batches: continue
        ## get a bunch of information
        setup  = copy.deepcopy( default_hi_setup )
        ##possible_sites = set(["T1_DE_KIT","T1_FR_CCIN2P3"])
        ##hi_site = random.choice(list(possible_sites))
        hi_site = "T2_CH_CERN"
        setup["parameters"]["SiteWhitelist"]=[ hi_site ]

        pick_one_site( setup )
        add_on[campaign] = setup
        sendLog('batchor','Adding the HI relval campaigns %s with parameters \n%s'%( campaign, json.dumps( setup, indent=2)),level='critical')
        BI.update( campaign, by_hi_campaign[campaign])
        # now update it in central CouchDB
        setup['name'] = campaign
        wmcoreCamp = parseMongoCampaigns(setup)[0]
        res = createCampaignConfig(wmcoreCamp)
        print "Campaign %s correctly created in ReqMgr2: %s" % (wmcoreCamp['CampaignName'], res)

    ## only new campaigns in announcement
    for new_campaign in list(set(add_on.keys())-set(CI.all(c_type='relval'))):
        ## this is new, and can be announced as such
        print new_campaign,"is new stuff"
        subject = "Request of RelVal samples batch %s"% new_campaign
        text="""Dear all, 
A new batch of relval workflows was requested.

Batch ID:

%s

Details of the workflows:

https://dmytro.web.cern.ch/dmytro/cmsprodmon/requests.php?campaign=%s

This is an automated message"""%( new_campaign, 
                                  new_campaign,
                                  )


        print subject
        print text
        to = ['*****@*****.**']
        sendEmail(subject, text, destination=to)
        # sendLog('batchor',text, level='critical')

    ## go through all existing campaigns and remove the ones not in use anymore ?
    for old_campaign in CI.all(c_type='relval'):
        all_in_batch = getWorkflowByCampaign(url, old_campaign, details=True)
        if not all_in_batch: continue
        # A batch is done when none of its workflows is in an active status.
        is_batch_done = all(map(lambda s : not s in ['completed','force-complete','running-open','running-closed','acquired','staged','staging','assigned','assignment-approved'], [wf['RequestStatus']for wf in all_in_batch]))
        ## check all statuses
        if is_batch_done:
            #print "batch",old_campaign,"can be closed or removed if necessary"
            #campaigns[old_campaign]['go'] = False ## disable
            CI.pop( old_campaign ) ## or just drop it all together ?
            BI.pop( old_campaign )
            print "batch",old_campaign," configuration was removed"
            res = deleteCampaignConfig(old_campaign)
            print "Campaign %s correctly deleted in ReqMgr2: %s" % (old_campaign, res)


    ## merge all anyways
    CI.update( add_on , c_type = 'relval')
Exemplo n.º 2
0
def closor(url, specific=None, options=None):
    if userLock(): return
    mlock = moduleLock()
    if mlock() and not options.manual: return
    up = componentInfo(soft=['mcm', 'wtc'])
    if not up.check(): return

    UC = unifiedConfiguration()
    CI = campaignInfo()
    BI = batchInfo()
    CloseI = closeoutInfo()

    all_late_files = []

    jump_the_line = options.announce if options else False
    if jump_the_line:
        print "announce option is on. Checking on things on-going ready to be announced"
        wfs = session.query(Workflow).filter(
            Workflow.status.contains('announce')).filter(
                sqlalchemy.not_(Workflow.status.contains('announced'))).all()
    else:
        print "regular option. Checking on things done and to be announced"
        wfs = session.query(Workflow).filter(Workflow.status == 'close').all()

    if specific:
        wfs = [wfo for wfo in wfs if specific in wfo.name]
    wfs_n = [w.name for w in wfs]

    print "unique names?"
    print len(set(wfs_n)) == len(wfs_n)

    held = set()

    print len(wfs), "closing"
    random.shuffle(wfs)
    max_per_round = UC.get('max_per_round').get('closor', None)
    if options.limit: max_per_round = options.limit

    if max_per_round:
        ## order them by priority
        all_closedout = sorted(getWorkflows(url, 'closed-out', details=True),
                               key=lambda r: r['RequestPriority'])
        all_closedout = [r['RequestName'] for r in all_closedout]

        def rank(wfn):
            return all_closedout.index(wfn) if wfn in all_closedout else 0

        wfs = sorted(wfs, key=lambda wfo: rank(wfo.name), reverse=True)
        wfs = wfs[:max_per_round]

    batch_go = {}
    batch_warnings = defaultdict(set)
    batch_extreme_warnings = defaultdict(set)
    batch_goodness = UC.get("batch_goodness")

    closers = []

    print len(wfs), "closing"
    th_start = time.mktime(time.gmtime())

    for iwfo, wfo in enumerate(wfs):
        if specific and not specific in wfo.name: continue
        if not options.manual and (
                'cmsunified_task_HIG-RunIIFall17wmLHEGS-05036__v1_T_200712_005621_4159'
                .lower() in (wfo.name).lower() or
                'pdmvserv_task_HIG-RunIISummer16NanoAODv7-03979__v1_T_200915_013748_1986'
                .lower() in (wfo.name).lower()):
            continue
        closers.append(
            CloseBuster(
                wfo=wfo,
                url=url,
                CI=CI,
                UC=UC,
                jump_the_line=jump_the_line,
                batch_goodness=batch_goodness,
                batch_go=batch_go,
                #stats = stats,
                batch_warnings=batch_warnings,
                batch_extreme_warnings=batch_extreme_warnings,
                all_late_files=all_late_files,
                held=held,
            ))

    run_threads = ThreadHandler(threads=closers,
                                n_threads=options.threads,
                                sleepy=10,
                                timeout=None,
                                verbose=True,
                                label='closor')

    run_threads.start()

    ## waiting on all to complete
    while run_threads.is_alive():
        #print "Waiting on closing threads",time.asctime(time.gmtime())
        time.sleep(5)

    JC = JIRAClient() if up.status.get('jira', False) else None
    print len(
        run_threads.threads), "finished thread to gather information from"
    failed_threads = 0
    for to in run_threads.threads:
        if to.failed:
            failed_threads += 1
            continue
        if to.outs:
            for outO in to.outs:
                out = outO.datasetname
                odb = session.query(Output).filter(
                    Output.datasetname == out).first()
                if not odb:
                    print "adding an output object", out
                    session.add(outO)
                else:
                    odb.date = outO.date

        if to.to_status:
            to.wfo.status = to.to_status
            if JC and to.to_status == "done" and to.wfi:
                jiras = JC.find({"prepid": to.wfi.request['PrepID']})
                for jira in jiras:
                    JC.close(jira.key)

        if to.to_wm_status:
            to.wfo.wm_status = to.to_wm_status
        if to.closing:
            CloseI.pop(to.wfo.name)

        session.commit()

    th_stop = time.mktime(time.gmtime())

    if wfs:
        time_spend_per_workflow = (th_stop - th_start) / float(len(wfs))
        print "Average time spend per workflow is", time_spend_per_workflow

    if float(failed_threads / run_threads.n_threads) > 0:
        sendLog('checkor',
                '%d/%d threads have failed, better check this out' %
                (failed_threads, run_threads.n_threads),
                level='critical')
        sendEmail(
            'checkor', '%d/%d threads have failed, better check this out' %
            (failed_threads, run_threads.n_threads))

    days_late = 0.
    retries_late = 10

    really_late_files = [
        info for info in all_late_files if info['retries'] >= retries_late
    ]
    really_late_files = [
        info for info in really_late_files
        if info['delay'] / (60 * 60 * 24.) >= days_late
    ]

    if really_late_files:
        subject = 'These %d files are lagging for %d days and %d retries announcing dataset \n%s' % (
            len(really_late_files), days_late, retries_late,
            json.dumps(really_late_files, indent=2))
        #sendEmail('waiting for files to announce', subject)
        sendLog('closor', subject, level='warning')
        sendLog('closor', subject)
        print subject
        open('%s/stuck_files.json' % monitor_dir,
             'w').write(json.dumps(really_late_files, indent=2))

    if held:
        sendLog('closor',
                "the workflows below are held up \n%s" %
                ("\n".join(sorted(held))),
                level='critical')

    for bname, go in batch_go.items():
        if go:
            subject = "Release Validation Samples Batch %s" % bname
            issues = ""
            #if batch_warnings[ bname ]:
            #    issues="The following datasets have outstanding completion (<%d%%) issues:\n\n"% batch_goodness
            #    issues+="\n".join( sorted( batch_warnings[ bname ] ))
            #    issues+="\n\n"
            if batch_extreme_warnings[bname]:
                subject = "Low Statistics for %s" % bname
                issues = "The following datasets have outstanding completion (<50%%) issues:\n\n"
                issues += "\n".join(sorted(batch_extreme_warnings[bname]))
                issues += "\n\n"
            elif batch_warnings[bname]:
                issues = "The following datasets have outstanding completion (<%d%%) issues:\n\n" % batch_goodness
                issues += "\n".join(sorted(batch_warnings[bname]))
                issues += "\n\n"
            text = ""
            text += "Dear all,\n\n"
            text += "A batch of release validation workflows has finished.\n\n"
            text += "Batch ID:\n\n"
            text += "%s\n\n" % (bname)
            text += "Detail of the workflows\n\n"
            text += "https://dmytro.web.cern.ch/dmytro/cmsprodmon/requests.php?campaign=%s\n\n" % (
                bname)
            text += "%s\n\n" % (issues)
            text += "This is an automated message.\n\n"
            text += ""
            to = ['*****@*****.**']
            sendEmail(subject, text, destination=to)
            ## just announced ; take it out now.
            BI.pop(bname)
            deleteCampaignConfig(bname)

    if os.path.isfile('.closor_stop'):
        print "The loop on workflows was shortened"
        sendEmail('closor',
                  'Closor loop was shortened artificially using .closor_stop')
        os.system('rm -f .closor_stop')
Exemplo n.º 3
0
def main():
    """
    Execute the whole logic for campaign configuration management
    """
    options = parseArgs()

    client = mongo_client()
    db = client.unified.campaignsConfiguration

    if options.load:
        campaigns = []
        content = json.loads(open(options.load).read())
        for k, v in content.items():
            up = {'name': k}
            #s = {"$set": v}
            #db.update( up, s )
            ## replace the db content
            v['name'] = k
            if options.type: v['type'] = options.type
            db.replace_one(up, v)
            campaigns.append(v)
            print k, v
        replaceCampaigns(campaigns)
        sys.exit(0)

    if options.dump:
        uc = {}
        for content in db.find():
            i = content.pop("_id")
            if content.get('type', None) != options.type:
                continue  ## no relval
            if 'name' not in content:
                db.delete_one({'_id': i})
                print "dropping", i, content, "because it is malformated"
                continue
            uc[content.pop("name")] = content
        print len(uc.keys()), "campaigns damp"
        open(options.dump, 'w').write(json.dumps(uc, indent=2, sort_keys=True))
        sys.exit(0)

    if options.remove:
        if options.name:
            db.delete_one({'name': options.name})
            # and delete it in central couch too
            deleteCampaignConfig(options.name)
        else:
            pass
        sys.exit(0)

    post = {}
    if options.configuration:
        try:
            post.update(json.loads(options.configuration))
        except:
            post.update(json.loads(open(options.configuration).read()))
        post['name'] = options.name
    update = {}
    if options.parameter:
        name, value = options.parameter.split(':', 1)
        ## convert to int or float or object
        try:
            value = int(value)
        except:
            try:
                value = float(value)
            except:
                try:
                    value = json.loads(value)
                except:
                    # as string
                    pass

        if '.' in name:
            path = list(name.split('.'))
            w = update
            for p in path[:-1]:
                w[p] = {}
                w = w[p]
            w[path[-1]] = value
        else:
            update[name] = value

    found = db.find_one({"name": options.name})
    if found:
        up = {'_id': found['_id']}
        if post:
            print "replacing", options.name, "with values", post
            if options.type: post['type'] = options.type
            db.replace_one(up, post)
            ### Alan: can I assume options.name and options.configuration
            # contain the same campaign configuration?!?!
            replaceCampaigns(post)
        elif update:
            ## need to update a value
            if options.type: update['type'] = options.type
            print "updating", options.name, "with values", update
            db.update(up, {"$set": update})
            ### And update it in central CouchDB as well
            thisDoc = deepcopy(found)
            thisDoc.update(update)
            replaceCampaigns(thisDoc)
        else:
            ## use that to show the value in the database
            # not other headers in the output, so that it can be json loadable
            found.pop('name')
            found.pop('_id')
            print json.dumps(found, indent=2, sort_keys=True)
    else:
        if post:
            ## entering a new value
            if options.type: post['type'] = options.type
            post.update({"name": options.name})
            db.insert_one(post)
            createCampaign(post)
        elif update:
            if options.type: update['type'] = options.type
            update.update({"name": options.name})
            db.insert_one(update)
            createCampaign(post)
        else:
            availables = [o["name"] for o in db.find()]
            print options.name, " Not found. ", len(
                availables), "available campaigns \n", "\n\t".join(
                    sorted(availables))