Пример #1
0
def _wl_submit_print(message):
    """Print *message* while holding the module-level ``StdOutLock``.

    The lock is released in a ``finally`` clause so that a failing write can
    never leave it held (which would block every other thread that prints).
    """
    StdOutLock.acquire()
    try:
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print(message)
    finally:
        StdOutLock.release()


def _wl_submit_echo_logfile(path, testid, projectid, testerid):
    """Replay the lines of the log file *path* on stdout as tagged records.

    Every line longer than one character is written as: a newline,
    ``LOGFILE_PREFIX``, then ``testid``, ``projectid``, ``testerid``, the
    literal ``0`` and the line itself, separated by the ``chr(1)`` control
    character, and a final newline.  This is best-effort: a file that is
    missing or unreadable is silently skipped.
    """
    try:
        fin = open(path)
    except (IOError, OSError):
        return
    try:
        try:
            content = fin.read()
        except (IOError, OSError):
            return
    finally:
        fin.close()

    separator = "\1"
    # NOTE(review): the literal "0" field is the same for the stdout and the
    # stderr file, exactly as in the original code -- confirm this is intended.
    record_head = "\n" + LOGFILE_PREFIX + separator.join(
        [str(testid), str(projectid), str(testerid), "0", ""])

    for line in content.split("\n"):
        if len(line) > 1:
            StdOutLock.acquire()
            try:
                sys.stdout.write(record_head + line + "\n")
            finally:
                StdOutLock.release()


def runWL( OutputDir, XMLhandler, NPoolThreads, NoSubmit=0, NoBackground=0, OneFile=0, testid=0, projectid = "default", testerid=0, timediff=0.0 ):
    """Submit all applications of a workload through a thread pool.

    The applications returned by ``XMLhandler.getDictionaryOfApplications()``
    are sorted by ascending ``'runTime'``, their run times are rebased so that
    the earliest one starts at 0 (the ``App`` dictionaries are modified in
    place), and one work item per application is handed to an
    ``ASPNThreadPool.ThreadPool`` that runs ``submitJob`` on it.

    Parameters:
        OutputDir    -- directory receiving the redirected stdout/stderr files;
                        created if possible.
        XMLhandler   -- object providing ``getDictionaryOfApplications()``.
        NPoolThreads -- number of worker threads in the pool.
        NoSubmit     -- stored in every work item under ``'NoSubmit'``.
        NoBackground -- accepted for compatibility; it currently has no effect
                        because the command line is the same either way.
        OneFile      -- 0: one ``<id>.out``/``<id>.err`` pair per application;
                        otherwise all output is appended to ``onefile.out`` /
                        ``onefile.err``, which are replayed on stdout at the end.
        testid, projectid, testerid, timediff
                     -- stored in every work item; the first three also tag
                        the replayed ``onefile`` records.

    Returns:
        The tuple ``(StartSubmissionTime, EndSubmissionTime, NTotalJobs,
        NTotalJobsInQueue)``; the two times come from ``time.time()``.
    """
    DictionaryOfApps = XMLhandler.getDictionaryOfApplications()

    ListOfApps = AIStorageUtils.dict_sortbyvalue_dict( DictionaryOfApps, 'runTime',
                                                       AIStorageUtils.SORT_TYPE_FLOAT,
                                                       AIStorageUtils.SORT_ASCENDING )

    for (app_id, App) in ListOfApps:
        print("Found  %s due to start at %s" % (App['id'], App['runTime']))

    NTotalJobs = len( ListOfApps )

    #-- rebase the run times on the earliest application; an empty workload
    #   has no first element, so it must not be indexed
    startTime = 0.0
    if ListOfApps:
        startTime = float(ListOfApps[0][1]['runTime'])
        if startTime < 0.0:
            startTime = 0.0
    for (app_id, App) in ListOfApps:
        App['runTime'] = float(App['runTime']) - startTime
        if App['runTime'] < 0.0:
            App['runTime'] = 0.0

    #-- make sure the output directory exists (best-effort)
    try:
        os.mkdir( OutputDir )
    except OSError:
        # Usually the directory already exists.  Only OSError is ignored so
        # that unrelated problems (e.g. KeyboardInterrupt) are not hidden.
        pass

    #-- generate all work units
    FirstSubmission = time.time()
    CommandLinesList = []
    for (app_id, App) in ListOfApps:
        if OneFile == 0:
            # one output/error file pair per application, overwritten
            StdOutFile = os.path.join( OutputDir, "%s.out" % app_id )
            StdErrFile = os.path.join( OutputDir, "%s.err" % app_id )
            ActualCommand = "%s 1> %s 2> %s" % ( App['submitCommand'], StdOutFile, StdErrFile )
        else:
            # a single shared pair of files, appended to by every application
            StdOutFile = os.path.join( OutputDir, "onefile.out" )
            StdErrFile = os.path.join( OutputDir, "onefile.err" )
            ActualCommand = "%s 1>> %s 2>> %s" % ( App['submitCommand'], StdOutFile, StdErrFile )

        CommandLineItem = {
            'id': app_id,
            'firstSubmission': FirstSubmission,
            # presumably 'runTime' is in milliseconds and 'startTime' in
            # seconds -- confirm against submitJob
            'startTime': float(App['runTime']/1000.0),
            # NoBackground used to decide whether ' &' was appended; that
            # suffix is disabled, so the command is identical in both cases.
            'commandLine': ActualCommand,
            'stdout': StdOutFile,
            'stderr': StdErrFile,
            'onefile': OneFile,
            #-- amod v.0.12: just generate commands
            'NoSubmit': NoSubmit,
            'testid': testid,
            'projectid': projectid,
            'testerid': testerid,
            'timediff': timediff,
        }
        CommandLinesList.append(CommandLineItem)

    #-- build a WorkRequest object for each work unit
    requests = ASPNThreadPool.makeRequests(submitJob, CommandLinesList, printSubmitJobResults)

    #-- create a pool of NPoolThreads worker threads
    _wl_submit_print("[wl-submit.py] Starting a thread pool with %s threads" % (NPoolThreads,))
    submitThreadPool = ASPNThreadPool.ThreadPool(NPoolThreads, StdOutLock)

    StartSubmissionTime = time.time()
    #-- add all work units into the thread pool
    #   NOTE: We expect the thread pool to be based on Queues,
    #         because our applications need to be run at specified times
    #         and the submit job waits until the current work unit is done
    #         -> if we are NOT using Queues, it may happen that a work unit
    #         that needs to be submitted at time T will get submitted much
    #         later, due to other jobs starting the submission before it,
    #         but waiting for their later start time
    for req in requests:
        submitThreadPool.putRequest(req)
        _wl_submit_print("[Pool] Work request #%s added (id=%s, start time=%.3f)." %
                         (req.requestID, req.args[0]['id'], req.args[0]['startTime']))

    #-- wait for all submissions to be completed, then keep polling until the
    #   pool reports that no results are pending (or the user interrupts)
    submitThreadPool.wait()
    while 1:
        try:
            submitThreadPool.poll()
            time.sleep(0.5)
        except (KeyboardInterrupt, ASPNThreadPool.NoResultsPending):
            break

    EndSubmissionTime = time.time()
    NTotalJobsInQueue = len(submitThreadPool.workRequests)

    # should send to the database the 'onefile.out' and 'onefile.err' (not tested)
    if OneFile != 0:
        for logName in ("onefile.out", "onefile.err"):
            _wl_submit_echo_logfile(os.path.join(OutputDir, logName),
                                    testid, projectid, testerid)

    return StartSubmissionTime, EndSubmissionTime, NTotalJobs, NTotalJobsInQueue
Пример #2
0
def _wl_dagman_submit_runnable(pool, pendingRequests, compositeData):
    """Move every currently runnable request from *pendingRequests* to *pool*.

    A request is runnable when ``compositeData.isRunnable(id)`` is true for
    the ``'id'`` of its first argument.  Submitted requests are removed from
    *pendingRequests* in place; a copy is iterated so that removal is safe.
    """
    for req in pendingRequests[:]:
        item = req.args[0]
        if compositeData.isRunnable(item["id"]):
            pool.putRequest(req)
            pendingRequests.remove(req)
            print("[Pool] Work request #%s added (id=%s, start time=%.3f)." % (
                req.requestID,
                item["id"],
                item["startTime"],
            ))


def runWL(OutputDir, XMLhandler, NPoolThreads, NoSubmit=0, Background=0, OneFile=0):
    """Run a workload whose jobs may depend on each other, via a thread pool.

    The applications returned by ``XMLhandler.getDictionaryOfApplications()``
    are sorted by ``'runTime'``, every run time is then reset to 0 (the
    ``App`` dictionaries are modified in place), and all jobs are registered
    with a ``CompositeApplicationData`` instance.  A request is handed to the
    ``ASPNThreadPool.ThreadPool`` (running ``runJob``) only once that instance
    reports it as runnable; the function keeps polling until the composite
    application reports it is finished or the user presses Ctrl-C.

    Parameters:
        OutputDir    -- directory receiving the redirected stdout/stderr files;
                        created if possible.
        XMLhandler   -- object providing ``getDictionaryOfApplications()``.
        NPoolThreads -- number of worker threads in the pool.
        NoSubmit     -- stored in every work item under ``"NoSubmit"``.
        Background   -- 1 appends ``" &"`` to every command line.
        OneFile      -- 0: one ``<id>.out``/``<id>.err`` pair per job; otherwise
                        all output is appended to ``onefile.out``/``onefile.err``.

    Returns:
        The tuple ``(StartSubmissionTime, EndSubmissionTime, NTotalJobs,
        NTotalJobsInQueue)``; the two times come from ``time.time()``.

    Raises:
        Any unexpected exception raised while polling is re-raised unchanged
        after its traceback has been printed.
    """
    # --- get applications
    DictionaryOfApps = XMLhandler.getDictionaryOfApplications()

    # --- create composite structure manager
    TheCompositeApplicationData = CompositeApplicationData()

    # -- sort jobs
    ListOfApps = AIStorageUtils.dict_sortbyvalue_dict(
        DictionaryOfApps, "runTime", AIStorageUtils.SORT_TYPE_FLOAT, AIStorageUtils.SORT_ASCENDING
    )
    NTotalJobs = len(ListOfApps)
    print("Found %s apps. Sorting...done." % (NTotalJobs,))

    # -- Modification - C.S.: make all the tasks have the start time 0,
    #    and add all applications to the composite structure manager
    for (job_id, App) in ListOfApps:
        App["runTime"] = 0
        print("ID %s starts in %.3fs." % (job_id, float(App["runTime"] / 1000.0)))
        # add the 'dependsOn' key if missing
        if "dependsOn" not in App:
            App["dependsOn"] = []
        TheCompositeApplicationData.addJob(job_id, App)

    # -- create all 'enables' relations
    TheCompositeApplicationData.buildEnablesRelations()
    # -- mark all the starting jobs as 'can run'
    for job_id in TheCompositeApplicationData.JobsWithDeps:
        TheCompositeApplicationData.triggerCanRunCheck(job_id)

    # --- make sure the output directory exists (best-effort)
    try:
        os.mkdir(OutputDir)
    except OSError:
        # Usually the directory already exists.  Only OSError is ignored so
        # that unrelated problems (e.g. KeyboardInterrupt) are not hidden.
        pass

    # --- generate all work units
    FirstSubmission = time.time()
    CommandLinesList = []
    for (job_id, App) in ListOfApps:
        if OneFile == 0:
            # one output/error file pair per job, overwritten
            StdOutFile = os.path.join(OutputDir, "%s.out" % job_id)
            StdErrFile = os.path.join(OutputDir, "%s.err" % job_id)
            ActualCommand = "%s 1> %s 2> %s" % (App["submitCommand"], StdOutFile, StdErrFile)
        else:
            # a single shared pair of files, appended to by every job
            StdOutFile = os.path.join(OutputDir, "onefile.out")
            StdErrFile = os.path.join(OutputDir, "onefile.err")
            ActualCommand = "%s 1>> %s 2>> %s" % (App["submitCommand"], StdOutFile, StdErrFile)
        if Background == 1:
            ActualCommand = ActualCommand + " &"

        CommandLineItem = {
            ".CompositeApplicationData": TheCompositeApplicationData,
            "id": job_id,
            "firstSubmission": FirstSubmission,
            "startTime": float(App["runTime"] / 1000.0),
            "commandLine": ActualCommand,
            # -- amod v.0.12: just generate commands
            "NoSubmit": NoSubmit,
        }

        # -- append item
        if os.path.exists(App["jdf"]):
            CommandLinesList.append(CommandLineItem)
        else:
            # NOTE(review): the skipped job was already registered with the
            # composite manager above; if isCompositeApplicationFinished()
            # waits for it, the polling loop below may never end -- confirm.
            print("Could not locate JDF %s ... skipping job" % (App["jdf"],))

    # --- build a WorkRequest object for each work unit
    requests = ASPNThreadPool.makeRequests(runJob, CommandLinesList, printJobResults)

    # --- create a pool of NPoolThreads worker threads
    print("[wl-exec-dagman.py] Starting a thread pool with %s threads" % (NPoolThreads,))
    submitThreadPool = ASPNThreadPool.ThreadPool(NPoolThreads, StdOutLock)

    StartSubmissionTime = time.time()
    # --- add work units into the thread pool
    #   NOTE: We expect the thread pool to be based on Queues,
    #         because our applications need to be run at specified times
    #         and the submit job waits until the current work unit is done
    #         -> if we are NOT using Queues, it may happen that a work unit
    #         that needs to be submitted at time T will get submitted much
    #         later, due to other jobs starting the submission before it,
    #         but waiting for their later start time
    # Modification - corina: the requests are put in the thread pool only when
    # their dependencies are satisfied
    _wl_dagman_submit_runnable(submitThreadPool, requests, TheCompositeApplicationData)

    # Defined up front so the return statement works even when the user
    # interrupts before the first poll() has completed.
    EndSubmissionTime = StartSubmissionTime

    # --- wait for all jobs to be completed
    try:
        while 1:
            try:
                submitThreadPool.poll()
                EndSubmissionTime = time.time()
                time.sleep(0.5)
            except ASPNThreadPool.NoResultsPending:
                # -- check that all jobs have actually finished or failed
                if TheCompositeApplicationData.isCompositeApplicationFinished():
                    EndSubmissionTime = time.time()
                    break
                # see if we have some more runnable jobs and add them to the pool
                _wl_dagman_submit_runnable(submitThreadPool, requests, TheCompositeApplicationData)

                print("[wl-exec-dagman] Got ASPNThreadPool.NoResultsPending")
                print("         All: %s Done: %s Failed: %s" % (
                    TheCompositeApplicationData.TotalJobs,
                    TheCompositeApplicationData.TotalSuccessful,
                    TheCompositeApplicationData.TotalFailed,
                ))
                time.sleep(2)
            except Exception:
                # format_exc() returns the traceback text; print_exc() returns
                # None and cannot be concatenated.  Re-raise the original
                # exception so its type and traceback are not lost.
                print(">>>" + traceback.format_exc())
                raise
    except KeyboardInterrupt:
        # Ctrl-C anywhere in the loop (including the sleeps) stops the wait
        # and reports what has been recorded so far.
        pass

    NTotalJobsInQueue = len(submitThreadPool.workRequests)

    return StartSubmissionTime, EndSubmissionTime, NTotalJobs, NTotalJobsInQueue