Пример #1
0
def monitor_timestep(jobs):
    """
    Poll the grid until every job in ``jobs`` has finished and been verified.

    Each pass asks ``mig.job_finished`` about every job that is still
    pending.  A ``MigInterfaceError`` raised while polling is logged and the
    job is treated as not finished yet.  When a job reports finished, its
    results are downloaded and checked with ``verify_job_output``; verified
    output is then handed to ``clean_up_mig_home``.

    jobs -- sequence of job dicts; each must provide a "job_id" key.

    Returns 0 once all jobs are finished and verified, or 1 as soon as a
    finished job fails verification.
    """
    # Track outstanding jobs by position instead of testing
    # ``job in finished_jobs``: that avoids a linear dict-equality scan per
    # job per poll, and two equal job dicts can no longer prevent the
    # completion condition from ever being reached.
    pending = list(jobs)
    while True:
        still_pending = []
        for job in pending:
            try:
                job_done = mig.job_finished(job["job_id"])
            except migerror.MigInterfaceError as e:
                # A failed status query is not fatal; retry on the next pass.
                log(str(e))
                job_done = False

            if not job_done:
                still_pending.append(job)
                continue

            output_files = download_result(job)
            if not verify_job_output(output_files, job):
                return 1
            clean_up_mig_home(output_files)

        pending = still_pending
        update(status="waiting for jobs", state=STATE_RUNNING)
        if not pending:
            update(status="iteration finished", state=STATE_RUNNING)
            return 0  # success
        time.sleep(config.POLLING_INTERVAL)
Пример #2
0
def monitor_timestep(jobs):
    """
    Poll the grid until every job in ``jobs`` has finished and been verified.

    Each pass asks ``mig.job_finished`` about every job that is still
    pending.  A ``MigInterfaceError`` raised while polling is logged and the
    job is treated as not finished yet.  When a job reports finished, its
    results are downloaded and checked with ``verify_job_output``; verified
    output is then handed to ``clean_up_mig_home``.

    jobs -- sequence of job dicts; each must provide a "job_id" key.

    Returns 0 once all jobs are finished and verified, or 1 as soon as a
    finished job fails verification.
    """
    # Outstanding jobs are tracked by position rather than by
    # ``job in finished_jobs``: no per-poll linear dict-equality scan, and
    # equal job dicts cannot keep the loop from terminating.
    outstanding = list(jobs)
    while True:
        remaining = []
        for job in outstanding:
            try:
                finished = mig.job_finished(job["job_id"])
            except migerror.MigInterfaceError as e:
                # A failed status query is not fatal; retry on the next pass.
                log(str(e))
                finished = False

            if not finished:
                remaining.append(job)
                continue

            output_files = download_result(job)
            if not verify_job_output(output_files, job):
                return 1
            clean_up_mig_home(output_files)

        outstanding = remaining
        update(status="waiting for jobs", state=STATE_RUNNING)
        if not outstanding:
            update(status="iteration finished", state=STATE_RUNNING)
            return 0  # success
        time.sleep(config.POLLING_INTERVAL)
Пример #3
0
def verify_job_output(output, job):
    """
    Check that every file listed in job["result_files"] is present in output.

    The first missing file is logged and False is returned immediately;
    True is returned when nothing is missing.
    """
    for wanted in job["result_files"]:
        if wanted in output:
            continue
        message = "Could not get result file %s for job %s" % (wanted,
                                                               str(job))
        log(message)
        return False
    return True
Пример #4
0
def main_solver(matlab_sh, matlab_bin, files, number_of_jobs, first_timestep,
                last_timestep):
    """
    The main solver: start grid jobs for every timestep and wait for them.

    Timesteps run in descending order from ``first_timestep`` down to and
    including ``last_timestep``.  For each timestep the jobs are submitted,
    recorded in the persisted solver data, monitored until done and then
    post-processed.

    Returns 0 when every timestep completed, or 1 as soon as
    ``monitor_timestep`` reports an error (no clean-up is done in that case).
    """
    # NOTE(review): proc_name, start_timestep and grid_enabled are not
    # parameters of this function; presumably module-level names -- confirm.
    solver_data = {
        "timesteps": [],
        "pid": os.getpid(),
        "name": proc_name,
        "start_timestep": start_timestep,
        "grid_enabled": grid_enabled,
    }

    # Raises if the directory already exists.
    os.mkdir(config.results_dir_name)  # for storing old result

    save_solver_data(proc_name, solver_data)
    for t in range(first_timestep, last_timestep - 1, -1):  # descending
        # Single-argument print(...) is valid on Python 2 and 3; the double
        # space reproduces the output of the former ``print "...: ", t``.
        print("starting new time step:  %s" % (t,))
        timestep = str(t)

        grid_jobs = submit_jobs(matlab_sh, matlab_bin, files, number_of_jobs,
                                timestep)
        timestep_data = {"jobs": grid_jobs, "timestep": timestep}
        # Reload the persisted state before appending this timestep to it.
        solver_data = load_solver_data(proc_name)
        solver_data["timesteps"].append(timestep_data)
        save_solver_data(proc_name, solver_data)

        update(status="starting iteration", state=STATE_RUNNING)
        log("entering monitor")
        exit_code = monitor_timestep(grid_jobs)  # returns when jobs are done

        if exit_code:
            # The reported status is left unchanged here; only stdout gets
            # the error notice.
            print("Monitor error.")
            return 1

        postprocess(grid_jobs, timestep)

        log(timestep + " done. starting next")

    clean_up_mig_home(files)
    clean_upload_dir()
    return 0
Пример #5
0
def main_solver(matlab_sh, matlab_bin, files, number_of_jobs, first_timestep,
                last_timestep):
    """
    The main solver: start grid jobs for every timestep and wait for them.

    Timesteps run in descending order from ``first_timestep`` down to and
    including ``last_timestep``.  For each timestep the jobs are submitted,
    recorded in the persisted solver data, monitored until done and then
    post-processed.

    Returns 0 when every timestep completed, or 1 as soon as
    ``monitor_timestep`` reports an error (no clean-up is done in that case).
    """
    # NOTE(review): proc_name, start_timestep and grid_enabled are not
    # parameters of this function; presumably module-level names -- confirm.
    solver_data = {
        "timesteps": [],
        "pid": os.getpid(),
        "name": proc_name,
        "start_timestep": start_timestep,
        "grid_enabled": grid_enabled,
    }

    # Raises if the directory already exists.
    os.mkdir(config.results_dir_name)  # for storing old result

    save_solver_data(proc_name, solver_data)
    for t in range(first_timestep, last_timestep - 1, -1):  # descending
        # Single-argument print(...) is valid on Python 2 and 3; the double
        # space reproduces the output of the former ``print "...: ", t``.
        print("starting new time step:  %s" % (t,))
        timestep = str(t)

        grid_jobs = submit_jobs(matlab_sh, matlab_bin, files, number_of_jobs,
                                timestep)
        timestep_data = {"jobs": grid_jobs, "timestep": timestep}
        # Reload the persisted state before appending this timestep to it.
        solver_data = load_solver_data(proc_name)
        solver_data["timesteps"].append(timestep_data)
        save_solver_data(proc_name, solver_data)

        update(status="starting iteration", state=STATE_RUNNING)
        log("entering monitor")
        exit_code = monitor_timestep(grid_jobs)  # returns when jobs are done

        if exit_code:
            # The reported status is left unchanged here; only stdout gets
            # the error notice.
            print("Monitor error.")
            return 1

        postprocess(grid_jobs, timestep)

        log(timestep + " done. starting next")

    clean_up_mig_home(files)
    clean_upload_dir()
    return 0
Пример #6
0
def postprocess(jobs, timestep):
    """
    Run this code between iterations. Merges the results using matlab code.

    Invokes matlab through the shell on the function named after
    ``config.postprocessing_code``, logs its combined stdout/stderr, and then
    moves every file listed in each job's "result_files" from the current
    working directory into ``config.results_dir_name``, prefixed with
    ``t_<timestep>_``.

    jobs     -- job dicts, each providing a "result_files" list of file names.
    timestep -- timestep label as a string (it is concatenated into names).
    """
    # The last two characters of the file name are dropped to obtain the
    # function name (presumably a ".m" extension -- confirm).
    # NOTE(review): num_jobs is not a parameter of this function; presumably
    # a module-level name -- confirm it is not meant to be len(jobs).
    function_name = os.path.basename(config.postprocessing_code)[:-2]
    cmd = "matlab -nodesktop -nojvm -r '%s(%i);quit();'" % (function_name,
                                                            num_jobs)
    log("Post processing....")
    print(cmd)
    proc = subprocess.Popen(cmd,
                            shell=True,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    out, err = proc.communicate()
    log(" ".join([out, err]))
    # Previously the exit status was ignored; record a failed run so it is
    # visible in the log.
    if proc.returncode != 0:
        log("Error: post processing exited with code %i" % proc.returncode)

    # move the files to the results dir to clean up our working directory
    working_dir = os.getcwd()
    results_dir = os.path.join(working_dir, config.results_dir_name)
    for job in jobs:
        for result_file in job["result_files"]:
            src = os.path.join(working_dir, result_file)
            dst = os.path.join(results_dir,
                               "t_" + timestep + "_" + result_file)
            if os.path.exists(src):
                shutil.move(src, dst)
            else:
                log("Error: could not find result file %s after post processing."
                    % src)
Пример #7
0
def postprocess(jobs, timestep):
    """
    Run this code between iterations. Merges the results using matlab code.

    Invokes matlab through the shell on the function named after
    ``config.postprocessing_code``, logs its combined stdout/stderr, and then
    moves every file listed in each job's "result_files" from the current
    working directory into ``config.results_dir_name``, prefixed with
    ``t_<timestep>_``.

    jobs     -- job dicts, each providing a "result_files" list of file names.
    timestep -- timestep label as a string (it is concatenated into names).
    """
    # The last two characters of the file name are dropped to obtain the
    # function name (presumably a ".m" extension -- confirm).
    # NOTE(review): num_jobs is not a parameter of this function; presumably
    # a module-level name -- confirm it is not meant to be len(jobs).
    function_name = os.path.basename(config.postprocessing_code)[:-2]
    cmd = "matlab -nodesktop -nojvm -r '%s(%i);quit();'" % (function_name,
                                                            num_jobs)
    log("Post processing....")
    print(cmd)
    proc = subprocess.Popen(cmd,
                            shell=True,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    out, err = proc.communicate()
    log(" ".join([out, err]))
    # Previously the exit status was ignored; record a failed run so it is
    # visible in the log.
    if proc.returncode != 0:
        log("Error: post processing exited with code %i" % proc.returncode)

    # move the files to the results dir to clean up our working directory
    working_dir = os.getcwd()
    results_dir = os.path.join(working_dir, config.results_dir_name)
    for job in jobs:
        for result_file in job["result_files"]:
            src = os.path.join(working_dir, result_file)
            dst = os.path.join(results_dir,
                               "t_" + timestep + "_" + result_file)
            if os.path.exists(src):
                shutil.move(src, dst)
            else:
                log("Error: could not find result file %s after post processing."
                    % src)
Пример #8
0
def verify_job_output(output, job):
    """
    Report whether all of job["result_files"] appear in output.

    Stops at the first expected file that is absent, logs it together with
    the job, and returns False.  Returns True when no file is absent.
    """
    for result_name in job["result_files"]:
        if result_name not in output:
            break
    else:
        # Loop ran to completion: nothing was missing.
        return True

    log("Could not get result file %s for job %s" % (result_name, str(job)))
    return False