class TestIntegration(unittest.TestCase):
    """Integration tests for remote HTCondor job submission.

    ``test_submit`` talks to a live scheduler on ``localhost`` over SSH, so it
    performs real network and filesystem I/O and cleans up after itself.
    """

    # Scratch state used by the ``output``/``assert_args`` helpers below.
    expected = None
    actual = None
    msg = None
    # Directory containing this test module; fixtures live in 'test_files/'.
    base_dir = os.path.join(os.path.dirname(__file__))

    @property
    def output(self):
        """Formatted failure message combining msg, expected, and actual."""
        return '%s\nExpected: %s\nActual: %s\n' % (self.msg, self.expected, self.actual)

    @property
    def assert_args(self):
        """Convenience (expected, actual, message) triple for assert helpers."""
        return (self.expected, self.actual, self.output)

    def setUp(self):
        self.job_name = 'job_name'
        self.job = Job(self.job_name)

    def tearDown(self):
        pass

    def test_submit(self):
        """Submit a job to localhost via SSH and verify output files round-trip."""
        working_dir = os.path.join(self.base_dir, 'test_files', 'working_dir')
        self.job = Job('remote_test',
                       Templates.vanilla_transfer_files,
                       host='localhost',
                       username=os.environ['USER'],
                       private_key='~/.ssh/id_rsa',
                       remote_input_files=['../copy_test.py', 'input.txt'],
                       transfer_input_files='../input.txt',
                       executable=os.path.join(self.base_dir, 'test_files', 'copy_test.py'),
                       working_directory=working_dir)
        remote_base_path = os.path.expanduser('~/' + self.job._remote_id)
        if os.path.exists(remote_base_path):
            # BUGFIX: the original used a bare ``raise`` with no active
            # exception, which itself fails with an opaque error. Raise a
            # descriptive one so the violated precondition (stale remote
            # directory from a previous run) is obvious.
            raise RuntimeError(
                'Remote job directory already exists: %s' % remote_base_path)
        self.job.submit()
        self.assertTrue(os.path.exists(remote_base_path))
        self.job.wait()
        self.job.sync_remote_output()
        local_output = os.path.join(working_dir, self.job.name)
        self.assertTrue(os.path.exists(local_output))
        output = os.path.join(local_output, 'output.txt')
        self.assertTrue(os.path.exists(output))
        # Clean up both the remote working copy and the synced local output.
        shutil.rmtree(remote_base_path)
        shutil.rmtree(local_output)
def run_ecmwf_rapid_process(rapid_executable_location,  # path to RAPID executable
                            rapid_io_files_location,  # path to RAPID input/output directory
                            ecmwf_forecast_location,  # path to ECMWF forecasts
                            subprocess_log_directory,  # path to store HTCondor/multiprocess logs
                            main_log_directory,  # path to store main logs
                            data_store_url="",  # CKAN API url
                            data_store_api_key="",  # CKAN API Key
                            data_store_owner_org="",  # CKAN owner organization
                            app_instance_id="",  # Streamflow Prediction tool instance ID
                            sync_rapid_input_with_ckan=False,  # match Streamflow Prediction tool RAPID input
                            download_ecmwf=True,  # download recent ECMWF forecast before running
                            date_string=None,  # string of date of interest (YYYYMMDD); defaults to today (UTC)
                            ftp_host="",  # ECMWF ftp site path
                            ftp_login="",  # ECMWF ftp login name
                            ftp_passwd="",  # ECMWF ftp password
                            ftp_directory="",  # ECMWF ftp directory
                            upload_output_to_ckan=False,  # upload data to CKAN and remove local copy
                            delete_output_when_done=False,  # delete all output data from this code
                            initialize_flows=False,  # use forecast to initialize next run
                            era_interim_data_location="",  # path to ERA Interim return period data
                            create_warning_points=False,  # generate warning points for Streamflow Prediction Tool
                            autoroute_executable_location="",  # location of AutoRoute executable
                            autoroute_io_files_location="",  # path to AutoRoute input/output directory
                            geoserver_url='',  # url to API endpoint ending in geoserver/rest
                            geoserver_username='',  # username for geoserver
                            geoserver_password='',  # password for geoserver
                            mp_mode='htcondor',  # valid options are htcondor and multiprocess
                            mp_execute_directory='',  # required if using multiprocess mode
                            ):
    """
    This is the main ECMWF-RAPID process (legacy Python 2 entry point).

    Locates (or downloads via FTP) ECMWF runoff forecast folders for
    ``date_string``, optionally seeds initial flows from ERA Interim
    historical Qout files, then for every valid watershed input directory
    fans out one downscaling/RAPID job per ensemble forecast file — either
    as HTCondor jobs or via a multiprocessing pool — and finally uploads
    results to CKAN, generates warning points, and writes initialization
    files for the next run.

    Raises:
        Exception: if ``mp_mode`` is 'multiprocess' without a valid
            ``mp_execute_directory``, or if ``mp_mode`` is unrecognized.
    """
    time_begin_all = datetime.datetime.utcnow()
    if date_string == None:
        date_string = time_begin_all.strftime('%Y%m%d')

    if mp_mode == "multiprocess":
        # multiprocess workers stage files in mp_execute_directory, so it
        # must exist before any jobs are created
        if not mp_execute_directory or not os.path.exists(mp_execute_directory):
            raise Exception("If mode is multiprocess, mp_execute_directory is required ...")

    #date_string = datetime.datetime(2016,2,12).strftime('%Y%m%d')
    local_scripts_location = os.path.dirname(os.path.realpath(__file__))

    if sync_rapid_input_with_ckan and app_instance_id and data_store_url and data_store_api_key:
        #sync with data store
        ri_manager = RAPIDInputDatasetManager(data_store_url,
                                              data_store_api_key,
                                              'ecmwf',
                                              app_instance_id)
        ri_manager.sync_dataset(os.path.join(rapid_io_files_location, 'input'))

    #clean up old log files
    clean_logs(subprocess_log_directory, main_log_directory)

    #get list of correctly formatted rapid input directories in rapid directory
    rapid_input_directories = get_valid_watershed_list(os.path.join(rapid_io_files_location, "input"))

    if download_ecmwf and ftp_host:
        #download all files for today
        ecmwf_folders = sorted(download_all_ftp(ecmwf_forecast_location,
                                                'Runoff.%s*.netcdf.tar*' % date_string,
                                                ftp_host,
                                                ftp_login,
                                                ftp_passwd,
                                                ftp_directory))
    else:
        # use forecasts already present on disk for this date
        ecmwf_folders = sorted(glob(os.path.join(ecmwf_forecast_location,
                                                 'Runoff.' + date_string + '*.netcdf')))

    data_manager = None
    if upload_output_to_ckan and data_store_url and data_store_api_key:
        #init data manager for CKAN
        data_manager = ECMWFRAPIDDatasetManager(data_store_url,
                                                data_store_api_key,
                                                data_store_owner_org)

    #ADD SEASONAL INITIALIZATION WHERE APPLICABLE
    if initialize_flows:
        # NOTE(review): assumes ecmwf_folders is non-empty here; an empty
        # list would raise IndexError — confirm upstream guarantees.
        initial_forecast_date_timestep = get_date_timestep_from_forecast_folder(ecmwf_folders[0])
        seasonal_init_job_list = []
        for rapid_input_directory in rapid_input_directories:
            seasonal_master_watershed_input_directory = os.path.join(rapid_io_files_location, "input",
                                                                     rapid_input_directory)
            #add seasonal initialization if no initialization file and historical Qout file exists
            if era_interim_data_location and os.path.exists(era_interim_data_location):
                era_interim_watershed_directory = os.path.join(era_interim_data_location, rapid_input_directory)
                if os.path.exists(era_interim_watershed_directory):
                    historical_qout_file = glob(os.path.join(era_interim_watershed_directory, "Qout*.nc"))
                    if historical_qout_file:
                        seasonal_init_job_list.append((historical_qout_file[0],
                                                       seasonal_master_watershed_input_directory,
                                                       initial_forecast_date_timestep))
        if seasonal_init_job_list:
            #use multiprocessing instead of htcondor due to potential for huge file sizes
            if len(seasonal_init_job_list) > 1:
                seasonal_pool = mp_Pool()
                seasonal_pool.imap(compute_seasonal_initial_rapid_flows_multicore_worker,
                                   seasonal_init_job_list,
                                   chunksize=1)
                seasonal_pool.close()
                seasonal_pool.join()
            else:
                compute_seasonal_initial_rapid_flows_multicore_worker(seasonal_init_job_list[0])

    #prepare ECMWF files
    master_job_info_list = []
    for ecmwf_folder in ecmwf_folders:
        ecmwf_forecasts = glob(os.path.join(ecmwf_folder, 'full_*.runoff.netcdf')) + \
                          glob(os.path.join(ecmwf_folder, '*.52.205.*.runoff.netcdf'))
        #look for new version of forecasts
        if not ecmwf_forecasts:
            ecmwf_forecasts = glob(os.path.join(ecmwf_folder, '*.runoff.nc'))
        #make the largest files first
        ecmwf_forecasts.sort(key=os.path.getsize, reverse=True)

        forecast_date_timestep = get_date_timestep_from_forecast_folder(ecmwf_folder)
        print forecast_date_timestep
        #submit jobs to downsize ecmwf files to watershed
        iteration = 0
        rapid_watershed_jobs = {}
        for rapid_input_directory in rapid_input_directories:
            #keep list of jobs per watershed: 'jobs' holds CJob objects
            #(htcondor) or argument tuples (multiprocess); 'jobs_info' holds
            #metadata used later for uploads
            rapid_watershed_jobs[rapid_input_directory] = {
                'jobs': [],
                'jobs_info': []
            }
            print "Running forecasts for:", rapid_input_directory, os.path.basename(ecmwf_folder)
            watershed, subbasin = get_watershed_subbasin_from_folder(rapid_input_directory)
            master_watershed_input_directory = os.path.join(rapid_io_files_location, "input",
                                                            rapid_input_directory)
            master_watershed_outflow_directory = os.path.join(rapid_io_files_location, 'output',
                                                              rapid_input_directory,
                                                              forecast_date_timestep)
            #add USGS gage data to initialization file
            if initialize_flows:
                #update initial flows with usgs data
                update_inital_flows_usgs(master_watershed_input_directory,
                                         forecast_date_timestep)
            #create jobs for HTCondor
            for watershed_job_index, forecast in enumerate(ecmwf_forecasts):
                ensemble_number = get_ensemble_number_from_forecast(forecast)
                # directory may already exist from a previous ensemble member
                try:
                    os.makedirs(master_watershed_outflow_directory)
                except OSError:
                    pass
                #initialize HTCondor Logging Directory
                subprocess_forecast_log_dir = os.path.join(subprocess_log_directory, forecast_date_timestep)
                try:
                    os.makedirs(subprocess_forecast_log_dir)
                except OSError:
                    pass
                #get basin names
                outflow_file_name = 'Qout_%s_%s_%s.nc' % (watershed.lower(),
                                                          subbasin.lower(),
                                                          ensemble_number)
                node_rapid_outflow_file = outflow_file_name
                master_rapid_outflow_file = os.path.join(master_watershed_outflow_directory,
                                                         outflow_file_name)
                job_name = 'job_%s_%s_%s_%s_%s' % (forecast_date_timestep, watershed,
                                                   subbasin, ensemble_number, iteration)
                if mp_mode == "htcondor":
                    #create job to downscale forecasts for watershed
                    job = CJob(job_name, tmplt.vanilla_transfer_files)
                    job.set('executable', os.path.join(local_scripts_location, 'htcondor_ecmwf_rapid.py'))
                    job.set('transfer_input_files', "%s, %s, %s" % (forecast,
                                                                    master_watershed_input_directory,
                                                                    local_scripts_location))
                    job.set('initialdir', subprocess_forecast_log_dir)
                    job.set('arguments', '%s %s %s %s %s %s' % (forecast,
                                                                forecast_date_timestep,
                                                                watershed.lower(),
                                                                subbasin.lower(),
                                                                rapid_executable_location,
                                                                initialize_flows))
                    # map the node-local output filename back to the master
                    # outflow path on transfer
                    job.set('transfer_output_remaps', "\"%s = %s\"" % (node_rapid_outflow_file,
                                                                       master_rapid_outflow_file))
                    job.submit()
                    rapid_watershed_jobs[rapid_input_directory]['jobs'].append(job)
                    rapid_watershed_jobs[rapid_input_directory]['jobs_info'].append({'watershed': watershed,
                                                                                     'subbasin': subbasin,
                                                                                     'outflow_file_name': master_rapid_outflow_file,
                                                                                     'forecast_date_timestep': forecast_date_timestep,
                                                                                     'ensemble_number': ensemble_number,
                                                                                     'master_watershed_outflow_directory': master_watershed_outflow_directory,
                                                                                     })
                elif mp_mode == "multiprocess":
                    # argument tuple consumed by run_ecmwf_rapid_multiprocess_worker
                    rapid_watershed_jobs[rapid_input_directory]['jobs'].append((forecast,
                                                                                forecast_date_timestep,
                                                                                watershed.lower(),
                                                                                subbasin.lower(),
                                                                                rapid_executable_location,
                                                                                initialize_flows,
                                                                                job_name,
                                                                                master_rapid_outflow_file,
                                                                                master_watershed_input_directory,
                                                                                mp_execute_directory,
                                                                                subprocess_forecast_log_dir,
                                                                                watershed_job_index))
##                    run_ecmwf_rapid_multiprocess_worker((forecast,
##                                                         forecast_date_timestep,
##                                                         watershed.lower(),
##                                                         subbasin.lower(),
##                                                         rapid_executable_location,
##                                                         initialize_flows,
##                                                         job_name,
##                                                         master_rapid_outflow_file,
##                                                         master_watershed_input_directory,
##                                                         mp_execute_directory,
##                                                         subprocess_forecast_log_dir,
##                                                         watershed_job_index))
                else:
                    raise Exception("ERROR: Invalid mp_mode. Valid types are htcondor and multiprocess ...")
                iteration += 1

        for rapid_input_directory, watershed_job_info in rapid_watershed_jobs.iteritems():
            #add sub job list to master job list
            master_job_info_list = master_job_info_list + watershed_job_info['jobs_info']
            if mp_mode == "htcondor":
                #wait for jobs to finish then upload files
                for job_index, job in enumerate(watershed_job_info['jobs']):
                    job.wait()
                    #upload file when done
                    if data_manager:
                        upload_single_forecast(watershed_job_info['jobs_info'][job_index], data_manager)
            elif mp_mode == "multiprocess":
                pool_main = mp_Pool()
                multiprocess_worker_list = pool_main.imap_unordered(run_ecmwf_rapid_multiprocess_worker,
                                                                    watershed_job_info['jobs'],
                                                                    chunksize=1)
                if data_manager:
                    # worker returns its job index first so results can be
                    # matched to jobs_info despite unordered completion
                    for multi_job_output in multiprocess_worker_list:
                        job_index = multi_job_output[0]
                        #upload file when done
                        upload_single_forecast(watershed_job_info['jobs_info'][job_index], data_manager)
                #just in case ...
                pool_main.close()
                pool_main.join()

            #when all jobs in watershed are done, generate warning points
            if create_warning_points:
                watershed, subbasin = get_watershed_subbasin_from_folder(rapid_input_directory)
                forecast_directory = os.path.join(rapid_io_files_location, 'output',
                                                  rapid_input_directory,
                                                  forecast_date_timestep)
                era_interim_watershed_directory = os.path.join(era_interim_data_location, rapid_input_directory)
                if os.path.exists(era_interim_watershed_directory):
                    print "Generating Warning Points for", watershed, subbasin, "from", forecast_date_timestep
                    era_interim_files = glob(os.path.join(era_interim_watershed_directory, "return_period*.nc"))
                    if era_interim_files:
                        try:
                            generate_warning_points(forecast_directory,
                                                    era_interim_files[0],
                                                    forecast_directory,
                                                    threshold=10)
                            if upload_output_to_ckan and data_store_url and data_store_api_key:
                                data_manager.initialize_run_ecmwf(watershed, subbasin, forecast_date_timestep)
                                data_manager.zip_upload_warning_points_in_directory(forecast_directory)
                        except Exception, ex:
                            # best-effort: a warning-point failure must not
                            # abort the remaining watersheds
                            print ex
                            pass
                    else:
                        print "No ERA Interim file found. Skipping ..."
                else:
                    print "No ERA Interim directory found for", rapid_input_directory, ". Skipping warning point generation..."

        #initialize flows for next run
        if initialize_flows:
            #create new init flow files/generate warning point files
            for rapid_input_directory in rapid_input_directories:
                input_directory = os.path.join(rapid_io_files_location, 'input', rapid_input_directory)
                forecast_directory = os.path.join(rapid_io_files_location, 'output',
                                                  rapid_input_directory,
                                                  forecast_date_timestep)
                if os.path.exists(forecast_directory):
                    #loop through all the rapid_namelist files in directory
                    watershed, subbasin = get_watershed_subbasin_from_folder(rapid_input_directory)
                    if initialize_flows:
                        print "Initializing flows for", watershed, subbasin, "from", forecast_date_timestep
                        basin_files = find_current_rapid_output(forecast_directory, watershed, subbasin)
                        try:
                            compute_initial_rapid_flows(basin_files, input_directory, forecast_date_timestep)
                        except Exception, ex:
                            # best-effort: continue initializing the other
                            # watersheds on failure
                            print ex
                            pass
def run_ecmwf_forecast_process(
        rapid_executable_location,  # path to RAPID executable
        rapid_io_files_location,  # path to RAPID input/output directory
        ecmwf_forecast_location,  # path to ECMWF forecasts
        subprocess_log_directory,  # path to store HTCondor/multiprocess logs
        main_log_directory,  # path to store main logs
        region="",  # 1 of the 12 partitioned ECMWF files. Leave empty if using global
        data_store_url="",  # CKAN API url
        data_store_api_key="",  # CKAN API Key
        data_store_owner_org="",  # CKAN owner organization
        app_instance_id="",  # Streamflow Prediction tool instance ID
        sync_rapid_input_with_ckan=False,  # match Streamflow Prediction tool RAPID input
        download_ecmwf=True,  # download recent ECMWF forecast before running
        date_string="",  # string of date of interest
        ftp_host="",  # ECMWF ftp site path
        ftp_login="",  # ECMWF ftp login name
        ftp_passwd="",  # ECMWF ftp password
        ftp_directory="",  # ECMWF ftp directory
        delete_past_ecmwf_forecasts=True,  # deletes all past forecasts before next run
        upload_output_to_ckan=False,  # upload data to CKAN and remove local copy
        delete_output_when_done=False,  # delete all output data from this code
        initialize_flows=False,  # use forecast to initialize next run
        warning_flow_threshold=100,  # flows below this threshold will be ignored
        era_interim_data_location="",  # path to ERA Interim return period data
        create_warning_points=False,  # generate warning points for Streamflow Prediction Tool
        autoroute_executable_location="",  # location of AutoRoute executable
        autoroute_io_files_location="",  # path to AutoRoute input/output directory
        geoserver_url="",  # url to API endpoint ending in geoserver/rest
        geoserver_username="",  # username for geoserver
        geoserver_password="",  # password for geoserver
        mp_mode='htcondor',  # valid options are htcondor and multiprocess
        mp_execute_directory="",  # required if using multiprocess mode
        initialization_time_step=12,  # time step of ECMWF Forecast Process, in hours
        # doesn't appear to be used MJS 8/23/2020...
        # NOTE(review): mutable default argument ([]) — harmless while unused,
        # but should become None if this parameter is ever read.
        watersheds_with_dams_list=[],  # a list of all watersheds where dam outflows are being forced
        # doesn't appear to be used, MJS 8/23/2020...
        # NOTE(review): mutable default argument ({}) — same caveat as above.
        stream_ids_with_dams_dict={},  # a dictionary with the watershed key and a value of a list of stream IDs where dams are located
        # doesn't appear to be used, MJS 8/23/2020...
        dam_outflows={},  # a dictionary with the key as a stream ID and a value of a list of outflows
        BS_opt_dam=False,
        IS_dam_tot=0,
        IS_dam_use=0,
        dam_tot_id_file="",
        dam_use_id_file="",
        dam_file=""):
    """
    This is the main ECMWF RAPID forecast process.

    Runs the full forecast pipeline under a lock file so only one instance
    executes at a time: finds ECMWF runoff forecast folders newer than the
    last completed run, optionally seeds initial flows (seasonal average or
    historical streamflow), fans out one RAPID job per ensemble file per
    watershed (HTCondor or multiprocessing pool), uploads results to CKAN,
    generates warning points, writes initialization files for the next run,
    and optionally runs AutoRoute. All stdout is captured to a timestamped
    log file in ``main_log_directory``.

    Raises:
        ImportError: if condorpy / AutoRoute / spt_dataset_manager support is
            requested but the corresponding package is not installed.
        Exception: if ``mp_mode`` is 'multiprocess' without a valid
            ``mp_execute_directory``, or if ``mp_mode`` is unrecognized.
    """
    time_begin_all = datetime.datetime.utcnow()

    LOCAL_SCRIPTS_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
    # JSON lock file guarding against concurrent runs; also records the
    # datetime of the last completed forecast.
    LOCK_INFO_FILE = os.path.join(main_log_directory, "spt_compute_ecmwf_run_info_lock.txt")

    log_file_path = os.path.join(
        main_log_directory,
        "spt_compute_ecmwf_{0}.log".format(time_begin_all.strftime("%y%m%d%H%M%S")))

    # everything printed below goes to the log file, not the console
    with CaptureStdOutToLog(log_file_path):
        if not CONDOR_ENABLED and mp_mode == 'htcondor':
            raise ImportError(
                "condorpy is not installed. Please install condorpy to use the 'htcondor' option."
            )

        if not AUTOROUTE_ENABLED and autoroute_executable_location and autoroute_io_files_location:
            raise ImportError(
                "AutoRoute is not enabled. Please install tethys_dataset_services"
                " and AutoRoutePy to use the AutoRoute option.")

        if mp_mode == "multiprocess":
            if not mp_execute_directory or not os.path.exists(mp_execute_directory):
                raise Exception(
                    "If mode is multiprocess, mp_execute_directory is required ..."
                )

        if sync_rapid_input_with_ckan and app_instance_id and data_store_url and data_store_api_key:
            # sync with data store
            ri_manager = RAPIDInputDatasetManager(data_store_url,
                                                  data_store_api_key,
                                                  'ecmwf',
                                                  app_instance_id)
            ri_manager.sync_dataset(os.path.join(rapid_io_files_location, 'input'))

        # clean up old log files
        clean_logs(subprocess_log_directory,
                   main_log_directory,
                   log_file_path=log_file_path)

        data_manager = None
        if upload_output_to_ckan and data_store_url and data_store_api_key:
            if not SPT_DATASET_ENABLED:
                raise ImportError(
                    "spt_dataset_manager is not installed. "
                    "Please install spt_dataset_manager to use the 'ckan' options."
                )
            # init data manager for CKAN
            data_manager = ECMWFRAPIDDatasetManager(data_store_url,
                                                    data_store_api_key,
                                                    data_store_owner_org)

        # get list of correctly formatted rapid input directories in rapid directory
        rapid_input_directories = get_valid_watershed_list(
            os.path.join(rapid_io_files_location, "input"))

        if download_ecmwf and ftp_host:
            # get list of folders to download
            ecmwf_folders = sorted(
                get_ftp_forecast_list(
                    'Runoff.%s*%s*.netcdf.tar*' % (date_string, region),
                    ftp_host, ftp_login, ftp_passwd, ftp_directory))
        else:
            # get list of folders to run
            ecmwf_folders = sorted(
                glob(
                    os.path.join(ecmwf_forecast_location,
                                 'Runoff.' + date_string + '*.netcdf')))

        # LOAD LOCK INFO FILE
        # default "last forecast" is the Unix epoch so that, with no prior
        # lock file, every discovered forecast is considered new
        last_forecast_date = datetime.datetime.utcfromtimestamp(0)
        if os.path.exists(LOCK_INFO_FILE):
            with open(LOCK_INFO_FILE) as fp_lock_info:
                previous_lock_info = json.load(fp_lock_info)

                if previous_lock_info['running']:
                    print("Another SPT ECMWF forecast process is running.\n"
                          "The lock file is located here: {0}\n"
                          "If this is an error, you have two options:\n"
                          "1) Delete the lock file.\n"
                          "2) Edit the lock file and set \"running\" to false. \n"
                          "Then, re-run this script. \n Exiting ...".format(
                              LOCK_INFO_FILE))
                    return
                else:
                    last_forecast_date = datetime.datetime.strptime(
                        previous_lock_info['last_forecast_date'], '%Y%m%d%H')
                    run_ecmwf_folders = []
                    for ecmwf_folder in ecmwf_folders:
                        # get date
                        forecast_date = get_datetime_from_forecast_folder(
                            ecmwf_folder)
                        # if more recent, add to list
                        # check to determine if forecast time step is 12 or 24 hours
                        if initialization_time_step == 24:
                            # 24-hour mode also skips the 12Z cycle
                            if forecast_date > last_forecast_date and forecast_date.hour != 12:
                                run_ecmwf_folders.append(ecmwf_folder)
                        elif initialization_time_step == 12:
                            if forecast_date > last_forecast_date:
                                run_ecmwf_folders.append(ecmwf_folder)
                    ecmwf_folders = run_ecmwf_folders

        if not ecmwf_folders:
            print("No new forecasts found to run. Exiting ...")
            return

        # GENERATE NEW LOCK INFO FILE
        update_lock_info_file(LOCK_INFO_FILE, True,
                              last_forecast_date.strftime('%Y%m%d%H'))

        # rapid_input_directories_sub = [rapid_input_directory for rapid_input_directory in rapid_input_directories if hydroshed_index in rapid_input_directory]

        # Try/Except added for lock file
        try:
            # ADD SEASONAL INITIALIZATION WHERE APPLICABLE
            if initialize_flows:
                initial_forecast_date_timestep = get_date_timestep_from_forecast_folder(
                    ecmwf_folders[0])

                seasonal_init_job_list = []
                for rapid_input_directory in rapid_input_directories:
                    seasonal_master_watershed_input_directory = os.path.join(
                        rapid_io_files_location, "input", rapid_input_directory)
                    # add seasonal initialization if no initialization file and historical Qout file exists
                    if era_interim_data_location and os.path.exists(
                            era_interim_data_location):
                        era_interim_watershed_directory = os.path.join(
                            era_interim_data_location, rapid_input_directory)
                        if os.path.exists(era_interim_watershed_directory):
                            # INITIALIZE FROM SEASONAL AVERAGE FILE
                            seasonal_streamflow_file = glob(
                                os.path.join(era_interim_watershed_directory,
                                             "seasonal_average*.nc"))
                            if seasonal_streamflow_file:
                                seasonal_init_job_list.append(
                                    (seasonal_streamflow_file[0],
                                     seasonal_master_watershed_input_directory,
                                     initial_forecast_date_timestep,
                                     "seasonal_average_file"))
                            else:
                                # INITIALIZE FROM HISTORICAL STREAMFLOW FILE
                                historical_qout_file = glob(
                                    os.path.join(
                                        era_interim_watershed_directory,
                                        "Qout*.nc"))
                                if historical_qout_file:
                                    seasonal_init_job_list.append((
                                        historical_qout_file[0],
                                        seasonal_master_watershed_input_directory,
                                        initial_forecast_date_timestep,
                                        "historical_streamflow_file"))

                if seasonal_init_job_list:
                    # use multiprocessing instead of htcondor due to potential for huge file sizes
                    if len(seasonal_init_job_list) > 1:
                        seasonal_pool = mp_Pool()
                        seasonal_pool.imap(
                            compute_seasonal_initial_rapid_flows_multicore_worker,
                            seasonal_init_job_list,
                            chunksize=1)
                        seasonal_pool.close()
                        seasonal_pool.join()
                    else:
                        compute_seasonal_initial_rapid_flows_multicore_worker(
                            seasonal_init_job_list[0])

            # ----------------------------------------------------------------------
            # BEGIN ECMWF-RAPID FORECAST LOOP
            # ----------------------------------------------------------------------
            master_job_info_list = []
            for ecmwf_folder in ecmwf_folders:
                if download_ecmwf:
                    # download forecast
                    ecmwf_folder = download_and_extract_ftp(
                        ecmwf_forecast_location, ecmwf_folder, ftp_host,
                        ftp_login, ftp_passwd, ftp_directory,
                        delete_past_ecmwf_forecasts)

                # get list of forecast files
                ecmwf_forecasts = glob(
                    os.path.join(ecmwf_folder, '*.runoff.%s*nc' % region))

                # look for old version of forecasts
                if not ecmwf_forecasts:
                    ecmwf_forecasts = glob(os.path.join(ecmwf_folder, 'full_*.runoff.netcdf')) + \
                                      glob(os.path.join(ecmwf_folder, '*.52.205.*.runoff.netcdf'))

                if not ecmwf_forecasts:
                    # release the lock before bailing out
                    print("ERROR: Forecasts not found in folder. Exiting ...")
                    update_lock_info_file(
                        LOCK_INFO_FILE, False,
                        last_forecast_date.strftime('%Y%m%d%H'))
                    return

                # make the largest files first
                ecmwf_forecasts.sort(key=os.path.getsize, reverse=True)

                forecast_date_timestep = get_date_timestep_from_forecast_folder(
                    ecmwf_folder)
                print("Running ECMWF Forecast: {0}".format(
                    forecast_date_timestep))

                # submit jobs to downsize ecmwf files to watershed
                rapid_watershed_jobs = {}
                for rapid_input_directory in rapid_input_directories:
                    # keep list of jobs: 'jobs' holds CJob objects (htcondor)
                    # or argument tuples (multiprocess); 'jobs_info' holds
                    # metadata used for uploads
                    rapid_watershed_jobs[rapid_input_directory] = {
                        'jobs': [],
                        'jobs_info': []
                    }
                    print("Running forecasts for: {0} {1}".format(
                        rapid_input_directory,
                        os.path.basename(ecmwf_folder)))

                    watershed, subbasin = get_watershed_subbasin_from_folder(
                        rapid_input_directory)
                    master_watershed_input_directory = os.path.join(
                        rapid_io_files_location, "input",
                        rapid_input_directory)
                    master_watershed_outflow_directory = os.path.join(
                        rapid_io_files_location, 'output',
                        rapid_input_directory, forecast_date_timestep)
                    try:
                        os.makedirs(master_watershed_outflow_directory)
                    except OSError:
                        pass

                    # initialize HTCondor/multiprocess Logging Directory
                    subprocess_forecast_log_dir = os.path.join(
                        subprocess_log_directory, forecast_date_timestep)
                    try:
                        os.makedirs(subprocess_forecast_log_dir)
                    except OSError:
                        pass

                    # add USGS gage data to initialization file
                    if initialize_flows:
                        # update initial flows with usgs data
                        update_inital_flows_usgs(
                            master_watershed_input_directory,
                            forecast_date_timestep)

                    # create jobs for HTCondor/multiprocess
                    for watershed_job_index, forecast in enumerate(
                            ecmwf_forecasts):
                        ensemble_number = get_ensemble_number_from_forecast(
                            forecast)

                        # get basin names
                        outflow_file_name = 'Qout_%s_%s_%s.nc' % (
                            watershed.lower(), subbasin.lower(),
                            ensemble_number)
                        node_rapid_outflow_file = outflow_file_name
                        master_rapid_outflow_file = os.path.join(
                            master_watershed_outflow_directory,
                            outflow_file_name)

                        job_name = 'job_%s_%s_%s_%s' % (forecast_date_timestep,
                                                        watershed, subbasin,
                                                        ensemble_number)

                        rapid_watershed_jobs[rapid_input_directory][
                            'jobs_info'].append({
                                'watershed': watershed,
                                'subbasin': subbasin,
                                'outflow_file_name': master_rapid_outflow_file,
                                'forecast_date_timestep': forecast_date_timestep,
                                'ensemble_number': ensemble_number,
                                'master_watershed_outflow_directory':
                                master_watershed_outflow_directory,
                                'data_manager': data_manager  # added this to try to upload forecast in mp
                            })
                        if mp_mode == "htcondor":
                            # create job to downscale forecasts for watershed
                            job = CJob(job_name, tmplt.vanilla_transfer_files)
                            job.set(
                                'executable',
                                os.path.join(LOCAL_SCRIPTS_DIRECTORY,
                                             'htcondor_ecmwf_rapid.py'))
                            job.set(
                                'transfer_input_files', "%s, %s, %s" %
                                (forecast, master_watershed_input_directory,
                                 LOCAL_SCRIPTS_DIRECTORY))
                            job.set('initialdir', subprocess_forecast_log_dir)
                            job.set(
                                'arguments', '%s %s %s %s %s %s' %
                                (forecast, forecast_date_timestep,
                                 watershed.lower(), subbasin.lower(),
                                 rapid_executable_location, initialize_flows))
                            # map node-local output back to the master path
                            job.set(
                                'transfer_output_remaps',
                                "\"%s = %s\"" % (node_rapid_outflow_file,
                                                 master_rapid_outflow_file))
                            job.submit()
                            rapid_watershed_jobs[rapid_input_directory][
                                'jobs'].append(job)
                        elif mp_mode == "multiprocess":
                            # argument tuple consumed by run_ecmwf_rapid_multiprocess_worker
                            rapid_watershed_jobs[rapid_input_directory][
                                'jobs'].append((
                                    forecast,
                                    forecast_date_timestep,
                                    watershed.lower(),
                                    subbasin.lower(),
                                    rapid_executable_location,
                                    initialize_flows,
                                    job_name,
                                    master_rapid_outflow_file,
                                    master_watershed_input_directory,
                                    mp_execute_directory,
                                    subprocess_forecast_log_dir,
                                    watershed_job_index,
                                    initialization_time_step,
                                    # dam arguments included, MJS 8/23/2020 ........
                                    BS_opt_dam,
                                    IS_dam_tot,
                                    IS_dam_use,
                                    dam_tot_id_file,
                                    dam_use_id_file,
                                    dam_file))
                            # COMMENTED CODE FOR DEBUGGING SERIALLY
##                            run_ecmwf_rapid_multiprocess_worker((forecast,
##                                                                 forecast_date_timestep,
##                                                                 watershed.lower(),
##                                                                 subbasin.lower(),
##                                                                 rapid_executable_location,
##                                                                 initialize_flows,
##                                                                 job_name,
##                                                                 master_rapid_outflow_file,
##                                                                 master_watershed_input_directory,
##                                                                 mp_execute_directory,
##                                                                 subprocess_forecast_log_dir,
##                                                                 watershed_job_index
##                                                                 initialization_time_step))
                        else:
                            raise Exception(
                                "ERROR: Invalid mp_mode. Valid types are htcondor and multiprocess ..."
                            )

                for rapid_input_directory, watershed_job_info in rapid_watershed_jobs.items(
                ):
                    # add sub job list to master job list
                    master_job_info_list = master_job_info_list + watershed_job_info[
                        'jobs_info']
                    if mp_mode == "htcondor":
                        # wait for jobs to finish then upload files
                        for job_index, job in enumerate(
                                watershed_job_info['jobs']):
                            job.wait()
                            # upload file when done
                            if data_manager:
                                upload_single_forecast(
                                    watershed_job_info['jobs_info'][job_index],
                                    data_manager)
                    elif mp_mode == "multiprocess":
                        pool_main = mp_Pool()
                        # bind jobs_info as the first worker argument; each
                        # job tuple is passed as the second
                        func = partial(run_ecmwf_rapid_multiprocess_worker,
                                       watershed_job_info['jobs_info'])
                        multiprocess_worker_list = pool_main.imap_unordered(
                            func,
                            watershed_job_info['jobs'],
                            # watershed_job_info['jobs'],
                            chunksize=1)
                        if data_manager:
                            # worker yields its job index so results can be
                            # matched despite unordered completion
                            for multi_job_index in multiprocess_worker_list:
                                # upload file when done
                                upload_single_forecast(
                                    watershed_job_info['jobs_info']
                                    [multi_job_index], data_manager)
                        # just in case ...
                        pool_main.close()
                        pool_main.join()

                    # when all jobs in watershed are done, generate warning points
                    if create_warning_points:
                        watershed, subbasin = get_watershed_subbasin_from_folder(
                            rapid_input_directory)
                        forecast_directory = os.path.join(
                            rapid_io_files_location, 'output',
                            rapid_input_directory, forecast_date_timestep)

                        era_interim_watershed_directory = os.path.join(
                            era_interim_data_location, rapid_input_directory)
                        if os.path.exists(era_interim_watershed_directory):
                            print(
                                "Generating warning points for {0}-{1} from {2}"
                                .format(watershed, subbasin,
                                        forecast_date_timestep))
                            era_interim_files = glob(
                                os.path.join(era_interim_watershed_directory,
                                             "return_period*.nc"))
                            if era_interim_files:
                                try:
                                    generate_ecmwf_warning_points(
                                        forecast_directory,
                                        era_interim_files[0],
                                        forecast_directory,
                                        threshold=warning_flow_threshold)
                                    if upload_output_to_ckan and data_store_url and data_store_api_key:
                                        data_manager.initialize_run_ecmwf(
                                            watershed, subbasin,
                                            forecast_date_timestep)
                                        data_manager.zip_upload_warning_points_in_directory(
                                            forecast_directory)
                                except Exception as ex:
                                    # best-effort: a warning-point failure
                                    # must not abort remaining watersheds
                                    print(ex)
                                    pass
                            else:
                                print(
                                    "No ERA Interim file found. Skipping ...")
                        else:
                            print(
                                "No ERA Interim directory found for {0}. "
                                "Skipping warning point generation...".format(
                                    rapid_input_directory))

                # initialize flows for next run
                if initialize_flows:
                    # create new init flow files/generate warning point files
                    for rapid_input_directory in rapid_input_directories:
                        input_directory = os.path.join(rapid_io_files_location,
                                                       'input',
                                                       rapid_input_directory)

                        forecast_directory = os.path.join(
                            rapid_io_files_location, 'output',
                            rapid_input_directory, forecast_date_timestep)

                        if os.path.exists(forecast_directory):
                            # loop through all the rapid_namelist files in directory
                            watershed, subbasin = get_watershed_subbasin_from_folder(
                                rapid_input_directory)
                            if initialize_flows:
                                print(
                                    "Initializing flows for {0}-{1} from {2}".
                                    format(watershed, subbasin,
                                           forecast_date_timestep))
                                basin_files = find_current_rapid_output(
                                    forecast_directory, watershed, subbasin)
                                try:
                                    compute_initial_rapid_flows(
                                        basin_files, input_directory,
                                        forecast_date_timestep,
                                        initialization_time_step)
                                except Exception as ex:
                                    # best-effort: continue with the other
                                    # watersheds on failure
                                    print(ex)
                                    pass

                # run autoroute process if added
                if autoroute_executable_location and autoroute_io_files_location:
                    # run autoroute on all of the watersheds
                    run_autorapid_process(autoroute_executable_location,
                                          autoroute_io_files_location,
                                          rapid_io_files_location,
                                          forecast_date_timestep,
                                          subprocess_forecast_log_dir,
                                          geoserver_url, geoserver_username,
                                          geoserver_password, app_instance_id)

                last_forecast_date = get_datetime_from_date_timestep(
                    forecast_date_timestep)

                # update lock info file with next forecast
                update_lock_info_file(LOCK_INFO_FILE, True,
                                      last_forecast_date.strftime('%Y%m%d%H'))

            # ----------------------------------------------------------------------
            # END FORECAST LOOP
            # ----------------------------------------------------------------------
        except Exception as ex:
            # log the traceback but fall through so the lock file is released
            print_exc()
            print(ex)
            pass

        # Release & update lock info file with all completed forecasts
        update_lock_info_file(LOCK_INFO_FILE, False,
                              last_forecast_date.strftime('%Y%m%d%H'))

        if delete_output_when_done:
            # delete local datasets
            for job_info in master_job_info_list:
                try:
                    rmtree(job_info['master_watershed_outflow_directory'])
                except OSError:
                    pass
            # delete watershed folder if empty
            for item in os.listdir(
                    os.path.join(rapid_io_files_location, 'output')):
                try:
                    os.rmdir(
                        os.path.join(rapid_io_files_location, 'output', item))
                except OSError:
                    pass

        # print info to user
        time_end = datetime.datetime.utcnow()
        print("Time Begin: {0}".format(time_begin_all))
        print("Time Finish: {0}".format(time_end))
        print("TOTAL TIME: {0}".format(time_end - time_begin_all))
def run_era_interim_rapid_process(rapid_executable_location,
                                  rapid_io_files_location,
                                  ecmwf_forecast_location,
                                  era_interim_data_location,
                                  condor_log_directory,
                                  main_log_directory,
                                  data_store_url,
                                  data_store_api_key,
                                  app_instance_id,
                                  sync_rapid_input_with_ckan,
                                  download_ecmwf,
                                  download_era_interim,
                                  upload_output_to_ckan,
                                  generate_return_periods_file):
    """
    This is the main ERA Interim RAPID process.

    Optionally syncs the RAPID input directory with the CKAN data store,
    optionally downloads historical ERA Interim runoff data, submits one
    HTCondor job per correctly formatted "<watershed>-<subbasin>" input
    directory, waits for all jobs to finish, and optionally generates a
    return-periods file for each watershed.

    Parameters:
        rapid_executable_location -- path to RAPID executable
        rapid_io_files_location -- path to RAPID input/output directory
        ecmwf_forecast_location -- path to ECMWF forecasts
        era_interim_data_location -- path to ERA Interim runoff data
        condor_log_directory -- path to store HTCondor logs
        main_log_directory -- path to store main logs
        data_store_url -- CKAN API url
        data_store_api_key -- CKAN API key
        app_instance_id -- Streamflow Prediction Tool instance ID
        sync_rapid_input_with_ckan -- sync RAPID input with CKAN data store
        download_ecmwf -- NOTE(review): currently unused by this function;
                          kept for interface compatibility
        download_era_interim -- download historical ERA Interim data first
        upload_output_to_ckan -- initialize the CKAN dataset manager
        generate_return_periods_file -- create return_periods.nc per watershed
    """
    time_begin_all = datetime.datetime.utcnow()
    date_string = time_begin_all.strftime('%Y%m%d')
    #date_string = datetime.datetime(2015,2,3).strftime('%Y%m%d')
    rapid_scripts_location = os.path.dirname(os.path.realpath(__file__))

    if sync_rapid_input_with_ckan and app_instance_id \
            and data_store_url and data_store_api_key:
        #sync with data store
        ri_manager = RAPIDInputDatasetManager(data_store_url,
                                              data_store_api_key,
                                              'ecmwf',
                                              app_instance_id)
        ri_manager.sync_dataset(os.path.join(rapid_io_files_location, 'input'))

    #clean up old log files
    clean_logs(condor_log_directory, main_log_directory)

    #initialize HTCondor directory for this run
    condor_init_dir = os.path.join(condor_log_directory, date_string)
    try:
        os.makedirs(condor_init_dir)
    except OSError:
        #directory already exists
        pass

    #get list of correctly formatted rapid input directories
    #(expected format: "<watershed>-<subbasin>")
    rapid_input_base = os.path.join(rapid_io_files_location, 'input')
    rapid_input_directories = []
    for directory in os.listdir(rapid_input_base):
        if os.path.isdir(os.path.join(rapid_input_base, directory)) \
                and len(directory.split("-")) == 2:
            rapid_input_directories.append(directory)
        else:
            print(directory, "incorrectly formatted. Skipping ...")

    era_interim_folder = era_interim_data_location
    if download_era_interim:
        #download historical ERA data
        era_interim_folders = download_all_ftp(
            era_interim_data_location, 'erai_runoff_1980to20*.tar.gz.tar')
        era_interim_folder = era_interim_folders[0]

    if upload_output_to_ckan and data_store_url and data_store_api_key:
        #init data manager for CKAN
        data_manager = ECMWFRAPIDDatasetManager(data_store_url,
                                                data_store_api_key)

    #run ERA Interim processes: one HTCondor job per watershed
    iteration = 0
    job_list = []
    job_info_list = []
    for rapid_input_directory in rapid_input_directories:
        #safe to unpack: directory name was validated to have exactly one "-"
        watershed, subbasin = rapid_input_directory.split("-")
        master_watershed_input_directory = os.path.join(
            rapid_io_files_location, "input", rapid_input_directory)
        master_watershed_outflow_directory = os.path.join(
            rapid_io_files_location, 'output', rapid_input_directory)
        try:
            os.makedirs(master_watershed_outflow_directory)
        except OSError:
            #directory already exists
            pass

        #get basin names
        interim_folder_basename = os.path.basename(era_interim_folder)
        print(era_interim_folder, interim_folder_basename)
        outflow_file_name = 'Qout_%s.nc' % interim_folder_basename
        node_rapid_outflow_file = outflow_file_name
        master_rapid_outflow_file = os.path.join(
            master_watershed_outflow_directory, outflow_file_name)

        #create job to downscale forecasts for watershed
        job = CJob('job_%s_%s_%s' % (interim_folder_basename, watershed, iteration),
                   tmplt.vanilla_transfer_files)
        job.set('executable',
                os.path.join(rapid_scripts_location, 'compute_ecmwf_rapid.py'))
        job.set('transfer_input_files',
                "%s, %s" % (master_watershed_input_directory,
                            rapid_scripts_location))
        job.set('initialdir', condor_init_dir)
        job.set('arguments', '%s %s %s %s %s' % (watershed.lower(),
                                                 subbasin.lower(),
                                                 rapid_executable_location,
                                                 era_interim_folder,
                                                 ecmwf_forecast_location))
        #remap node-local outflow file back to the master output location
        job.set('transfer_output_remaps',
                "\"%s = %s\"" % (node_rapid_outflow_file,
                                 master_rapid_outflow_file))
        job.submit()
        job_list.append(job)
        job_info_list.append({
            'watershed': watershed,
            'subbasin': subbasin,
            'outflow_file_name': master_rapid_outflow_file,
            'master_watershed_outflow_directory': master_watershed_outflow_directory,
        })
        iteration += 1

    #wait for jobs to finish, then post-process each watershed
    for index, job in enumerate(job_list):
        job.wait()
        #generate return periods
        if generate_return_periods_file:
            job_info = job_info_list[index]
            watershed_output_dir = job_info['master_watershed_outflow_directory']
            erai_output_file = job_info['outflow_file_name']
            return_periods_file = os.path.join(watershed_output_dir,
                                               'return_periods.nc')
            generate_return_periods(erai_output_file, return_periods_file)

    # NOTE(review): a large dead block of commented-out CKAN upload and
    # flow-initialization code (a no-op triple-quoted string that also
    # referenced names not in this function's signature) was removed here.

    #print info to user
    time_end = datetime.datetime.utcnow()
    print("Time Begin All: " + str(time_begin_all))
    print("Time Finish All: " + str(time_end))
    print("TOTAL TIME: " + str(time_end - time_begin_all))