Example #1
    def test__init__(self):
        attributes = dict()
        attributes['job_name'] = self.job_name

        self.expected = {'_name': self.job_name,
                         '_attributes': attributes,
                         '_num_jobs': 1,
                         '_cluster_id': 0,
                         '_job_file': '',
                         '_remote': None,
                         '_remote_id': None,
                         '_remote_input_files': None,
                         '_cwd': '.'}
        self.actual = self.job.__dict__
        self.msg = 'testing initialization with default values'
        self.assertDictEqual(*self.assert_args)

        exe = 'exe'
        args = 'args'
        num_jobs = '5'
        self.job = Job(self.job_name, OrderedDict(), num_jobs, executable=exe, arguments=args)
        attributes.update({'executable': exe, 'arguments': args})

        self.expected.update({'_name': self.job_name,
                              '_attributes': attributes,
                              '_num_jobs': int(num_jobs)})
        self.actual = self.job.__dict__
        self.actual['_attributes'] = dict(self.actual['_attributes'])
        self.msg = 'testing initialization with all values supplied'
        self.assertDictEqual(*self.assert_args)

        num_jobs = 'five'
        self.assertRaises(ValueError, Job, self.job_name, num_jobs=num_jobs)
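For orientation, here is a minimal sketch of the two construction paths this test exercises; the condorpy import path and the exact constructor signature are assumed from the calls above.

from collections import OrderedDict
from condorpy import Job  # import path assumed from these examples

# Default construction: only the job name is required; num_jobs defaults to 1.
job = Job('job_name')

# Full construction, mirroring the test: template attributes, num_jobs as a
# string (coerced to int), and executable/arguments passed as keyword attributes.
job = Job('job_name', OrderedDict(), '5', executable='exe', arguments='args')

# A non-numeric num_jobs such as 'five' raises ValueError, as the final assertion checks.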
Example #2
    def test_log_file(self):
        self.job = Job(self.job_name, Templates.base)
        log_file = '%s/%s/%s.%s.log' % (self.job.initial_dir, self.job.logdir, self.job_name, self.job.cluster_id)
        expected = log_file
        actual = self.job.log_file
        msg = 'checking resolving attribute function for log file'
        self.assertEqual(expected, actual, '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))
Example #3
    def test_submit(self):
        working_dir = os.path.join(self.base_dir, 'test_files', 'working_dir')

        self.job = Job('remote_test',
                       Templates.vanilla_transfer_files,
                       host='localhost',
                       username=os.environ['USER'],
                       private_key='~/.ssh/id_rsa',
                       remote_input_files=['../copy_test.py', 'input.txt'],
                       transfer_input_files='../input.txt',
                       executable=os.path.join(self.base_dir, 'test_files',
                                               'copy_test.py'),
                       working_directory=working_dir)

        remote_base_path = os.path.expanduser('~/' + self.job._remote_id)
        remote_base_path = os.path.expanduser('~/' + self.job._remote_id)
        if os.path.exists(remote_base_path):
            raise RuntimeError('remote base path already exists: %s' % remote_base_path)
        self.job.submit()
        self.assertTrue(os.path.exists(remote_base_path))
        self.job.wait()
        self.job.sync_remote_output()
        local_output = os.path.join(working_dir, self.job.name)
        self.assertTrue(os.path.exists(local_output))
        output = os.path.join(local_output, 'output.txt')

        self.assertTrue(os.path.exists(output))
        shutil.rmtree(remote_base_path)
        shutil.rmtree(local_output)
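The integration test above boils down to the following remote life cycle; this condensed sketch assumes a passwordless SSH setup to localhost, and the user name and paths are placeholders.

from condorpy import Job, Templates  # import path assumed from these examples

job = Job('remote_test',
          Templates.vanilla_transfer_files,
          host='localhost',
          username='someuser',                # placeholder user name
          private_key='~/.ssh/id_rsa',
          remote_input_files=['../copy_test.py', 'input.txt'],
          executable='copy_test.py',          # placeholder path
          working_directory='working_dir')    # placeholder path

job.submit()              # stages inputs under ~/<remote_id> and submits on the remote scheduler
job.wait()                # blocks until the job leaves the HTCondor queue
job.sync_remote_output()  # pulls the remote job directory back into working_directory/<job name>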
Example #4
    def test_resolve_attribute(self):
        job = Job(self.job_name, Templates.vanilla_base)
        expected = self.job_name
        actual = job._resolve_attribute('initialdir')
        msg = 'checking resolving attribute function'
        self.assertEqual(
            expected, actual,
            '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))
Example #5
    def test__getattr__(self):
        exe = 'exe'
        self.job = Job(self.job_name, executable=exe)
        expected = exe
        actual = self.job.executable
        msg = 'testing that existing value is returned'
        self.assertEqual(expected, actual, '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))

        pass
Example #6
    def test_get(self):
        non_existent_attr = 'not-there'
        expected = None
        actual = self.job.get(non_existent_attr)
        msg = 'testing that None is returned when attribute does not exist'
        self.assertIsNone(actual, '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))

        expected = 'expected'
        actual = self.job.get(non_existent_attr, expected)
        msg = 'testing that supplied value is returned when attribute does not exist'
        self.assertEqual(expected, actual, '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))

        exe = 'exe'
        self.job = Job(self.job_name, executable=exe)
        expected = exe
        actual = self.job.get('executable')
        msg = 'testing that existing value is returned'
        self.assertEqual(expected, actual, '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))
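A short usage sketch of the accessor behavior these assertions pin down; the semantics are inferred from the test, not from separate documentation.

from condorpy import Job  # import path assumed from these examples

job = Job('job_name', executable='exe')
job.get('not-there')             # returns None when the attribute is missing
job.get('not-there', 'default')  # returns the supplied default instead
job.get('executable')            # returns 'exe' for an attribute that exists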
Example #7
    def setUp(self):
        """

        :return:
        """
        self.job_a = Job('a', Templates.base)
        self.job_b = Job('b', Templates.base)
        self.job_c = Job('c', Templates.base)
        self.job_d = Job('d', Templates.base)

        self.node_a = Node(self.job_a)
        self.node_b = Node(self.job_b)
        self.node_c = Node(self.job_c)
        self.node_d = Node(self.job_d)

        self.node_a.add_child(self.node_b)
        self.node_a.add_child(self.node_c)
        self.node_d.add_parent(self.node_b)
        self.node_d.add_parent(self.node_c)
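The setUp above wires four jobs into a diamond-shaped dependency graph; a self-contained sketch of the same wiring (condorpy import path assumed):

from condorpy import Job, Node, Templates  # import path assumed from these examples

node_a = Node(Job('a', Templates.base))  # root
node_b = Node(Job('b', Templates.base))
node_c = Node(Job('c', Templates.base))
node_d = Node(Job('d', Templates.base))  # leaf

node_a.add_child(node_b)   # a -> b
node_a.add_child(node_c)   # a -> c
node_d.add_parent(node_b)  # b -> d
node_d.add_parent(node_c)  # c -> d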
Example #8
    def condorpy_job(self):
        if not hasattr(self, '_condorpy_job'):
            if 'executable' in self.attributes.keys():
                del self.attributes['executable']

            if self.scheduler:
                host = self.scheduler.host
                username = self.scheduler.username
                password = self.scheduler.password
                private_key = self.scheduler.private_key_path
                private_key_pass = self.scheduler.private_key_pass
            else:
                host = None
                username = None
                password = None
                private_key = None
                private_key_pass = None

            attributes = dict()
            attributes.update(self.attributes)
            attributes.pop('remote_input_files', None)

            job = Job(name=self.name.replace(' ', '_'),
                      attributes=self.condorpy_template,
                      executable=self.executable,
                      host=host,
                      username=username,
                      password=password,
                      private_key=private_key,
                      private_key_pass=private_key_pass,
                      remote_input_files=self.remote_input_files,
                      working_directory=self.workspace,
                      **attributes)

            job._cluster_id = self.cluster_id
            job._num_jobs = self.num_jobs
            if self.remote_id:
                job._remote_id = self.remote_id
            else:
                self.remote_id = job._remote_id
            self._condorpy_job = job
        return self._condorpy_job
Example #9
    def condorpy_job(self):
        if not hasattr(self, '_condorpy_job'):
            if 'executable' in self.attributes.keys():
                del self.attributes['executable']

            if self.scheduler:
                host = self.scheduler.host
                username = self.scheduler.username
                password = self.scheduler.password
                private_key = self.scheduler.private_key_path
                private_key_pass = self.scheduler.private_key_pass
            else:
                host = None
                username = None
                password = None
                private_key = None
                private_key_pass = None

            attributes = dict()
            attributes.update(self.attributes)
            attributes.pop('remote_input_files', None)

            job = Job(name=self.name.replace(' ', '_'),
                      attributes=self.condorpy_template,
                      executable=self.executable,
                      host=host,
                      username=username,
                      password=password,
                      private_key=private_key,
                      private_key_pass=private_key_pass,
                      remote_input_files=self.remote_input_files,
                      working_directory=self.workspace,
                      **attributes)

            job._cluster_id = self.cluster_id
            job._num_jobs = self.num_jobs
            if self.remote_id:
                job._remote_id = self.remote_id
            else:
                self.remote_id = job._remote_id
            self._condorpy_job = job
        return self._condorpy_job
Example #10
    def condorpy_job(self):

        if not hasattr(self, '_condorpy_job'):
            job = Job(name=self.name.replace(' ', '_'),
                      attributes=self.attributes,
                      num_jobs=self.num_jobs,
                      remote_input_files=self.remote_input_files,
                      working_directory=self.workspace)

            self._condorpy_job = job
        return self._condorpy_job
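The property above memoizes the condorpy Job on the instance via hasattr. A minimal, self-contained sketch of that caching pattern (class and attribute names here are illustrative, not from the source):

class LazyJobHolder(object):
    @property
    def condorpy_job(self):
        if not hasattr(self, '_condorpy_job'):
            # expensive construction happens only once
            self._condorpy_job = object()  # stands in for the real condorpy Job
        return self._condorpy_job

holder = LazyJobHolder()
first = holder.condorpy_job
assert holder.condorpy_job is first  # later accesses return the cached instance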
Example #11
    def test__init__(self):
        attributes = OrderedDict()
        attributes['job_name'] = self.job_name
        attributes['executable'] = None
        attributes['arguments'] = None

        expected = {
            '_name': self.job_name,
            '_attributes': attributes,
            '_num_jobs': 1,
            '_cluster_id': 0,
            '_job_file': ''
        }
        actual = self.job.__dict__
        msg = 'testing initialization with default values'
        self.assertDictEqual(
            expected, actual,
            '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual))

        exe = 'exe'
        args = '-args'
        num_jobs = '5'
        self.job = Job(self.job_name, OrderedDict(), exe, args, num_jobs)
        attributes['executable'] = exe
        attributes['arguments'] = args

        expected = {
            '_name': self.job_name,
            '_attributes': attributes,
            '_num_jobs': int(num_jobs),
            '_cluster_id': 0,
            '_job_file': ''
        }
        actual = self.job.__dict__
        msg = 'testing initialization with all values supplied'
        self.assertDictEqual(
            expected, actual,
            '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))

        num_jobs = 'five'
        self.assertRaises(ValueError, Job, self.job_name, num_jobs=num_jobs)
Example #12
    def test_condorpy_node(self, mock_job):
        mock_job_return = Job(name='test_job',
                              attributes={'foo': 'bar'},
                              num_jobs=1,
                              remote_input_files=['test_file.txt'],
                              working_directory=self.workspace_dir)
        mock_job.return_value = mock_job_return

        self.condorworkflownode.job = mock_job_return
        ret = self.condorworkflownode.condorpy_node

        # Check result
        self.assertEqual('<Node: test_job parents() children()>', repr(ret))
Example #13
class TestIntegration(unittest.TestCase):

    expected = None
    actual = None
    msg = None
    base_dir = os.path.join(os.path.dirname(__file__))

    @property
    def output(self):
        return '%s\nExpected: %s\nActual:   %s\n' % (self.msg, self.expected,
                                                     self.actual)

    @property
    def assert_args(self):
        return (self.expected, self.actual, self.output)

    def setUp(self):
        self.job_name = 'job_name'
        self.job = Job(self.job_name)

    def tearDown(self):
        pass

    def test_submit(self):
        working_dir = os.path.join(self.base_dir, 'test_files', 'working_dir')

        self.job = Job('remote_test',
                       Templates.vanilla_transfer_files,
                       host='localhost',
                       username=os.environ['USER'],
                       private_key='~/.ssh/id_rsa',
                       remote_input_files=['../copy_test.py', 'input.txt'],
                       transfer_input_files='../input.txt',
                       executable=os.path.join(self.base_dir, 'test_files',
                                               'copy_test.py'),
                       working_directory=working_dir)

        remote_base_path = os.path.expanduser('~/' + self.job._remote_id)
        if os.path.exists(remote_base_path):
            raise RuntimeError('remote base path already exists: %s' % remote_base_path)
        self.job.submit()
        self.assertTrue(os.path.exists(remote_base_path))
        self.job.wait()
        self.job.sync_remote_output()
        local_output = os.path.join(working_dir, self.job.name)
        self.assertTrue(os.path.exists(local_output))
        output = os.path.join(local_output, 'output.txt')

        self.assertTrue(os.path.exists(output))
        shutil.rmtree(remote_base_path)
        shutil.rmtree(local_output)
Example #14
def run_autoroute_multiprocess(autoroute_input_directory, #path to AutoRoute input directory
                               autoroute_output_directory, #path to AutoRoute output directory
                               log_directory, #path to HTCondor/multiprocessing logs
                               autoroute_executable_location="", #location of AutoRoute executable
                               autoroute_manager=None, #AutoRoute manager with default parameters
                               rapid_output_directory="", #path to ECMWF RAPID input/output directory
                               return_period="", # return period name in return period file
                               return_period_file="", # return period file generated from RAPID historical run
                               rapid_output_file="", #path to RAPID output file to be used
                               date_peak_search_start=None, #datetime of start of search for peakflow
                               date_peak_search_end=None, #datetime of end of search for peakflow
                               river_id="", #field with unique identifier of river
                               streamflow_id="", #field with streamflow
                               stream_network_shapefile="", #stream network shapefile
                               mode="multiprocess", #multiprocess or htcondor 
                               generate_flood_map_raster=True, #generate flood raster
                               generate_flood_depth_raster=False, #generate flood depth raster
                               generate_flood_map_shapefile=False, #generate a flood map shapefile
                               wait_for_all_processes_to_finish=True, #waits for all processes to finish before ending script
                               num_cpus=-17 #number of processes to use on computer
                               ):
    """
    This is the main AutoRoute-RAPID process
    """
    time_start_all = datetime.utcnow()
    if not generate_flood_depth_raster and not generate_flood_map_raster and not generate_flood_map_shapefile:
        raise Exception("ERROR: Must set generate_flood_depth_raster, generate_flood_map_raster, or generate_flood_map_shapefile to True to proceed ...")
        
    #--------------------------------------------------------------------------
    #Validate Inputs
    #--------------------------------------------------------------------------
    valid_mode_list = ['multiprocess','htcondor']
    if mode not in valid_mode_list:
        raise Exception("ERROR: Invalid multiprocess mode {}. Only multiprocess or htcondor allowed ...".format(mode))
        
    if mode == "htcondor" and not HTCONDOR_ENABLED:
        raise Exception("ERROR: HTCondor mode not allowed. Must have condorpy and HTCondor installed to work ...".format(mode))
        
    #DETERMINE MODE TO PREPARE STREAMFLOW
    PREPARE_MODE = get_valid_streamflow_prepare_mode(autoroute_input_directory,
                                                     rapid_output_directory,
                                                     return_period,
                                                     return_period_file,
                                                     rapid_output_file,
                                                     river_id,
                                                     streamflow_id,
                                                     stream_network_shapefile,
                                                     )    
    #--------------------------------------------------------------------------
    #Initialize Run
    #--------------------------------------------------------------------------
    try:
        os.makedirs(autoroute_output_directory)
    except OSError:
        pass
    
    local_scripts_location = os.path.dirname(os.path.realpath(__file__))

    #initialize HTCondor/multiprocess log directories
    prepare_log_directory = os.path.join(log_directory, "prepare")
    try:
        os.makedirs(prepare_log_directory)
    except OSError:
        pass
    if PREPARE_MODE > 0:
        print("Streamflow preparation logs can be found here: {0}".format(prepare_log_directory))
        
    run_log_directory = os.path.join(log_directory, "run")
    try:
        os.makedirs(run_log_directory)
    except OSError:
        pass
    print("AutoRoute simulation logs can be found here: {0}".format(run_log_directory))

    #keep list of jobs
    autoroute_job_info = {
                            'multiprocess_job_list': [],
                            'htcondor_job_list': [],
                            'htcondor_job_info': [],
                            'output_folder': autoroute_output_directory,
                           }
                           
    if mode == "multiprocess":
        num_cpus = get_valid_num_cpus(num_cpus)                    
        #start pool
        pool_streamflow = multiprocessing.Pool(num_cpus)
        pool_main = multiprocessing.Pool(num_cpus)

    #--------------------------------------------------------------------------
    #Run the model
    #--------------------------------------------------------------------------
    #loop through sub-directories
    streamflow_job_list = []
    for directory in os.listdir(autoroute_input_directory):
        master_watershed_autoroute_input_directory = os.path.join(autoroute_input_directory, directory)
        if os.path.isdir(master_watershed_autoroute_input_directory):
            autoroute_watershed_name = os.path.basename(autoroute_input_directory)
            autoroute_job_name = "{0}-{1}".format(autoroute_watershed_name, directory)
            
            try:
                case_insensitive_file_search(master_watershed_autoroute_input_directory, r'elevation\.(?!prj)')
            except Exception:
                try:
                    case_insensitive_file_search(os.path.join(master_watershed_autoroute_input_directory, 'elevation'), r'hdr\.adf')
                except Exception:
                    print("ERROR: Elevation raster not found. Skipping run ...")
                    continue
            
            try:
                stream_info_file = case_insensitive_file_search(master_watershed_autoroute_input_directory,
                                                                r'stream_info\.txt')
            except Exception:
                print("Stream info file not found. Skipping run ...")
                continue

            if PREPARE_MODE > 0:
                streamflow_job_list.append((PREPARE_MODE,
                                            master_watershed_autoroute_input_directory,
                                            stream_info_file,
                                            rapid_output_directory,
                                            return_period_file,
                                            return_period,
                                            rapid_output_file,
                                            date_peak_search_start,
                                            date_peak_search_end,
                                            river_id,
                                            streamflow_id,
                                            stream_network_shapefile,
                                            autoroute_job_name,
                                            prepare_log_directory,
                                            ))
            
            output_shapefile_base_name = '{0}_{1}'.format(autoroute_watershed_name, directory)
            #set up flood raster name
            output_flood_map_raster_name = 'flood_map_raster_{0}.tif'.format(output_shapefile_base_name)
            master_output_flood_map_raster_name = os.path.join(autoroute_output_directory, output_flood_map_raster_name)
            #set up flood raster name
            output_flood_depth_raster_name = 'flood_depth_raster_{0}.tif'.format(output_shapefile_base_name)
            master_output_flood_depth_raster_name = os.path.join(autoroute_output_directory, output_flood_depth_raster_name)
            #set up flood shapefile name
            output_shapefile_shp_name = '{0}.shp'.format(output_shapefile_base_name)
            master_output_shapefile_shp_name = os.path.join(autoroute_output_directory, output_shapefile_shp_name)

            delete_flood_map_raster = False
            if not generate_flood_map_shapefile:
                master_output_shapefile_shp_name = ""
            else:
                if not generate_flood_map_raster:
                    generate_flood_map_raster = True
                    delete_flood_map_raster = True
                
            if not generate_flood_map_raster:
                master_output_flood_map_raster_name = ""

            if not generate_flood_depth_raster:
                master_output_flood_depth_raster_name = ""

            if mode == "htcondor":
                #create job to run autoroute for each raster in watershed
                job = CJob('job_autoroute_{0}_{1}'.format(os.path.basename(autoroute_input_directory), directory), tmplt.vanilla_transfer_files)
                

                if generate_flood_map_shapefile:
                    #setup additional floodmap shapefile names
                    output_shapefile_shx_name = '{0}.shx'.format(output_shapefile_base_name)
                    master_output_shapefile_shx_name = os.path.join(autoroute_output_directory, output_shapefile_shx_name)
                    output_shapefile_prj_name = '{0}.prj'.format(output_shapefile_base_name)
                    master_output_shapefile_prj_name = os.path.join(autoroute_output_directory, output_shapefile_prj_name)
                    output_shapefile_dbf_name = '{0}.dbf'.format(output_shapefile_base_name)
                    master_output_shapefile_dbf_name = os.path.join(autoroute_output_directory, output_shapefile_dbf_name)
                
                    transfer_output_remaps = "{0} = {1}; {2} = {3}; {4} = {5};" \
                                             " {6} = {7}; {8} = {9}".format(output_shapefile_shp_name, 
                                                                            master_output_shapefile_shp_name,
                                                                            output_shapefile_shx_name,
                                                                            master_output_shapefile_shx_name,
                                                                            output_shapefile_prj_name,
                                                                            master_output_shapefile_prj_name,
                                                                            output_shapefile_dbf_name,
                                                                            master_output_shapefile_dbf_name,
                                                                            output_flood_map_raster_name,
                                                                            master_output_flood_map_raster_name)
                    
                    if generate_flood_depth_raster:
                        transfer_output_remaps += "; {0} = {1}".format(output_flood_depth_raster_name, 
                                                                       master_output_flood_depth_raster_name)
                else:
                    output_shapefile_shp_name = ""
                    transfer_output_remaps = ""
                    if generate_flood_map_raster:
                        transfer_output_remaps = "{0} = {1}".format(output_flood_map_raster_name, 
                                                                    master_output_flood_map_raster_name)
                    if generate_flood_depth_raster:
                        if transfer_output_remaps:
                            transfer_output_remaps += "; "
                            
                        transfer_output_remaps += "{0} = {1}".format(output_flood_depth_raster_name, 
                                                                     master_output_flood_depth_raster_name)
                                                                     
                job.set('transfer_output_remaps',"\"{0}\"" .format(transfer_output_remaps))
                                                                      
                job.set('executable', os.path.join(local_scripts_location,'multicore_worker_process.py'))
                job.set('transfer_input_files', "{0}".format(master_watershed_autoroute_input_directory))
                job.set('initialdir', run_log_directory)
                    
                job.set('arguments', '{0} {1} {2} {3} {4} {5} {6}'.format(autoroute_executable_location,
                                                                          autoroute_manager,
                                                                          directory,
                                                                          output_flood_map_raster_name,
                                                                          output_flood_depth_raster_name,
                                                                          output_shapefile_shp_name,
                                                                          delete_flood_map_raster))
                                                              
                autoroute_job_info['htcondor_job_list'].append(job)
                autoroute_job_info['htcondor_job_info'].append({ 'output_shapefile_base_name': output_shapefile_base_name,
                                                                 'autoroute_job_name': autoroute_job_name})

            else: #mode == "multiprocess":
                autoroute_job_info['multiprocess_job_list'].append((autoroute_executable_location,
                                                                    autoroute_manager,
                                                                    master_watershed_autoroute_input_directory,
                                                                    master_output_flood_map_raster_name,
                                                                    master_output_flood_depth_raster_name,
                                                                    master_output_shapefile_shp_name,
                                                                    delete_flood_map_raster,
                                                                    autoroute_job_name,
                                                                    run_log_directory
                                                                    ))
                #For testing function serially
                """
                run_autoroute_multiprocess_worker((autoroute_executable_location,
                                                   autoroute_manager,
                                                   master_watershed_autoroute_input_directory,
                                                   master_output_flood_map_raster_name,
                                                   master_output_flood_depth_raster_name,
                                                   master_output_shapefile_shp_name,
                                                   delete_flood_map_raster,
                                                   autoroute_job_name,
                                                   run_log_directory))
                """
    if PREPARE_MODE > 0:
        #generate streamflow
        streamflow_job_list = pool_streamflow.imap_unordered(prepare_autoroute_streamflow_multiprocess_worker,
                                                             streamflow_job_list,
                                                             chunksize=1)
        for streamflow_job_output in streamflow_job_list:
            print("STREAMFLOW READY: {0}".format(streamflow_job_output))
        pool_streamflow.close()
        pool_streamflow.join()
        
    print("Running AutoRoute simulations ...")
    #submit jobs to run
    if mode == "multiprocess":
        autoroute_job_info['multiprocess_worker_list'] = pool_main.imap_unordered(run_autoroute_multiprocess_worker, 
                                                                                 autoroute_job_info['multiprocess_job_list'], 
                                                                                 chunksize=1)
    else:
        for htcondor_job in autoroute_job_info['htcondor_job_list']:
            htcondor_job.submit()

    if wait_for_all_processes_to_finish:
        #wait for all of the jobs to complete
        if mode == "multiprocess":
            for multi_job_output in autoroute_job_info['multiprocess_worker_list']:
                print("JOB FINISHED: {0}".format(multi_job_output[3]))
            #just in case ...
            pool_main.close()
            pool_main.join()
        else:
            for htcondor_job_index, htcondor_job in enumerate(autoroute_job_info['htcondor_job_list']):
                htcondor_job.wait()
                print("JOB FINISHED: {0}".format(autoroute_job_info['htcondor_job_info'][htcondor_job_index]['autoroute_job_name']))
    
        print("Time to complete entire AutoRoute process: {0}".format(datetime.utcnow()-time_start_all))
    else:       
        return autoroute_job_info
Example #15
def run_autoroute_multiprocess(
    autoroute_input_directory,  #path to AutoRoute input directory
    autoroute_output_directory,  #path to AutoRoute output directory
    log_directory,  #path to HTCondor/multiprocessing logs
    autoroute_executable_location="",  #location of AutoRoute executable
    autoroute_manager=None,  #AutoRoute manager with default parameters
    rapid_output_directory="",  #path to ECMWF RAPID input/output directory
    return_period="",  # return period name in return period file
    return_period_file="",  # return period file generated from RAPID historical run
    rapid_output_file="",  #path to RAPID output file to be used
    date_peak_search_start=None,  #datetime of start of search for peakflow
    date_peak_search_end=None,  #datetime of end of search for peakflow
    river_id="",  #field with unique identifier of river
    streamflow_id="",  #field with streamflow
    stream_network_shapefile="",  #stream network shapefile
    mode="multiprocess",  #multiprocess or htcondor 
    generate_flood_map_raster=True,  #generate flood raster
    generate_flood_depth_raster=False,  #generate flood depth raster
    generate_flood_map_shapefile=False,  #generate a flood map shapefile
    wait_for_all_processes_to_finish=True,  #waits for all processes to finish before ending script
    num_cpus=-17  #number of processes to use on computer
):
    """
    This is the main AutoRoute-RAPID process
    """
    time_start_all = datetime.utcnow()
    if not generate_flood_depth_raster and not generate_flood_map_raster and not generate_flood_map_shapefile:
        raise Exception(
            "ERROR: Must set generate_flood_depth_raster, generate_flood_map_raster, or generate_flood_map_shapefile to True to proceed ..."
        )

    #--------------------------------------------------------------------------
    #Validate Inputs
    #--------------------------------------------------------------------------
    valid_mode_list = ['multiprocess', 'htcondor']
    if mode not in valid_mode_list:
        raise Exception(
            "ERROR: Invalid multiprocess mode {}. Only multiprocess or htcondor allowed ..."
            .format(mode))

    if mode == "htcondor" and not HTCONDOR_ENABLED:
        raise Exception(
            "ERROR: HTCondor mode not allowed. Must have condorpy and HTCondor installed to work ...")

    #DETERMINE MODE TO PREPARE STREAMFLOW
    PREPARE_MODE = get_valid_streamflow_prepare_mode(
        autoroute_input_directory,
        rapid_output_directory,
        return_period,
        return_period_file,
        rapid_output_file,
        river_id,
        streamflow_id,
        stream_network_shapefile,
    )
    #--------------------------------------------------------------------------
    #Initialize Run
    #--------------------------------------------------------------------------
    try:
        os.makedirs(autoroute_output_directory)
    except OSError:
        pass

    local_scripts_location = os.path.dirname(os.path.realpath(__file__))

    #initialize HTCondor/multiprocess log directories
    prepare_log_directory = os.path.join(log_directory, "prepare")
    try:
        os.makedirs(prepare_log_directory)
    except OSError:
        pass
    if PREPARE_MODE > 0:
        print("Streamflow preparation logs can be found here: {0}".format(
            prepare_log_directory))

    run_log_directory = os.path.join(log_directory, "run")
    try:
        os.makedirs(run_log_directory)
    except OSError:
        pass
    print("AutoRoute simulation logs can be found here: {0}".format(
        run_log_directory))

    #keep list of jobs
    autoroute_job_info = {
        'multiprocess_job_list': [],
        'htcondor_job_list': [],
        'htcondor_job_info': [],
        'output_folder': autoroute_output_directory,
    }

    if mode == "multiprocess":
        num_cpus = get_valid_num_cpus(num_cpus)
        #start pool
        pool_streamflow = multiprocessing.Pool(num_cpus)
        pool_main = multiprocessing.Pool(num_cpus)

    #--------------------------------------------------------------------------
    #Run the model
    #--------------------------------------------------------------------------
    #loop through sub-directories
    streamflow_job_list = []
    for directory in os.listdir(autoroute_input_directory):
        master_watershed_autoroute_input_directory = os.path.join(
            autoroute_input_directory, directory)
        if os.path.isdir(master_watershed_autoroute_input_directory):
            autoroute_watershed_name = os.path.basename(
                autoroute_input_directory)
            autoroute_job_name = "{0}-{1}".format(autoroute_watershed_name,
                                                  directory)

            try:
                case_insensitive_file_search(
                    master_watershed_autoroute_input_directory,
                    r'elevation\.(?!prj)')
            except Exception:
                try:
                    case_insensitive_file_search(
                        os.path.join(
                            master_watershed_autoroute_input_directory,
                            'elevation'), r'hdr\.adf')
                except Exception:
                    print(
                        "ERROR: Elevation raster not found. Skipping run ...")
                    continue

            try:
                stream_info_file = case_insensitive_file_search(
                    master_watershed_autoroute_input_directory,
                    r'stream_info\.txt')
            except Exception:
                print("Stream info file not found. Skipping run ...")
                continue

            if PREPARE_MODE > 0:
                streamflow_job_list.append((
                    PREPARE_MODE,
                    master_watershed_autoroute_input_directory,
                    stream_info_file,
                    rapid_output_directory,
                    return_period_file,
                    return_period,
                    rapid_output_file,
                    date_peak_search_start,
                    date_peak_search_end,
                    river_id,
                    streamflow_id,
                    stream_network_shapefile,
                    autoroute_job_name,
                    prepare_log_directory,
                ))

            output_shapefile_base_name = '{0}_{1}'.format(
                autoroute_watershed_name, directory)
            #set up flood raster name
            output_flood_map_raster_name = 'flood_map_raster_{0}.tif'.format(
                output_shapefile_base_name)
            master_output_flood_map_raster_name = os.path.join(
                autoroute_output_directory, output_flood_map_raster_name)
            #set up flood raster name
            output_flood_depth_raster_name = 'flood_depth_raster_{0}.tif'.format(
                output_shapefile_base_name)
            master_output_flood_depth_raster_name = os.path.join(
                autoroute_output_directory, output_flood_depth_raster_name)
            #set up flood shapefile name
            output_shapefile_shp_name = '{0}.shp'.format(
                output_shapefile_base_name)
            master_output_shapefile_shp_name = os.path.join(
                autoroute_output_directory, output_shapefile_shp_name)

            delete_flood_map_raster = False
            if not generate_flood_map_shapefile:
                master_output_shapefile_shp_name = ""
            else:
                if not generate_flood_map_raster:
                    generate_flood_map_raster = True
                    delete_flood_map_raster = True

            if not generate_flood_map_raster:
                master_output_flood_map_raster_name = ""

            if not generate_flood_depth_raster:
                master_output_flood_depth_raster_name = ""

            if mode == "htcondor":
                #create job to run autoroute for each raster in watershed
                job = CJob(
                    'job_autoroute_{0}_{1}'.format(
                        os.path.basename(autoroute_input_directory),
                        directory), tmplt.vanilla_transfer_files)

                if generate_flood_map_shapefile:
                    #setup additional floodmap shapefile names
                    output_shapefile_shx_name = '{0}.shx'.format(
                        output_shapefile_base_name)
                    master_output_shapefile_shx_name = os.path.join(
                        autoroute_output_directory, output_shapefile_shx_name)
                    output_shapefile_prj_name = '{0}.prj'.format(
                        output_shapefile_base_name)
                    master_output_shapefile_prj_name = os.path.join(
                        autoroute_output_directory, output_shapefile_prj_name)
                    output_shapefile_dbf_name = '{0}.dbf'.format(
                        output_shapefile_base_name)
                    master_output_shapefile_dbf_name = os.path.join(
                        autoroute_output_directory, output_shapefile_dbf_name)

                    transfer_output_remaps = "{0} = {1}; {2} = {3}; {4} = {5};" \
                                             " {6} = {7}; {8} = {9}".format(output_shapefile_shp_name,
                                                                            master_output_shapefile_shp_name,
                                                                            output_shapefile_shx_name,
                                                                            master_output_shapefile_shx_name,
                                                                            output_shapefile_prj_name,
                                                                            master_output_shapefile_prj_name,
                                                                            output_shapefile_dbf_name,
                                                                            master_output_shapefile_dbf_name,
                                                                            output_flood_map_raster_name,
                                                                            master_output_flood_map_raster_name)

                    if generate_flood_depth_raster:
                        transfer_output_remaps += "; {0} = {1}".format(
                            output_flood_depth_raster_name,
                            master_output_flood_depth_raster_name)
                else:
                    output_shapefile_shp_name = ""
                    transfer_output_remaps = ""
                    if generate_flood_map_raster:
                        transfer_output_remaps = "{0} = {1}".format(
                            output_flood_map_raster_name,
                            master_output_flood_map_raster_name)
                    if generate_flood_depth_raster:
                        if transfer_output_remaps:
                            transfer_output_remaps += "; "

                        transfer_output_remaps += "{0} = {1}".format(
                            output_flood_depth_raster_name,
                            master_output_flood_depth_raster_name)

                job.set('transfer_output_remaps',
                        "\"{0}\"".format(transfer_output_remaps))

                job.set(
                    'executable',
                    os.path.join(local_scripts_location,
                                 'multicore_worker_process.py'))
                job.set(
                    'transfer_input_files',
                    "{0}".format(master_watershed_autoroute_input_directory))
                job.set('initialdir', run_log_directory)

                job.set(
                    'arguments', '{0} {1} {2} {3} {4} {5} {6}'.format(
                        autoroute_executable_location, autoroute_manager,
                        directory, output_flood_map_raster_name,
                        output_flood_depth_raster_name,
                        output_shapefile_shp_name, delete_flood_map_raster))

                autoroute_job_info['htcondor_job_list'].append(job)
                autoroute_job_info['htcondor_job_info'].append({
                    'output_shapefile_base_name':
                    output_shapefile_base_name,
                    'autoroute_job_name':
                    autoroute_job_name
                })

            else:  #mode == "multiprocess":
                autoroute_job_info['multiprocess_job_list'].append(
                    (autoroute_executable_location, autoroute_manager,
                     master_watershed_autoroute_input_directory,
                     master_output_flood_map_raster_name,
                     master_output_flood_depth_raster_name,
                     master_output_shapefile_shp_name, delete_flood_map_raster,
                     autoroute_job_name, run_log_directory))
                #For testing function serially
                """
                run_autoroute_multiprocess_worker((autoroute_executable_location,
                                                   autoroute_manager,
                                                   master_watershed_autoroute_input_directory,
                                                   master_output_flood_map_raster_name,
                                                   master_output_flood_depth_raster_name,
                                                   master_output_shapefile_shp_name,
                                                   delete_flood_map_raster,
                                                   autoroute_job_name,
                                                   run_log_directory))
                """
    if PREPARE_MODE > 0:
        #generate streamflow
        streamflow_job_list = pool_streamflow.imap_unordered(
            prepare_autoroute_streamflow_multiprocess_worker,
            streamflow_job_list,
            chunksize=1)
        for streamflow_job_output in streamflow_job_list:
            print("STREAMFLOW READY: {0}".format(streamflow_job_output))
        pool_streamflow.close()
        pool_streamflow.join()

    print("Running AutoRoute simulations ...")
    #submit jobs to run
    if mode == "multiprocess":
        autoroute_job_info[
            'multiprocess_worker_list'] = pool_main.imap_unordered(
                run_autoroute_multiprocess_worker,
                autoroute_job_info['multiprocess_job_list'],
                chunksize=1)
    else:
        for htcondor_job in autoroute_job_info['htcondor_job_list']:
            htcondor_job.submit()

    if wait_for_all_processes_to_finish:
        #wait for all of the jobs to complete
        if mode == "multiprocess":
            for multi_job_output in autoroute_job_info[
                    'multiprocess_worker_list']:
                print("JOB FINISHED: {0}".format(multi_job_output[3]))
            #just in case ...
            pool_main.close()
            pool_main.join()
        else:
            for htcondor_job_index, htcondor_job in enumerate(
                    autoroute_job_info['htcondor_job_list']):
                htcondor_job.wait()
                print("JOB FINISHED: {0}".format(
                    autoroute_job_info['htcondor_job_info'][htcondor_job_index]
                    ['autoroute_job_name']))

        print("Time to complete entire AutoRoute process: {0}".format(
            datetime.utcnow() - time_start_all))
    else:
        return autoroute_job_info
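A hypothetical invocation of run_autoroute_multiprocess in multiprocess mode, built only from parameters in the signature above; all paths are placeholders.

autoroute_job_info = run_autoroute_multiprocess(
    autoroute_input_directory='/path/to/autoroute_input',
    autoroute_output_directory='/path/to/autoroute_output',
    log_directory='/path/to/logs',
    autoroute_executable_location='/path/to/autoroute_exe',
    mode='multiprocess',
    generate_flood_map_raster=True,
    generate_flood_map_shapefile=False,
    wait_for_all_processes_to_finish=False,  # returns autoroute_job_info instead of blocking
)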
Example #16
class TestJob(unittest.TestCase):

    expected = None
    actual = None
    msg = None
    base_dir = os.path.join(os.path.dirname(__file__))

    @property
    def output(self):
        return '%s\nExpected: %s\nActual:   %s\n' % (self.msg, self.expected, self.actual)

    @property
    def assert_args(self):
        return (self.expected, self.actual, self.output)


    def setUp(self):
        self.job_name = 'job_name'
        self.job = Job(self.job_name)

    def tearDown(self):
        pass

    def test__init__(self):
        attributes = dict()
        attributes['job_name'] = self.job_name

        self.expected = {'_name': self.job_name,
                         '_attributes': attributes,
                         '_num_jobs': 1,
                         '_cluster_id': 0,
                         '_job_file': '',
                         '_remote': None,
                         '_remote_id': None,
                         '_remote_input_files': None,
                         '_cwd': '.'}
        self.actual = self.job.__dict__
        self.msg = 'testing initialization with default values'
        self.assertDictEqual(*self.assert_args)

        exe = 'exe'
        args = 'args'
        num_jobs = '5'
        self.job = Job(self.job_name, OrderedDict(), num_jobs, executable=exe, arguments=args)
        attributes.update({'executable': exe, 'arguments': args})

        self.expected.update({'_name': self.job_name,
                              '_attributes': attributes,
                              '_num_jobs': int(num_jobs)})
        self.actual = self.job.__dict__
        self.actual['_attributes'] = dict(self.actual['_attributes'])
        self.msg = 'testing initialization with all values supplied'
        self.assertDictEqual(*self.assert_args)

        num_jobs = 'five'
        self.assertRaises(ValueError, Job, self.job_name, num_jobs=num_jobs)


    def test__str__(self):
        expected = 'job_name = %s\n\nqueue 1\n' % (self.job_name)
        actual = self.job.__str__()
        msg = 'testing to string with default initialization'
        self.assertEqual(expected, actual, '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))


    def test__repr__(self):
        expected = '<' \
                   'Job: name=%s, num_jobs=%d, cluster_id=%s>' % (self.job_name, 1, 0)
        actual = self.job.__repr__()
        msg = 'testing repr with default initialization'
        self.assertEqual(expected, actual, '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))

    def test__copy__(self):
        original = self.job
        copy = self.job.__copy__()
        expected = original.name
        actual = copy.name
        msg = 'testing that name of copy is equal to original'
        self.assertEqual(expected, actual, '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))

        expected = original.attributes
        actual = copy.attributes
        msg = 'testing that attributes dictionary of copy is equal to original'
        self.assertDictEqual(expected, actual, '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))
        msg = "testing that attributes is the same instance as the original's"
        self.assertIs(expected, actual, '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))


    def test__deepcopy__(self):
        original = self.job
        memo = dict()
        copy = self.job.__deepcopy__(memo)
        expected = self.job.name
        actual = copy.name
        msg = 'testing that name of deepcopy is equal to original'
        self.assertEqual(expected, actual, '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))

        expected = original.attributes
        actual = copy.attributes
        msg = 'testing that attributes dictionary of copy is equal to original'
        self.assertDictEqual(expected, actual, '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))
        msg = "testing that attributes is not the same instance as the original's"
        self.assertIsNot(expected, actual, '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))


    def test__getattr__(self):
        exe = 'exe'
        self.job = Job(self.job_name, executable=exe)
        expected = exe
        actual = self.job.executable
        msg = 'testing that existing value is returned'
        self.assertEqual(expected, actual, '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))

        pass

    def test__setattr__(self):
        pass

    def test_name(self):
        expected = self.job_name
        actual = self.job.name
        msg = 'checking initialization of name'
        self.assertEqual(expected, actual, '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))

        new_name = 'new_name'
        self.job.name = new_name

        expected = new_name
        actual = self.job.name
        msg = 'checking assignment of name'
        self.assertEqual(expected, actual, '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))


    def test_attributes(self):
        pass

    def test_num_jobs(self):
        pass

    def test_cluster_id(self):
        pass

    def test_job_file(self):
        job_file_name = '%s.job' % (self.job_name)
        job_file = os.path.join(os.path.relpath(os.getcwd()), job_file_name)
        expected = job_file
        actual = self.job.job_file
        msg = 'checking resolving attribute function for job file'
        self.assertEqual(expected, actual, '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))

        init_dir = 'init_dir'
        self.job.initialdir = init_dir
        job_file = os.path.join(init_dir, job_file_name)
        self.assertEqual(job_file, self.job.job_file)

    def test_log_file(self):
        self.job = Job(self.job_name, Templates.base)
        log_file = '%s/%s/%s.%s.log' % (self.job.initial_dir, self.job.logdir, self.job_name, self.job.cluster_id)
        expected = log_file
        actual = self.job.log_file
        msg = 'checking resolving attribute function for log file'
        self.assertEqual(expected, actual, '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))


    def test_initial_dir(self):
        pass

    def test_submit(self):
        pass

    def test_remove(self):
        pass

    def test_edit(self):
        expected = NotImplementedError
        actual = self.job.edit
        self.assertRaises(expected, actual)

    def test_status(self):
        expected = NotImplementedError
        actual = self.job.edit
        self.assertRaises(expected, actual)

    def test_wait(self):
        pass

    def test_get(self):
        non_existent_attr = 'not-there'
        expected = None
        actual = self.job.get(non_existent_attr)
        msg = 'testing that None is returned when attribute does not exist'
        self.assertIsNone(actual, '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))

        expected = 'expected'
        actual = self.job.get(non_existent_attr, expected)
        msg = 'testing that supplied value is returned when attribute does not exist'
        self.assertEqual(expected, actual, '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))

        exe = 'exe'
        self.job = Job(self.job_name, executable=exe)
        expected = exe
        actual = self.job.get('executable')
        msg = 'testing that existing value is returned'
        self.assertEqual(expected, actual, '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))


    def test_set(self):
        key = 'was-not-there'
        value = 'now-it-is'
        self.job.set(key, value)
        expected = value
        actual = self.job.attributes[key]
        msg = 'testing that attribute that previously does not exist is set correctly'
        self.assertEqual(expected, actual, '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))

        key = 'was-already-there'
        value = 'used-to-be-this'
        new_value = 'now-it-is-this'
        self.job.set(key, value)
        self.job.set(key,new_value)
        expected = new_value
        actual = self.job.attributes[key]
        msg = 'testing that attribute that previously existed is re-set correctly'
        self.assertEqual(expected, actual, '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))

        key = 'python boolean'
        value = True
        self.job.set(key, value)
        expected = 'true'
        actual = self.job.attributes[key]
        msg = 'testing that an attribute can be set with the Python boolean value "True"'
        self.assertEqual(expected, actual, '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))

        key = 'python boolean'
        value = False
        self.job.set(key, value)
        expected = 'false'
        actual = self.job.attributes[key]
        msg = 'testing that an attribute can be set with the Python boolean value "False"'
        self.assertEqual(expected, actual, '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))

        key = 'python list'
        value = ['file.txt', 1]
        self.job.set(key, value)
        expected = ', '.join([str(i) for i in value])
        actual = self.job.attributes[key]
        msg = 'testing that an attribute can be set with a Python list'
        self.assertEqual(expected, actual, '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))

    def test_delete(self):
        key = 'was-not-there'
        value = 'now-it-is'
        self.job.set(key, value)
        self.job.delete(key)
        member = key
        container = self.job.attributes
        msg = 'testing that attribute is removed when deleted'
        self.assertNotIn(member, container, msg)


    def test_write_job_file(self):
        pass

    def test_list_attributes(self):
        pass

    def test_make_dir(self):
        pass

    def test_make_job_dirs(self):
        pass

    def test_resolve_attribute(self):
        job = Job(self.job_name, Templates.vanilla_base)
        expected = self.job_name
        actual = job._resolve_attribute('initialdir')
        msg = 'checking resolving attribute function'
        self.assertEqual(expected, actual, '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))

    def test_resolve_attribute_match(self):
        pass

    def test_remote(self):
        working_dir = os.path.join(self.base_dir, 'test_files', 'working_dir')
        self.job = Job('remote_test',
                       Templates.vanilla_transfer_files,
                       host='localhost',
                       username=os.environ['USER'],
                       private_key='~/.ssh/id_rsa',
                       remote_input_files=['../copy_test.py', 'input.txt'],
                       transfer_input_files='../input.txt',
                       working_directory=working_dir)

        remote_base_path = os.path.expanduser('~/' + self.job._remote_id)
        if os.path.exists(remote_base_path):
            raise RuntimeError('remote base path already exists: %s' % remote_base_path)
        self.job._write_job_file()
        self.assertTrue(os.path.exists(remote_base_path))

        self.job.sync_remote_output()
        local_output = os.path.join(working_dir, self.job.name)
        self.assertTrue(os.path.exists(local_output))
        shutil.rmtree(remote_base_path)
        shutil.rmtree(local_output)
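
A minimal local sketch of the Job lifecycle these tests exercise (not part of the original test suite). It assumes the import path "from condorpy import Job, Templates"; the script name and its arguments are placeholders.

# Hedged sketch: drive a condorpy Job the same way the tests above do.
from condorpy import Job, Templates

job = Job('lifecycle_demo', Templates.vanilla_transfer_files)
job.set('executable', 'my_script.py')         # placeholder script
job.set('arguments', 'input.txt output.txt')
job.set('transfer_input_files', 'input.txt')

job.submit()                                  # write the job file and submit
job.wait()                                    # block until the cluster finishes
print(job.get('executable'))                  # attributes can be read back with get()
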
Example #17
0
    def setUp(self):
        self.job_name = 'job_name'
        self.job = Job(self.job_name)
Example #18
0
def run_ecmwf_forecast_process(
        rapid_executable_location,  # path to RAPID executable
        rapid_io_files_location,  # path to RAPID input/output directory
        ecmwf_forecast_location,  # path to ECMWF forecasts
        subprocess_log_directory,  # path to store HTCondor/multiprocess logs
        main_log_directory,  # path to store main logs
        region="",  #1 of the 12 partitioned ECMWF files. Leave empty if using global,
        data_store_url="",  # CKAN API url
        data_store_api_key="",  # CKAN API Key,
        data_store_owner_org="",  # CKAN owner organization
        app_instance_id="",  # Streamflow Prediction tool instance ID
        sync_rapid_input_with_ckan=False,  # match Streamflow Prediction tool RAPID input
        download_ecmwf=True,  # Download recent ECMWF forecast before running,
        date_string="",  # string of date of interest
        ftp_host="",  # ECMWF ftp site path
        ftp_login="",  # ECMWF ftp login name
        ftp_passwd="",  # ECMWF ftp password
        ftp_directory="",  # ECMWF ftp directory
        delete_past_ecmwf_forecasts=True,  # Deletes all past forecasts before next run
        upload_output_to_ckan=False,  # upload data to CKAN and remove local copy
        delete_output_when_done=False,  # delete all output data from this code
        initialize_flows=False,  # use forecast to initialize next run
        warning_flow_threshold=100,  # flows below this threshold will be ignored
        era_interim_data_location="",  # path to ERA Interim return period data
        create_warning_points=False,  # generate warning points for Streamflow Prediction Tool
        autoroute_executable_location="",  # location of AutoRoute executable
        autoroute_io_files_location="",  # path to AutoRoute input/output directory
        geoserver_url="",  # url to API endpoint ending in geoserver/rest
        geoserver_username="",  # username for geoserver
        geoserver_password="",  # password for geoserver
        mp_mode='htcondor',  # valid options are htcondor and multiprocess,
        mp_execute_directory="",  # required if using multiprocess mode
        initialization_time_step=12,  # time step of ECMWF Forecast Process, in hours
        #doesn't appear to be used MJS 8/23/2020... watersheds_with_dams_list=[], # a list of all watersheds where dam outflows are being forced
        #doesn't appear to be used, MJS 8/23/2020... stream_ids_with_dams_dict={}, # a dictionary with the watershed key and a value of a list of stream IDs where dams are located
        #doesn't appear to be used, MJS 8/23/2020... dam_outflows={} # a dictionary with the key as a stream ID and a value of a list of outflows
        BS_opt_dam=False,
        IS_dam_tot=0,
        IS_dam_use=0,
        dam_tot_id_file="",
        dam_use_id_file="",
        dam_file=""):
    """
    This is the main ECMWF RAPID forecast process.
    """
    time_begin_all = datetime.datetime.utcnow()

    LOCAL_SCRIPTS_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
    LOCK_INFO_FILE = os.path.join(main_log_directory,
                                  "spt_compute_ecmwf_run_info_lock.txt")

    log_file_path = os.path.join(
        main_log_directory, "spt_compute_ecmwf_{0}.log".format(
            time_begin_all.strftime("%y%m%d%H%M%S")))

    with CaptureStdOutToLog(log_file_path):

        if not CONDOR_ENABLED and mp_mode == 'htcondor':
            raise ImportError(
                "condorpy is not installed. Please install condorpy to use the 'htcondor' option."
            )

        if not AUTOROUTE_ENABLED and autoroute_executable_location and autoroute_io_files_location:
            raise ImportError(
                "AutoRoute is not enabled. Please install tethys_dataset_services"
                " and AutoRoutePy to use the AutoRoute option.")

        if mp_mode == "multiprocess":
            if not mp_execute_directory or not os.path.exists(
                    mp_execute_directory):
                raise Exception(
                    "If mode is multiprocess, mp_execute_directory is required ..."
                )

        if sync_rapid_input_with_ckan and app_instance_id and data_store_url and data_store_api_key:
            # sync with data store
            ri_manager = RAPIDInputDatasetManager(data_store_url,
                                                  data_store_api_key, 'ecmwf',
                                                  app_instance_id)
            ri_manager.sync_dataset(
                os.path.join(rapid_io_files_location, 'input'))

        # clean up old log files
        clean_logs(subprocess_log_directory,
                   main_log_directory,
                   log_file_path=log_file_path)

        data_manager = None
        if upload_output_to_ckan and data_store_url and data_store_api_key:
            if not SPT_DATASET_ENABLED:
                raise ImportError(
                    "spt_dataset_manager is not installed. "
                    "Please install spt_dataset_manager to use the 'ckan' options."
                )

            # init data manager for CKAN
            data_manager = ECMWFRAPIDDatasetManager(data_store_url,
                                                    data_store_api_key,
                                                    data_store_owner_org)

        # get list of correctly formatted rapid input directories in rapid directory
        rapid_input_directories = get_valid_watershed_list(
            os.path.join(rapid_io_files_location, "input"))

        if download_ecmwf and ftp_host:
            # get list of folders to download
            ecmwf_folders = sorted(
                get_ftp_forecast_list(
                    'Runoff.%s*%s*.netcdf.tar*' % (date_string, region),
                    ftp_host, ftp_login, ftp_passwd, ftp_directory))
        else:
            # get list of folders to run
            ecmwf_folders = sorted(
                glob(
                    os.path.join(ecmwf_forecast_location,
                                 'Runoff.' + date_string + '*.netcdf')))

        # LOAD LOCK INFO FILE
        last_forecast_date = datetime.datetime.utcfromtimestamp(0)
        if os.path.exists(LOCK_INFO_FILE):
            with open(LOCK_INFO_FILE) as fp_lock_info:
                previous_lock_info = json.load(fp_lock_info)

            if previous_lock_info['running']:
                print("Another SPT ECMWF forecast process is running.\n"
                      "The lock file is located here: {0}\n"
                      "If this is an error, you have two options:\n"
                      "1) Delete the lock file.\n"
                      "2) Edit the lock file and set \"running\" to false. \n"
                      "Then, re-run this script. \n Exiting ...".format(
                          LOCK_INFO_FILE))
                return
            else:
                last_forecast_date = datetime.datetime.strptime(
                    previous_lock_info['last_forecast_date'], '%Y%m%d%H')
                run_ecmwf_folders = []
                for ecmwf_folder in ecmwf_folders:
                    # get date
                    forecast_date = get_datetime_from_forecast_folder(
                        ecmwf_folder)
                    # if more recent, add to list
                    # check to determine if forecast time step is 12 or 24 hours
                    if initialization_time_step == 24:
                        if forecast_date > last_forecast_date and forecast_date.hour != 12:
                            run_ecmwf_folders.append(ecmwf_folder)
                    elif initialization_time_step == 12:
                        if forecast_date > last_forecast_date:
                            run_ecmwf_folders.append(ecmwf_folder)
                ecmwf_folders = run_ecmwf_folders

        if not ecmwf_folders:
            print("No new forecasts found to run. Exiting ...")
            return

        # GENERATE NEW LOCK INFO FILE
        update_lock_info_file(LOCK_INFO_FILE, True,
                              last_forecast_date.strftime('%Y%m%d%H'))

        # rapid_input_directories_sub = [rapid_input_directory for rapid_input_directory in rapid_input_directories if hydroshed_index in rapid_input_directory]
        # Try/Except added for lock file
        try:
            # ADD SEASONAL INITIALIZATION WHERE APPLICABLE
            if initialize_flows:
                initial_forecast_date_timestep = get_date_timestep_from_forecast_folder(
                    ecmwf_folders[0])
                seasonal_init_job_list = []
                for rapid_input_directory in rapid_input_directories:
                    seasonal_master_watershed_input_directory = os.path.join(
                        rapid_io_files_location, "input",
                        rapid_input_directory)
                    # add seasonal initialization if no initialization file and historical Qout file exists
                    if era_interim_data_location and os.path.exists(
                            era_interim_data_location):
                        era_interim_watershed_directory = os.path.join(
                            era_interim_data_location, rapid_input_directory)
                        if os.path.exists(era_interim_watershed_directory):
                            # INITIALIZE FROM SEASONAL AVERAGE FILE
                            seasonal_streamflow_file = glob(
                                os.path.join(era_interim_watershed_directory,
                                             "seasonal_average*.nc"))
                            if seasonal_streamflow_file:
                                seasonal_init_job_list.append(
                                    (seasonal_streamflow_file[0],
                                     seasonal_master_watershed_input_directory,
                                     initial_forecast_date_timestep,
                                     "seasonal_average_file"))
                            else:
                                # INITIALIZE FROM HISTORICAL STREAMFLOW FILE
                                historical_qout_file = glob(
                                    os.path.join(
                                        era_interim_watershed_directory,
                                        "Qout*.nc"))
                                if historical_qout_file:
                                    seasonal_init_job_list.append((
                                        historical_qout_file[0],
                                        seasonal_master_watershed_input_directory,
                                        initial_forecast_date_timestep,
                                        "historical_streamflow_file"))
                if seasonal_init_job_list:
                    # use multiprocessing instead of htcondor due to potential for huge file sizes
                    if len(seasonal_init_job_list) > 1:
                        seasonal_pool = mp_Pool()
                        seasonal_pool.imap(
                            compute_seasonal_initial_rapid_flows_multicore_worker,
                            seasonal_init_job_list,
                            chunksize=1)
                        seasonal_pool.close()
                        seasonal_pool.join()
                    else:
                        compute_seasonal_initial_rapid_flows_multicore_worker(
                            seasonal_init_job_list[0])
            # ----------------------------------------------------------------------
            # BEGIN ECMWF-RAPID FORECAST LOOP
            # ----------------------------------------------------------------------
            master_job_info_list = []
            for ecmwf_folder in ecmwf_folders:
                if download_ecmwf:
                    # download forecast
                    ecmwf_folder = download_and_extract_ftp(
                        ecmwf_forecast_location, ecmwf_folder, ftp_host,
                        ftp_login, ftp_passwd, ftp_directory,
                        delete_past_ecmwf_forecasts)

                # get list of forecast files
                ecmwf_forecasts = glob(
                    os.path.join(ecmwf_folder, '*.runoff.%s*nc' % region))

                # look for old version of forecasts
                if not ecmwf_forecasts:
                    ecmwf_forecasts = glob(os.path.join(ecmwf_folder, 'full_*.runoff.netcdf')) + \
                                      glob(os.path.join(ecmwf_folder, '*.52.205.*.runoff.netcdf'))

                if not ecmwf_forecasts:
                    print("ERROR: Forecasts not found in folder. Exiting ...")
                    update_lock_info_file(
                        LOCK_INFO_FILE, False,
                        last_forecast_date.strftime('%Y%m%d%H'))
                    return

                # sort so the largest files are processed first
                ecmwf_forecasts.sort(key=os.path.getsize, reverse=True)

                forecast_date_timestep = get_date_timestep_from_forecast_folder(
                    ecmwf_folder)
                print("Running ECMWF Forecast: {0}".format(
                    forecast_date_timestep))

                # submit jobs to downsize ecmwf files to watershed
                rapid_watershed_jobs = {}
                for rapid_input_directory in rapid_input_directories:

                    # keep list of jobs
                    rapid_watershed_jobs[rapid_input_directory] = {
                        'jobs': [],
                        'jobs_info': []
                    }
                    print("Running forecasts for: {0} {1}".format(
                        rapid_input_directory, os.path.basename(ecmwf_folder)))

                    watershed, subbasin = get_watershed_subbasin_from_folder(
                        rapid_input_directory)
                    master_watershed_input_directory = os.path.join(
                        rapid_io_files_location, "input",
                        rapid_input_directory)
                    master_watershed_outflow_directory = os.path.join(
                        rapid_io_files_location, 'output',
                        rapid_input_directory, forecast_date_timestep)
                    try:
                        os.makedirs(master_watershed_outflow_directory)
                    except OSError:
                        pass

                    # initialize HTCondor/multiprocess Logging Directory
                    subprocess_forecast_log_dir = os.path.join(
                        subprocess_log_directory, forecast_date_timestep)
                    try:
                        os.makedirs(subprocess_forecast_log_dir)
                    except OSError:
                        pass

                    # add USGS gage data to initialization file
                    if initialize_flows:
                        # update initial flows with USGS data
                        update_inital_flows_usgs(
                            master_watershed_input_directory,
                            forecast_date_timestep)

                    # create jobs for HTCondor/multiprocess
                    for watershed_job_index, forecast in enumerate(
                            ecmwf_forecasts):
                        ensemble_number = get_ensemble_number_from_forecast(
                            forecast)

                        # get basin names
                        outflow_file_name = 'Qout_%s_%s_%s.nc' % (
                            watershed.lower(), subbasin.lower(),
                            ensemble_number)
                        node_rapid_outflow_file = outflow_file_name
                        master_rapid_outflow_file = os.path.join(
                            master_watershed_outflow_directory,
                            outflow_file_name)

                        job_name = 'job_%s_%s_%s_%s' % (forecast_date_timestep,
                                                        watershed, subbasin,
                                                        ensemble_number)

                        rapid_watershed_jobs[rapid_input_directory][
                            'jobs_info'].append({
                                'watershed':
                                watershed,
                                'subbasin':
                                subbasin,
                                'outflow_file_name':
                                master_rapid_outflow_file,
                                'forecast_date_timestep':
                                forecast_date_timestep,
                                'ensemble_number':
                                ensemble_number,
                                'master_watershed_outflow_directory':
                                master_watershed_outflow_directory,
                                'data_manager':
                                data_manager  # added this to try to upload forecast in mp
                            })
                        if mp_mode == "htcondor":
                            # create job to downscale forecasts for watershed
                            job = CJob(job_name, tmplt.vanilla_transfer_files)
                            job.set(
                                'executable',
                                os.path.join(LOCAL_SCRIPTS_DIRECTORY,
                                             'htcondor_ecmwf_rapid.py'))
                            job.set(
                                'transfer_input_files', "%s, %s, %s" %
                                (forecast, master_watershed_input_directory,
                                 LOCAL_SCRIPTS_DIRECTORY))
                            job.set('initialdir', subprocess_forecast_log_dir)
                            job.set(
                                'arguments', '%s %s %s %s %s %s' %
                                (forecast, forecast_date_timestep,
                                 watershed.lower(), subbasin.lower(),
                                 rapid_executable_location, initialize_flows))
                            job.set(
                                'transfer_output_remaps',
                                "\"%s = %s\"" % (node_rapid_outflow_file,
                                                 master_rapid_outflow_file))
                            job.submit()
                            rapid_watershed_jobs[rapid_input_directory][
                                'jobs'].append(job)
                        elif mp_mode == "multiprocess":
                            rapid_watershed_jobs[rapid_input_directory][
                                'jobs'].append((
                                    forecast,
                                    forecast_date_timestep,
                                    watershed.lower(),
                                    subbasin.lower(),
                                    rapid_executable_location,
                                    initialize_flows,
                                    job_name,
                                    master_rapid_outflow_file,
                                    master_watershed_input_directory,
                                    mp_execute_directory,
                                    subprocess_forecast_log_dir,
                                    watershed_job_index,
                                    initialization_time_step,
                                    # dam arguments included, MJS 8/23/2020 ........
                                    BS_opt_dam,
                                    IS_dam_tot,
                                    IS_dam_use,
                                    dam_tot_id_file,
                                    dam_use_id_file,
                                    dam_file))
                            # COMMENTED CODE FOR DEBUGGING SERIALLY
                            ##                    run_ecmwf_rapid_multiprocess_worker((forecast,
                            ##                                                         forecast_date_timestep,
                            ##                                                         watershed.lower(),
                            ##                                                         subbasin.lower(),
                            ##                                                         rapid_executable_location,
                            ##                                                         initialize_flows,
                            ##                                                         job_name,
                            ##                                                         master_rapid_outflow_file,
                            ##                                                         master_watershed_input_directory,
                            ##                                                         mp_execute_directory,
                            ##                                                         subprocess_forecast_log_dir,
                            ##                                                         watershed_job_index
                            ##                                                         initialization_time_step))
                        else:
                            raise Exception(
                                "ERROR: Invalid mp_mode. Valid types are htcondor and multiprocess ..."
                            )

                for rapid_input_directory, watershed_job_info in rapid_watershed_jobs.items(
                ):

                    # add sub job list to master job list
                    master_job_info_list = master_job_info_list + watershed_job_info[
                        'jobs_info']
                    if mp_mode == "htcondor":
                        # wait for jobs to finish then upload files
                        for job_index, job in enumerate(
                                watershed_job_info['jobs']):
                            job.wait()
                            # upload file when done
                            if data_manager:
                                upload_single_forecast(
                                    watershed_job_info['jobs_info'][job_index],
                                    data_manager)

                    elif mp_mode == "multiprocess":
                        pool_main = mp_Pool()
                        func = partial(run_ecmwf_rapid_multiprocess_worker,
                                       watershed_job_info['jobs_info'])
                        multiprocess_worker_list = pool_main.imap_unordered(
                            func,
                            watershed_job_info['jobs'],
                            chunksize=1)
                        if data_manager:
                            for multi_job_index in multiprocess_worker_list:
                                # upload file when done
                                upload_single_forecast(
                                    watershed_job_info['jobs_info']
                                    [multi_job_index], data_manager)

                        # just in case ...
                        pool_main.close()
                        pool_main.join()

                    # when all jobs in watershed are done, generate warning points
                    if create_warning_points:
                        watershed, subbasin = get_watershed_subbasin_from_folder(
                            rapid_input_directory)
                        forecast_directory = os.path.join(
                            rapid_io_files_location, 'output',
                            rapid_input_directory, forecast_date_timestep)

                        era_interim_watershed_directory = os.path.join(
                            era_interim_data_location, rapid_input_directory)
                        if os.path.exists(era_interim_watershed_directory):
                            print(
                                "Generating warning points for {0}-{1} from {2}"
                                .format(watershed, subbasin,
                                        forecast_date_timestep))
                            era_interim_files = glob(
                                os.path.join(era_interim_watershed_directory,
                                             "return_period*.nc"))
                            if era_interim_files:
                                try:
                                    generate_ecmwf_warning_points(
                                        forecast_directory,
                                        era_interim_files[0],
                                        forecast_directory,
                                        threshold=warning_flow_threshold)
                                    if upload_output_to_ckan and data_store_url and data_store_api_key:
                                        data_manager.initialize_run_ecmwf(
                                            watershed, subbasin,
                                            forecast_date_timestep)
                                        data_manager.zip_upload_warning_points_in_directory(
                                            forecast_directory)
                                except Exception as ex:
                                    print(ex)
                                    pass
                            else:
                                print(
                                    "No ERA Interim file found. Skipping ...")
                        else:
                            print(
                                "No ERA Interim directory found for {0}. "
                                "Skipping warning point generation...".format(
                                    rapid_input_directory))

                # initialize flows for next run
                if initialize_flows:
                    # create new init flow files/generate warning point files
                    for rapid_input_directory in rapid_input_directories:
                        input_directory = os.path.join(rapid_io_files_location,
                                                       'input',
                                                       rapid_input_directory)
                        forecast_directory = os.path.join(
                            rapid_io_files_location, 'output',
                            rapid_input_directory, forecast_date_timestep)
                        if os.path.exists(forecast_directory):
                            # loop through all the rapid_namelist files in directory
                            watershed, subbasin = get_watershed_subbasin_from_folder(
                                rapid_input_directory)
                            if initialize_flows:
                                print(
                                    "Initializing flows for {0}-{1} from {2}".
                                    format(watershed, subbasin,
                                           forecast_date_timestep))
                                basin_files = find_current_rapid_output(
                                    forecast_directory, watershed, subbasin)
                                try:
                                    compute_initial_rapid_flows(
                                        basin_files, input_directory,
                                        forecast_date_timestep,
                                        initialization_time_step)
                                except Exception as ex:
                                    print(ex)
                                    pass

                # run autoroute process if added
                if autoroute_executable_location and autoroute_io_files_location:
                    # run autoroute on all of the watersheds
                    run_autorapid_process(autoroute_executable_location,
                                          autoroute_io_files_location,
                                          rapid_io_files_location,
                                          forecast_date_timestep,
                                          subprocess_forecast_log_dir,
                                          geoserver_url, geoserver_username,
                                          geoserver_password, app_instance_id)

                last_forecast_date = get_datetime_from_date_timestep(
                    forecast_date_timestep)

                # update lock info file with next forecast
                update_lock_info_file(LOCK_INFO_FILE, True,
                                      last_forecast_date.strftime('%Y%m%d%H'))

                # ----------------------------------------------------------------------
                # END FORECAST LOOP
                # ----------------------------------------------------------------------
        except Exception as ex:
            print_exc()
            print(ex)
            pass

        # Release & update lock info file with all completed forecasts
        update_lock_info_file(LOCK_INFO_FILE, False,
                              last_forecast_date.strftime('%Y%m%d%H'))

        if delete_output_when_done:
            # delete local datasets
            for job_info in master_job_info_list:
                try:
                    rmtree(job_info['master_watershed_outflow_directory'])
                except OSError:
                    pass
            # delete watershed folder if empty
            for item in os.listdir(
                    os.path.join(rapid_io_files_location, 'output')):
                try:
                    os.rmdir(
                        os.path.join(rapid_io_files_location, 'output', item))
                except OSError:
                    pass

        # print info to user
        time_end = datetime.datetime.utcnow()
        print("Time Begin: {0}".format(time_begin_all))
        print("Time Finish: {0}".format(time_end))
        print("TOTAL TIME: {0}".format(time_end - time_begin_all))
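
For orientation, a hedged invocation sketch of run_ecmwf_forecast_process; every path below is a placeholder and the remaining keyword arguments keep their defaults.

# Hypothetical invocation; all paths are placeholders.
if __name__ == "__main__":
    run_ecmwf_forecast_process(
        rapid_executable_location='/path/to/rapid/run/rapid',
        rapid_io_files_location='/path/to/rapid-io',
        ecmwf_forecast_location='/path/to/ecmwf',
        subprocess_log_directory='/path/to/logs/subprocess',
        main_log_directory='/path/to/logs/main',
        download_ecmwf=False,           # run against forecasts already on disk
        initialize_flows=True,
        mp_mode='multiprocess',         # or 'htcondor' when condorpy is installed
        mp_execute_directory='/path/to/mp_execute',
    )
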
def run_ecmwf_rapid_process(rapid_executable_location, #path to RAPID executable
                            rapid_io_files_location, #path to RAPID input/output directory
                            ecmwf_forecast_location, #path to ECMWF forecasts
                            subprocess_log_directory, #path to store HTCondor/multiprocess logs
                            main_log_directory, #path to store main logs
                            data_store_url="", #CKAN API url
                            data_store_api_key="", #CKAN API Key,
                            data_store_owner_org="", #CKAN owner organization
                            app_instance_id="", #Streamflow Prediction tool instance ID
                            sync_rapid_input_with_ckan=False, #match Streamflow Prediction tool RAPID input
                            download_ecmwf=True, #Download recent ECMWF forecast before running,
                            date_string=None, #string of date of interest
                            ftp_host="", #ECMWF ftp site path
                            ftp_login="", #ECMWF ftp login name
                            ftp_passwd="", #ECMWF ftp password
                            ftp_directory="", #ECMWF ftp directory
                            upload_output_to_ckan=False, #upload data to CKAN and remove local copy
                            delete_output_when_done=False, #delete all output data from this code
                            initialize_flows=False, #use forecast to initialize next run
                            era_interim_data_location="", #path to ERA Interim return period data 
                            create_warning_points=False, #generate warning points for Streamflow Prediction Tool
                            autoroute_executable_location="", #location of AutoRoute executable
                            autoroute_io_files_location="", #path to AutoRoute input/output directory
                            geoserver_url='', #url to API endpoint ending in geoserver/rest
                            geoserver_username='', #username for geoserver
                            geoserver_password='', #password for geoserver
                            mp_mode='htcondor', #valid options are htcondor and multiprocess,
                            mp_execute_directory='',#required if using multiprocess mode
                            ):
    """
    This is the main ECMWF RAPID process.
    """
    time_begin_all = datetime.datetime.utcnow()
    if date_string is None:
        date_string = time_begin_all.strftime('%Y%m%d')

    if mp_mode == "multiprocess":
        if not mp_execute_directory or not os.path.exists(mp_execute_directory):
            raise Exception("If mode is multiprocess, mp_execute_directory is required ...")
            
    #date_string = datetime.datetime(2016,2,12).strftime('%Y%m%d')
    local_scripts_location = os.path.dirname(os.path.realpath(__file__))

    if sync_rapid_input_with_ckan and app_instance_id and data_store_url and data_store_api_key:
        #sync with data store
        ri_manager = RAPIDInputDatasetManager(data_store_url,
                                              data_store_api_key,
                                              'ecmwf',
                                              app_instance_id)
        ri_manager.sync_dataset(os.path.join(rapid_io_files_location,'input'))

    #clean up old log files
    clean_logs(subprocess_log_directory, main_log_directory)

    #get list of correctly formatted rapid input directories in rapid directory
    rapid_input_directories = get_valid_watershed_list(os.path.join(rapid_io_files_location, "input"))
    
    if download_ecmwf and ftp_host:
        #download all files for today
        ecmwf_folders = sorted(download_all_ftp(ecmwf_forecast_location,
                                                'Runoff.%s*.netcdf.tar*' % date_string,
                                                ftp_host,
                                                ftp_login,
                                                ftp_passwd,
                                                ftp_directory))
    else:
        ecmwf_folders = sorted(glob(os.path.join(ecmwf_forecast_location,
                                                 'Runoff.'+date_string+'*.netcdf')))
    data_manager = None
    if upload_output_to_ckan and data_store_url and data_store_api_key:
        #init data manager for CKAN
        data_manager = ECMWFRAPIDDatasetManager(data_store_url,
                                                data_store_api_key,
                                                data_store_owner_org)

    #ADD SEASONAL INITIALIZATION WHERE APPLICABLE
    if initialize_flows:
        initial_forecast_date_timestep = get_date_timestep_from_forecast_folder(ecmwf_folders[0])
        seasonal_init_job_list = []
        for rapid_input_directory in rapid_input_directories:
            seasonal_master_watershed_input_directory = os.path.join(rapid_io_files_location, "input", rapid_input_directory)
            #add seasonal initialization if no initialization file and historical Qout file exists
            if era_interim_data_location and os.path.exists(era_interim_data_location):
                era_interim_watershed_directory = os.path.join(era_interim_data_location, rapid_input_directory)
                if os.path.exists(era_interim_watershed_directory):
                    historical_qout_file = glob(os.path.join(era_interim_watershed_directory, "Qout*.nc"))
                    if historical_qout_file:
                        seasonal_init_job_list.append((historical_qout_file[0], 
                                                       seasonal_master_watershed_input_directory,
                                                       initial_forecast_date_timestep))
        if seasonal_init_job_list:
            #use multiprocessing instead of htcondor due to potential for huge file sizes
            if len(seasonal_init_job_list) > 1:
                seasonal_pool = mp_Pool()
                seasonal_pool.imap(compute_seasonal_initial_rapid_flows_multicore_worker,
                                   seasonal_init_job_list,
                                   chunksize=1)
                seasonal_pool.close()
                seasonal_pool.join()
            else:
                compute_seasonal_initial_rapid_flows_multicore_worker(seasonal_init_job_list[0])

    #prepare ECMWF files
    master_job_info_list = []
    for ecmwf_folder in ecmwf_folders:
        ecmwf_forecasts = glob(os.path.join(ecmwf_folder,'full_*.runoff.netcdf')) + \
                          glob(os.path.join(ecmwf_folder,'*.52.205.*.runoff.netcdf'))
        #look for new version of forecasts
        if not ecmwf_forecasts:
            ecmwf_forecasts = glob(os.path.join(ecmwf_folder,'*.runoff.nc'))
            
        #sort so the largest files are processed first
        ecmwf_forecasts.sort(key=os.path.getsize, reverse=True)

        forecast_date_timestep = get_date_timestep_from_forecast_folder(ecmwf_folder)
        print(forecast_date_timestep)
        #submit jobs to downsize ecmwf files to watershed
        iteration = 0
        rapid_watershed_jobs = {}
        for rapid_input_directory in rapid_input_directories:
            #keep list of jobs
            rapid_watershed_jobs[rapid_input_directory] = {
                                                            'jobs': [], 
                                                            'jobs_info': []
                                                           }
            print("Running forecasts for:", rapid_input_directory, os.path.basename(ecmwf_folder))
            watershed, subbasin = get_watershed_subbasin_from_folder(rapid_input_directory)
            master_watershed_input_directory = os.path.join(rapid_io_files_location, "input", rapid_input_directory)
            master_watershed_outflow_directory = os.path.join(rapid_io_files_location, 'output',
                                                              rapid_input_directory, forecast_date_timestep)
            #add USGS gage data to initialization file
            if initialize_flows:
                #update initial flows with USGS data
                update_inital_flows_usgs(master_watershed_input_directory, 
                                         forecast_date_timestep)
            
            #create jobs for HTCondor
            for watershed_job_index, forecast in enumerate(ecmwf_forecasts):
                ensemble_number = get_ensemble_number_from_forecast(forecast)
                try:
                    os.makedirs(master_watershed_outflow_directory)
                except OSError:
                    pass

                #initialize HTCondor Logging Directory
                subprocess_forecast_log_dir = os.path.join(subprocess_log_directory, forecast_date_timestep)
                try:
                    os.makedirs(subprocess_forecast_log_dir)
                except OSError:
                    pass
                
                #get basin names
                outflow_file_name = 'Qout_%s_%s_%s.nc' % (watershed.lower(), subbasin.lower(), ensemble_number)
                node_rapid_outflow_file = outflow_file_name
                master_rapid_outflow_file = os.path.join(master_watershed_outflow_directory, outflow_file_name)

                job_name = 'job_%s_%s_%s_%s_%s' % (forecast_date_timestep, watershed, subbasin, ensemble_number, iteration)
                if mp_mode == "htcondor":
                    #create job to downscale forecasts for watershed
                    job = CJob(job_name, tmplt.vanilla_transfer_files)
                    job.set('executable',os.path.join(local_scripts_location,'htcondor_ecmwf_rapid.py'))
                    job.set('transfer_input_files', "%s, %s, %s" % (forecast, master_watershed_input_directory, local_scripts_location))
                    job.set('initialdir', subprocess_forecast_log_dir)
                    job.set('arguments', '%s %s %s %s %s %s' % (forecast, forecast_date_timestep, watershed.lower(), subbasin.lower(),
                                                                rapid_executable_location, initialize_flows))
                    job.set('transfer_output_remaps',"\"%s = %s\"" % (node_rapid_outflow_file, master_rapid_outflow_file))
                    job.submit()
                    rapid_watershed_jobs[rapid_input_directory]['jobs'].append(job)
                    rapid_watershed_jobs[rapid_input_directory]['jobs_info'].append({'watershed' : watershed,
                                                                                     'subbasin' : subbasin,
                                                                                     'outflow_file_name' : master_rapid_outflow_file,
                                                                                     'forecast_date_timestep' : forecast_date_timestep,
                                                                                     'ensemble_number': ensemble_number,
                                                                                     'master_watershed_outflow_directory': master_watershed_outflow_directory,
                                                                                     })
                elif mp_mode == "multiprocess":
                    rapid_watershed_jobs[rapid_input_directory]['jobs'].append((forecast,
                                                                                forecast_date_timestep,
                                                                                watershed.lower(),
                                                                                subbasin.lower(),
                                                                                rapid_executable_location,
                                                                                initialize_flows,
                                                                                job_name,
                                                                                master_rapid_outflow_file,
                                                                                master_watershed_input_directory,
                                                                                mp_execute_directory,
                                                                                subprocess_forecast_log_dir,
                                                                                watershed_job_index))
##                    run_ecmwf_rapid_multiprocess_worker((forecast,
##                                                         forecast_date_timestep,
##                                                         watershed.lower(),
##                                                         subbasin.lower(),
##                                                         rapid_executable_location,
##                                                         initialize_flows,
##                                                         job_name,
##                                                         master_rapid_outflow_file,
##                                                         master_watershed_input_directory,
##                                                         mp_execute_directory,
##                                                         subprocess_forecast_log_dir,                     
##                                                         watershed_job_index))
                else:
                    raise Exception("ERROR: Invalid mp_mode. Valid types are htcondor and multiprocess ...")
                iteration += 1
        
        
        for rapid_input_directory, watershed_job_info in rapid_watershed_jobs.items():
            #add sub job list to master job list
            master_job_info_list = master_job_info_list + watershed_job_info['jobs_info']
            if mp_mode == "htcondor":
                #wait for jobs to finish then upload files
                for job_index, job in enumerate(watershed_job_info['jobs']):
                    job.wait()
                    #upload file when done
                    if data_manager:
                        upload_single_forecast(watershed_job_info['jobs_info'][job_index], data_manager)
                        
            elif mp_mode == "multiprocess":
                pool_main = mp_Pool()
                multiprocess_worker_list = pool_main.imap_unordered(run_ecmwf_rapid_multiprocess_worker, 
                                                                    watershed_job_info['jobs'], 
                                                                    chunksize=1)
                if data_manager:
                    for multi_job_output in multiprocess_worker_list:
                        job_index = multi_job_output[0]
                        #upload file when done
                        upload_single_forecast(watershed_job_info['jobs_info'][job_index], data_manager)
                        
                #just in case ...
                pool_main.close()
                pool_main.join()

            #when all jobs in watershed are done, generate warning points
            if create_warning_points:
                watershed, subbasin = get_watershed_subbasin_from_folder(rapid_input_directory)
                forecast_directory = os.path.join(rapid_io_files_location, 
                                                  'output', 
                                                  rapid_input_directory, 
                                                  forecast_date_timestep)

                era_interim_watershed_directory = os.path.join(era_interim_data_location, rapid_input_directory)
                if os.path.exists(era_interim_watershed_directory):
                    print("Generating Warning Points for", watershed, subbasin, "from", forecast_date_timestep)
                    era_interim_files = glob(os.path.join(era_interim_watershed_directory, "return_period*.nc"))
                    if era_interim_files:
                        try:
                            generate_warning_points(forecast_directory, era_interim_files[0], forecast_directory, threshold=10)
                            if upload_output_to_ckan and data_store_url and data_store_api_key:
                                data_manager.initialize_run_ecmwf(watershed, subbasin, forecast_date_timestep)
                                data_manager.zip_upload_warning_points_in_directory(forecast_directory)
                        except Exception as ex:
                            print(ex)
                            pass
                    else:
                        print("No ERA Interim file found. Skipping ...")
                else:
                    print("No ERA Interim directory found for", rapid_input_directory, ". Skipping warning point generation...")
            

        #initialize flows for next run
        if initialize_flows:
            #create new init flow files/generate warning point files
            for rapid_input_directory in rapid_input_directories:
                input_directory = os.path.join(rapid_io_files_location, 
                                               'input', 
                                               rapid_input_directory)
                forecast_directory = os.path.join(rapid_io_files_location, 
                                                  'output', 
                                                  rapid_input_directory, 
                                                  forecast_date_timestep)
                if os.path.exists(forecast_directory):
                    #loop through all the rapid_namelist files in directory
                    watershed, subbasin = get_watershed_subbasin_from_folder(rapid_input_directory)
                    if initialize_flows:
                        print("Initializing flows for", watershed, subbasin, "from", forecast_date_timestep)
                        basin_files = find_current_rapid_output(forecast_directory, watershed, subbasin)
                        try:
                            compute_initial_rapid_flows(basin_files, input_directory, forecast_date_timestep)
                        except Exception as ex:
                            print(ex)
                            pass
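
Unlike this older run_ecmwf_rapid_process, the newer run_ecmwf_forecast_process shown earlier guards against concurrent runs with a JSON lock file. The sketch below is a minimal stand-in for update_lock_info_file, consistent only with the 'running' and 'last_forecast_date' keys that the forecast process reads back; it is an assumption, not the project's actual helper.

import json

def update_lock_info_file(lock_info_file, running, last_forecast_date):
    # Hedged stand-in: write the two keys the forecast process reads back.
    # The real helper ships with spt_compute and may do more.
    with open(lock_info_file, 'w') as fp_lock_info:
        json.dump({'running': running,
                   'last_forecast_date': last_forecast_date},
                  fp_lock_info)
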
Example #20
0
class TestJob(unittest.TestCase):
    def setUp(self):
        self.job_name = 'job_name'
        self.job = Job(self.job_name)

    def tearDown(self):
        pass

    def test__init__(self):
        attributes = OrderedDict()
        attributes['job_name'] = self.job_name
        attributes['executable'] = None
        attributes['arguments'] = None

        expected = {
            '_name': self.job_name,
            '_attributes': attributes,
            '_num_jobs': 1,
            '_cluster_id': 0,
            '_job_file': ''
        }
        actual = self.job.__dict__
        msg = 'testing initialization with default values'
        self.assertDictEqual(
            expected, actual,
            '%s\nExpected: %s\nActual: %s\n' % (msg, expected, actual))

        exe = 'exe'
        args = '-args'
        num_jobs = '5'
        self.job = Job(self.job_name, OrderedDict(), exe, args, num_jobs)
        attributes['executable'] = exe
        attributes['arguments'] = args

        expected = {
            '_name': self.job_name,
            '_attributes': attributes,
            '_num_jobs': int(num_jobs),
            '_cluster_id': 0,
            '_job_file': ''
        }
        actual = self.job.__dict__
        msg = 'testing initialization with all values supplied'
        self.assertDictEqual(
            expected, actual,
            '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))

        num_jobs = 'five'
        self.assertRaises(ValueError, Job, self.job_name, num_jobs=num_jobs)

    def test__str__(self):
        expected = 'job_name = %s\n\nqueue 1\n' % (self.job_name)
        actual = self.job.__str__()
        msg = 'testing to string with default initialization'
        self.assertEqual(
            expected, actual,
            '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))

    def test__repr__(self):
        expected = '<' \
                   'Job: name=%s, num_jobs=%d, cluster_id=%s>' % (self.job_name, 1, 0)
        actual = self.job.__repr__()
        msg = 'testing repr with default initialization'
        self.assertEqual(
            expected, actual,
            '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))

    def test__copy__(self):
        original = self.job
        copy = self.job.__copy__()
        expected = original.name
        actual = copy.name
        msg = 'testing that name of copy is equal to original'
        self.assertEqual(
            expected, actual,
            '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))

        expected = original.attributes
        actual = copy.attributes
        msg = 'testing that attributes dictionary of copy is equal to original'
        self.assertDictEqual(
            expected, actual,
            '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))
        msg = "testing that attributes is the same instance as the original's"
        self.assertIs(
            expected, actual,
            '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))

    def test__deepcopy__(self):
        original = self.job
        memo = dict()
        copy = self.job.__deepcopy__(memo)
        expected = self.job.name
        actual = copy.name
        msg = 'testing that name of deepcopy is equal to original'
        self.assertEqual(
            expected, actual,
            '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))

        expected = original.attributes
        actual = copy.attributes
        msg = 'testing that attributes dictionary of copy is equal to original'
        self.assertDictEqual(
            expected, actual,
            '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))
        msg = "testing that attributes is not the same instance as the original's"
        self.assertIsNot(
            expected, actual,
            '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))

    def test__getattr__(self):
        exe = 'exe'
        self.job = Job(self.job_name, executable=exe)
        expected = exe
        actual = self.job.executable
        msg = 'testing that existing value is returned'
        self.assertEqual(
            expected, actual,
            '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))

        pass

    def test__setattr__(self):
        pass

    def test_name(self):
        expected = self.job_name
        actual = self.job.name
        msg = 'checking initialization of name'
        self.assertEqual(
            expected, actual,
            '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))

        new_name = 'new_name'
        self.job.name = new_name

        expected = new_name
        actual = self.job.name
        msg = 'checking assignment of name'
        self.assertEqual(
            expected, actual,
            '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))

    def test_attributes(self):
        pass

    def test_num_jobs(self):
        pass

    def test_cluster_id(self):
        pass

    def test_job_file(self):
        job_file_name = '%s.job' % (self.job_name)
        job_file = os.path.join(os.getcwd(), job_file_name)
        expected = job_file
        actual = self.job.job_file
        msg = 'checking resolving attribute function for job file'
        self.assertEqual(
            expected, actual,
            '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))

        init_dir = 'init_dir'
        self.job.initialdir = init_dir
        job_file = os.path.join(init_dir, job_file_name)
        self.assertEqual(job_file, self.job.job_file)

    def test_log_file(self):
        self.job = Job(self.job_name, Templates.base)
        log_file = '%s/%s.%s.log' % (self.job.logdir, self.job_name,
                                     self.job.cluster_id)
        expected = log_file
        actual = self.job.log_file
        msg = 'checking resolving attribute function for log file'
        self.assertEqual(
            expected, actual,
            '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))

    def test_initial_dir(self):
        pass

    def test_submit(self):
        pass

    def test_remove(self):
        pass

    def test_edit(self):
        expected = NotImplementedError
        actual = self.job.edit
        self.assertRaises(expected, actual)

    def test_status(self):
        expected = NotImplementedError
        actual = self.job.edit
        self.assertRaises(expected, actual)

    def test_wait(self):
        self.job.wait()

    def test_get(self):
        non_existent_attr = 'not-there'
        expected = None
        actual = self.job.get(non_existent_attr)
        msg = 'testing that None is returned when attribute does not exist'
        self.assertIsNone(
            actual,
            '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))

        expected = 'expected'
        actual = self.job.get(non_existent_attr, expected)
        msg = 'testing that supplied value is returned when attribute does not exist'
        self.assertEqual(
            expected, actual,
            '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))

        exe = 'exe'
        self.job = Job(self.job_name, executable=exe)
        expected = exe
        actual = self.job.get('executable')
        msg = 'testing that existing value is returned'
        self.assertEqual(
            expected, actual,
            '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))

    def test_set(self):
        key = 'was-not-there'
        value = 'now-it-is'
        self.job.set(key, value)
        expected = value
        actual = self.job.attributes[key]
        msg = 'testing that an attribute that did not previously exist is set correctly'
        self.assertEqual(
            expected, actual,
            '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))

        key = 'was-already-there'
        value = 'used-to-be-this'
        new_value = 'now-it-is-this'
        self.job.set(key, value)
        self.job.set(key, new_value)
        expected = new_value
        actual = self.job.attributes[key]
        msg = 'testing that attribute that previously existed is re-set correctly'
        self.assertEqual(
            expected, actual,
            '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))

    def test_delete(self):
        key = 'was-not-there'
        value = 'now-it-is'
        self.job.set(key, value)
        self.job.delete(key)
        member = key
        container = self.job.attributes
        msg = 'testing that attribute is removed when deleted'
        self.assertNotIn(member, container, msg)

    def test_write_job_file(self):
        pass

    def test_list_attributes(self):
        pass

    def test_make_dir(self):
        pass

    def test_make_job_dirs(self):
        pass

    def test_resolve_attribute(self):
        job = Job(self.job_name, Templates.vanilla_base)
        expected = self.job_name
        actual = job._resolve_attribute('initialdir')
        msg = 'checking resolving attribute function'
        self.assertEqual(
            expected, actual,
            '%s\nExpected: %s\nActual:   %s\n' % (msg, expected, actual))

    def test_resolve_attribute_match(self):
        pass
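The assertions above rely on self.job, self.job_name, and self.assert_args, which come from a setUp fixture that is not part of this excerpt. Below is a minimal sketch of what such a fixture might look like, assuming the condorpy package exposes Job and Templates; the class name and default values are illustrative guesses, not the original code.

# Hypothetical fixture sketch -- the real setUp is not shown in this excerpt.
import unittest
from collections import OrderedDict

from condorpy import Job, Templates  # assumed import path


class TestJob(unittest.TestCase):

    def setUp(self):
        self.job_name = 'job_name'
        self.job = Job(self.job_name, OrderedDict())
        self.expected = None
        self.actual = None
        self.msg = ''

    @property
    def assert_args(self):
        # Bundles (expected, actual, msg) so tests can call self.assertDictEqual(*self.assert_args).
        return (self.expected,
                self.actual,
                '%s\nExpected: %s\nActual:   %s\n' % (self.msg, self.expected, self.actual))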

def run_era_interim_rapid_process(rapid_executable_location, rapid_io_files_location, ecmwf_forecast_location,
                                  era_interim_data_location, condor_log_directory, main_log_directory, data_store_url,
                                  data_store_api_key, app_instance_id, sync_rapid_input_with_ckan, download_ecmwf,
                                  download_era_interim, upload_output_to_ckan, generate_return_periods_file):
    """
    This is the main process
    """
    time_begin_all = datetime.datetime.utcnow()
    date_string = time_begin_all.strftime('%Y%m%d')
    #date_string = datetime.datetime(2015,2,3).strftime('%Y%m%d')
    rapid_scripts_location = os.path.dirname(os.path.realpath(__file__))

    if sync_rapid_input_with_ckan and app_instance_id and data_store_url and data_store_api_key:
        #sync with data store
        ri_manager = RAPIDInputDatasetManager(data_store_url,
                                              data_store_api_key,
                                              'ecmwf',
                                              app_instance_id)
        ri_manager.sync_dataset(os.path.join(rapid_io_files_location,'input'))

    #clean up old log files
    clean_logs(condor_log_directory, main_log_directory)

    #initialize HTCondor Directory
    condor_init_dir = os.path.join(condor_log_directory, date_string)
    try:
        os.makedirs(condor_init_dir)
    except OSError:
        pass

    #get list of correctly formatted rapid input directories in the rapid io directory
    rapid_input_directories = []
    for directory in os.listdir(os.path.join(rapid_io_files_location, 'input')):
        if os.path.isdir(os.path.join(rapid_io_files_location, 'input', directory)) \
                and len(directory.split("-")) == 2:
            rapid_input_directories.append(directory)
        else:
            print directory, "incorrectly formatted. Skipping ..."

    era_interim_folder = era_interim_data_location
    if download_era_interim:
        #download historical ERA data
        era_interim_folders = download_all_ftp(era_interim_data_location,
                                               'erai_runoff_1980to20*.tar.gz.tar')
        era_interim_folder = era_interim_folders[0]

    if upload_output_to_ckan and data_store_url and data_store_api_key:
        #init data manager for CKAN
        data_manager = ECMWFRAPIDDatasetManager(data_store_url,
                                                data_store_api_key)

    #run ERA Interim processes
    iteration = 0
    job_list = []
    job_info_list = []
    for rapid_input_directory in rapid_input_directories:
        input_folder_split = rapid_input_directory.split("-")
        watershed = input_folder_split[0]
        subbasin = input_folder_split[1]
        master_watershed_input_directory = os.path.join(rapid_io_files_location, "input", rapid_input_directory)
        master_watershed_outflow_directory = os.path.join(rapid_io_files_location, 'output',
                                                          rapid_input_directory)
        try:
            os.makedirs(master_watershed_outflow_directory)
        except OSError:
            pass
        #get basin names
        interim_folder_basename = os.path.basename(era_interim_folder)
        print era_interim_folder, interim_folder_basename
        outflow_file_name = 'Qout_%s.nc' % interim_folder_basename
        node_rapid_outflow_file = outflow_file_name
        master_rapid_outflow_file = os.path.join(master_watershed_outflow_directory, outflow_file_name)

        #create job to downscale forecasts for watershed
        job = CJob('job_%s_%s_%s' % (interim_folder_basename, watershed, iteration), tmplt.vanilla_transfer_files)
        job.set('executable',os.path.join(rapid_scripts_location,'compute_ecmwf_rapid.py'))
        job.set('transfer_input_files', "%s, %s" % (master_watershed_input_directory, rapid_scripts_location))
        job.set('initialdir', condor_init_dir)
        job.set('arguments', '%s %s %s %s %s' % (watershed.lower(), subbasin.lower(), rapid_executable_location,
                                                    era_interim_folder, ecmwf_forecast_location))
        job.set('transfer_output_remaps', "\"%s = %s\"" % (node_rapid_outflow_file, master_rapid_outflow_file))
        job.submit()
        job_list.append(job)
        job_info_list.append({'watershed' : watershed,
                              'subbasin' : subbasin,
                              'outflow_file_name' : master_rapid_outflow_file,
                              'master_watershed_outflow_directory': master_watershed_outflow_directory,
                              })
        iteration += 1

    #wait for jobs to finish then upload files
    for index, job in enumerate(job_list):
        job.wait()

        #generate return periods
        if generate_return_periods_file:
            job_info = job_info_list[index]
            watershed_output_dir = job_info['master_watershed_outflow_directory']
            erai_output_file = job_info['outflow_file_name']
            return_periods_file = os.path.join(watershed_output_dir, 'return_periods.nc')
            generate_return_periods(erai_output_file, return_periods_file)

        """
        #upload file when done
        if upload_output_to_ckan and data_store_url and data_store_api_key:
            job_info = job_info_list[index]
            print "Uploading", job_info['watershed'], job_info['subbasin'], \
                job_info['forecast_date_timestep'], job_info['ensemble_number']
            #Upload to CKAN
            data_manager.initialize_run_ecmwf(job_info['watershed'], job_info['subbasin'], job_info['forecast_date_timestep'])
            data_manager.update_resource_ensemble_number(job_info['ensemble_number'])
            #upload file
            try:
                #tar.gz file
                output_tar_file =  os.path.join(job_info['master_watershed_outflow_directory'], "%s.tar.gz" % data_manager.resource_name)
                if not os.path.exists(output_tar_file):
                    with tarfile.open(output_tar_file, "w:gz") as tar:
                        tar.add(job_info['outflow_file_name'], arcname=os.path.basename(job_info['outflow_file_name']))
                return_data = data_manager.upload_resource(output_tar_file)
                if not return_data['success']:
                    print return_data
                    print "Attempting to upload again"
                    return_data = data_manager.upload_resource(output_tar_file)
                    if not return_data['success']:
                        print return_data
                    else:
                        print "Upload success"
                else:
                    print "Upload success"
            except Exception, e:
                print e
                pass
            #remove tar.gz file
            os.remove(output_tar_file)

    #initialize flows for next run
    if initialize_flows:
        #create new init flow files
        for rapid_input_directory in rapid_input_directories:
            input_directory = os.path.join(rapid_io_files_location, 'input', rapid_input_directory)
            path_to_watershed_files = os.path.join(rapid_io_files_location, 'output', rapid_input_directory)
            forecast_date_timestep = None
            #finds the current output from downscaled ECMWF forecasts
            if os.path.exists(path_to_watershed_files):
                forecast_date_timestep = sorted([d for d in os.listdir(path_to_watershed_files) \
                                        if os.path.isdir(os.path.join(path_to_watershed_files, d))],
                                        reverse=True)[0]

            if forecast_date_timestep:
                #loop through all the rapid_namelist files in directory
                forecast_directory = os.path.join(path_to_watershed_files, forecast_date_timestep)
                input_folder_split = rapid_input_directory.split("-")
                watershed = input_folder_split[0]
                subbasin = input_folder_split[1]
                if initialize_flows:
                    print "Initializing flows for", watershed, subbasin, "from", forecast_date_timestep
                    basin_files = find_current_rapid_output(forecast_directory, watershed, subbasin)
                    try:
                        compute_initial_rapid_flows(basin_files, input_directory, forecast_date_timestep)
                    except Exception, ex:
                        print ex
                        pass
    if upload_output_to_ckan and data_store_url and data_store_api_key:
        #delete local datasets
        for job_info in job_info_list:
            try:
                rmtree(job_info['master_watershed_outflow_directory'])
            except OSError:
                pass
        #delete watershed folder if empty
        for item in os.listdir(os.path.join(rapid_io_files_location, 'output')):
            try:
                os.rmdir(os.path.join(rapid_io_files_location, 'output', item))
            except OSError:
                pass
    """

    #print info to user
    time_end = datetime.datetime.utcnow()
    print "Time Begin All: " + str(time_begin_all)
    print "Time Finish All: " + str(time_end)
    print "TOTAL TIME: "  + str(time_end-time_begin_all)