Example #1

# Imports assumed for this example (RAPIDpy exposes RAPIDDataset at package level):
import netCDF4 as nc
import numpy as np
from datetime import datetime

from RAPIDpy import RAPIDDataset


def generate_return_periods(qout_file, return_period_file, storm_duration_days=7):
    """
    Generate return period from RAPID Qout file
    """

    #get ERA Interim Data Analyzed
    with RAPIDDataset(qout_file) as qout_nc_file:
        print "Setting up Return Periods File ..."
        return_period_nc = nc.Dataset(return_period_file, 'w')
        
        return_period_nc.createDimension('rivid', qout_nc_file.size_river_id)

        timeSeries_var = return_period_nc.createVariable('rivid', 'i4', ('rivid',))
        timeSeries_var.long_name = (
            'Unique NHDPlus COMID identifier for each river reach feature')

        max_flow_var = return_period_nc.createVariable('max_flow', 'f8', ('rivid',))
        return_period_20_var = return_period_nc.createVariable('return_period_20', 'f8', ('rivid',))
        return_period_10_var = return_period_nc.createVariable('return_period_10', 'f8', ('rivid',))
        return_period_2_var = return_period_nc.createVariable('return_period_2', 'f8', ('rivid',))

        lat_var = return_period_nc.createVariable('lat', 'f8', ('rivid',),
                                                  fill_value=-9999.0)
        lat_var.long_name = 'latitude'
        lat_var.standard_name = 'latitude'
        lat_var.units = 'degrees_north'
        lat_var.axis = 'Y'

        lon_var = return_period_nc.createVariable('lon', 'f8', ('rivid',),
                                                  fill_value=-9999.0)
        lon_var.long_name = 'longitude'
        lon_var.standard_name = 'longitude'
        lon_var.units = 'degrees_east'
        lon_var.axis = 'X'

        return_period_nc.variables['lat'][:] = qout_nc_file.qout_nc.variables['lat'][:]
        return_period_nc.variables['lon'][:] = qout_nc_file.qout_nc.variables['lon'][:]

        river_id_list = qout_nc_file.get_river_id_array()
        return_period_nc.variables['rivid'][:] = river_id_list

        print "Extracting Data and Generating Return Periods ..."
        time_array = qout_nc_file.get_time_array()
        num_years = int((datetime.utcfromtimestamp(time_array[-1])-datetime.utcfromtimestamp(time_array[0])).days/365.2425)
        time_steps_per_day = (24*3600)/float((datetime.utcfromtimestamp(time_array[1])-datetime.utcfromtimestamp(time_array[0])).seconds)
        step = max(1,int(time_steps_per_day * storm_duration_days))

        for comid_index, comid in enumerate(river_id_list):

            filtered_flow_data = qout_nc_file.get_daily_qout_index(comid_index,
                                                                   steps_per_group=step,
                                                                   mode="max")
            sorted_flow_data = np.sort(filtered_flow_data)[:num_years:-1]

            rp_index_20 = int((num_years + 1)/20.0)
            rp_index_10 = int((num_years + 1)/10.0)
            rp_index_2 = int((num_years + 1)/2.0)
            
            max_flow_var[comid_index] = sorted_flow_data[0]
            return_period_20_var[comid_index] = sorted_flow_data[rp_index_20]
            return_period_10_var[comid_index] = sorted_flow_data[rp_index_10]
            return_period_2_var[comid_index] = sorted_flow_data[rp_index_2]

        return_period_nc.close()
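
The rank arithmetic above is a Weibull-style plotting position and is easy to sanity-check: with num_years = 39, rp_index_20 = int(40/20.0) = 2, so the third-largest storm-duration maximum (the array is sorted descending) is reported as the 20-year flow. A minimal driver sketch follows; the file paths are hypothetical and the imports are the ones added at the top of this example.

# Usage sketch (hypothetical paths): write 20-, 10-, and 2-year return
# period flows for every river ID found in the RAPID Qout file.
generate_return_periods(
    qout_file='/data/era_interim/Qout_erai_1980_2014.nc',      # hypothetical input
    return_period_file='/data/era_interim/return_periods.nc',  # hypothetical output
    storm_duration_days=7)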

Example #2

    def compute_init_flows_from_past_forecast(self,
                                              forecasted_streamflow_files):
        """
        Compute initial flows from the past ECMWF forecast ensemble
        """
        if forecasted_streamflow_files:
            #get list of COMIDS
            print(
                "Computing initial flows from the past ECMWF forecast ensemble ..."
            )
            with RAPIDDataset(forecasted_streamflow_files[0]) as qout_nc:
                comid_index_list, reordered_comid_list, ignored_comid_list = qout_nc.get_subset_riverid_index_list(
                    self.stream_id_array)
            print("Extracting data ...")
            reach_prediciton_array = np.zeros(
                (len(self.stream_id_array), len(forecasted_streamflow_files), 1))
            #get information from datasets
            for file_index, forecasted_streamflow_file in enumerate(
                    forecasted_streamflow_files):
                try:
                    ensemble_index = int(os.path.basename(
                        forecasted_streamflow_file).split(".")[0].split("_")[-1])
                    try:
                        #Get hydrograph data from ECMWF Ensemble
                        with RAPIDDataset(forecasted_streamflow_file) as predicted_qout_nc:
                            time_length = predicted_qout_nc.size_time
                            if not predicted_qout_nc.is_time_variable_valid():
                                #data is raw rapid output
                                data_values_2d_array = predicted_qout_nc.get_qout_index(
                                    comid_index_list, time_index=1)
                            else:
                                #the data is CF compliant and has time=0 added to output
                                if ensemble_index == 52:
                                    if time_length == 125:
                                        data_values_2d_array = predicted_qout_nc.get_qout_index(
                                            comid_index_list, time_index=12)
                                    else:
                                        data_values_2d_array = predicted_qout_nc.get_qout_index(
                                            comid_index_list, time_index=2)
                                else:
                                    if time_length == 85:
                                        data_values_2d_array = predicted_qout_nc.get_qout_index(
                                            comid_index_list, time_index=4)
                                    else:
                                        data_values_2d_array = predicted_qout_nc.get_qout_index(
                                            comid_index_list, time_index=2)
                    except Exception:
                        print("Invalid ECMWF forecast file {0}".format(
                            forecasted_streamflow_file))
                        continue
                    #organize the data
                    for comid_index, comid in enumerate(reordered_comid_list):
                        reach_prediciton_array[comid_index][
                            file_index] = data_values_2d_array[comid_index]
                except Exception as e:
                    print(e)

            print("Analyzing data ...")
            for index in range(len(self.stream_segments)):
                try:
                    #get where comids are in netcdf file
                    data_index = np.where(
                        reordered_comid_list ==
                        self.stream_segments[index].stream_id)[0][0]
                    self.stream_segments[index].init_flow = np.mean(
                        reach_prediciton_array[data_index])
                except Exception:
                    #stream id not found in list; use zero init flow
                    self.stream_segments[index].init_flow = 0

            print("Initialization Complete!")

Example #3

def generate_warning_points(ecmwf_prediction_folder, return_period_file,
                            out_directory, threshold):
    """
    Create warning points from return periods and ECMWF prediction data
    """

    #Get list of prediction files
    prediction_files = sorted([
        os.path.join(ecmwf_prediction_folder, f)
        for f in os.listdir(ecmwf_prediction_folder)
        if not os.path.isdir(os.path.join(ecmwf_prediction_folder, f))
        and f.lower().endswith('.nc')
    ])

    #get the comids in ECMWF files
    with RAPIDDataset(prediction_files[0]) as qout_nc:
        prediction_rivids = qout_nc.get_river_id_array()
        comid_list_length = qout_nc.size_river_id
        size_time = qout_nc.size_time
        first_half_size = 40  #run 6-hr resolution for all
        if qout_nc.is_time_variable_valid():
            if size_time == 41 or size_time == 61:
                #run at full or 6-hr resolution for high res and 6-hr for low res
                first_half_size = 41
            elif size_time == 85 or size_time == 125:
                #run at full resolution for all
                first_half_size = 65
        forecast_date_timestep = os.path.basename(ecmwf_prediction_folder)
        forecast_start_date = datetime.strptime(forecast_date_timestep[:11],
                                                "%Y%m%d.%H")
        time_array = qout_nc.get_time_array(
            datetime_simulation_start=forecast_start_date,
            simulation_time_step_seconds=6 * 3600,
            return_datetime=True)
        current_day = forecast_start_date
        daily_time_index_array = [0]
        for idx, var_time in enumerate(time_array):
            if current_day.day != var_time.day:
                daily_time_index_array.append(idx)
                current_day = var_time

    print("Extracting Forecast Data ...")
    #get information from datasets
    reach_prediciton_array_first_half = np.zeros(
        (comid_list_length, len(prediction_files), first_half_size))
    reach_prediciton_array_second_half = np.zeros(
        (comid_list_length, len(prediction_files), 20))
    for file_index, prediction_file in enumerate(prediction_files):
        data_values_2d_array = []
        try:
            ensemble_index = int(
                os.path.basename(prediction_file)[:-3].split("_")[-1])
            #Get hydrograph data from ECMWF Ensemble
            with RAPIDDataset(prediction_file) as qout_nc:
                data_values_2d_array = qout_nc.get_qout()
        except Exception as e:
            print(e)

        #add data to the main arrays, ordered to match the interim rivids
        if len(data_values_2d_array) > 0:
            for comid_index, comid in enumerate(prediction_rivids):
                if ensemble_index < 52:
                    reach_prediciton_array_first_half[comid_index][
                        file_index] = data_values_2d_array[
                            comid_index][:first_half_size]
                    reach_prediciton_array_second_half[comid_index][
                        file_index] = data_values_2d_array[comid_index][
                            first_half_size:]
                if ensemble_index == 52:
                    if first_half_size == 65:
                        #convert to 3hr-6hr
                        streamflow_1hr = data_values_2d_array[
                            comid_index][:90:3]
                        # get the time series of 3 hr/6 hr data
                        streamflow_3hr_6hr = data_values_2d_array[comid_index][
                            90:]
                        # concatenate all time series
                        reach_prediciton_array_first_half[comid_index][
                            file_index] = np.concatenate(
                                [streamflow_1hr, streamflow_3hr_6hr])
                    elif len(data_values_2d_array[comid_index]) == 125:
                        #convert to 6hr
                        streamflow_1hr = data_values_2d_array[
                            comid_index][:90:6]
                        # calculate time series of 6 hr data from 3 hr data
                        streamflow_3hr = data_values_2d_array[comid_index][
                            90:109:2]
                        # get the time series of 6 hr data
                        streamflow_6hr = data_values_2d_array[comid_index][
                            109:]
                        # concatenate all time series
                        reach_prediciton_array_first_half[comid_index][
                            file_index] = np.concatenate([
                                streamflow_1hr, streamflow_3hr, streamflow_6hr
                            ])
                    else:
                        reach_prediciton_array_first_half[comid_index][
                            file_index] = data_values_2d_array[comid_index][:]
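
A minimal call sketch under stated assumptions: the paths are hypothetical, and the prediction folder's basename must begin with the forecast timestamp (e.g. '20190115.00'), since the function parses it with datetime.strptime(forecast_date_timestep[:11], "%Y%m%d.%H").

# Usage sketch (hypothetical paths):
generate_warning_points(
    ecmwf_prediction_folder='/forecasts/20190115.00',
    return_period_file='/data/era_interim/return_periods.nc',
    out_directory='/forecasts/20190115.00/warnings',
    threshold=0.25)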

Example #4
    def append_streamflow_from_rapid_output(self, rapid_output_file,
                                            date_peak_search_start=None,
                                            date_peak_search_end=None):
        """
        Generate StreamFlow raster
        Create AutoRAPID INPUT from single RAPID output
        """
        print "Appending streamflow for:", self.stream_info_file
        #get information from datasets
        #get list of streamids
        stream_info_table = csv_to_list(self.stream_info_file, ", ")[1:]
        #Columns: DEM_1D_Index Row Col StreamID StreamDirection
        streamid_list_full = np.array([row[3] for row in stream_info_table], dtype=np.int32)
        streamid_list_unique = np.unique(streamid_list_full)
        
        temp_stream_info_file = "{0}_temp.txt".format(os.path.splitext(self.stream_info_file)[0])
        print "Analyzing data and appending to list ..."
        with open(temp_stream_info_file, 'wb') as outfile:
            writer = csv.writer(outfile, delimiter=" ")
            writer.writerow(["DEM_1D_Index", "Row", "Col", "StreamID", "StreamDirection", "Slope", "Flow"])
            
            with RAPIDDataset(rapid_output_file) as data_nc:
                
                time_range = data_nc.get_time_index_range(date_search_start=date_peak_search_start,
                                                          date_search_end=date_peak_search_end)
                #perform operation in max chunk size of 4,000
                max_chunk_size = 8 * 365 * 5 * 4000  #5 years of 3hr data (8/day) with 4000 comids at a time
                if time_range is not None:
                    time_length = len(time_range)
                else:
                    time_length = data_nc.size_time

                streamid_list_length = len(streamid_list_unique)
                if streamid_list_length <=0:
                    raise IndexError("Invalid stream info file {0}." \
                                     " No stream ID's found ...".format(self.stream_info_file))
                
                step_size = min(max_chunk_size // time_length, streamid_list_length)
                for list_index_start in range(0, streamid_list_length, step_size):
                    list_index_end = min(list_index_start + step_size, streamid_list_length)
                    print("River ID subset range {0} to {1} of {2} ...".format(list_index_start,
                                                                               list_index_end,
                                                                               streamid_list_length))
                    print("Extracting data ...")
                    valid_stream_indices, valid_stream_ids, missing_stream_ids = \
                        data_nc.get_subset_riverid_index_list(streamid_list_unique[list_index_start:list_index_end])
                        
                    streamflow_array = data_nc.get_qout_index(valid_stream_indices,
                                                              time_index_array=time_range)
                    
                    print "Calculating peakflow and writing to file ..."
                    for streamid_index, streamid in enumerate(valid_stream_ids):
                        #get where streamids are in the lookup grid id table
                        peak_flow = max(streamflow_array[streamid_index])
                        raster_index_list = np.where(streamid_list_full==streamid)[0]
                        for raster_index in raster_index_list:
                            writer.writerow(stream_info_table[raster_index][:6] + [peak_flow])

                    for missing_streamid in missing_stream_ids:
                        #set flow to zero for missing stream ids
                        raster_index_list = np.where(streamid_list_full==missing_streamid)[0]
                        for raster_index in raster_index_list:
                            writer.writerow(stream_info_table[raster_index][:6] + [0])

        os.remove(self.stream_info_file)
        os.rename(temp_stream_info_file, self.stream_info_file)

        print "Appending streamflow complete for:", self.stream_info_file

Example #5
    def append_streamflow_from_ecmwf_rapid_output(self, prediction_folder,
                                                  method_x, method_y):
        """
        Generate StreamFlow raster
        Create AutoRAPID INPUT from ECMWF predictions

        method_x = the first axis - produces the max, min, mean, mean_plus_std, or
                   mean_minus_std hydrograph from the 52 ensembles
        method_y = the second axis - reduces the method_x series to a single
                   max, min, mean, mean_plus_std, or mean_minus_std value
        """
     
        print "Generating Streamflow Raster ..."
        #get list of streamidS
        stream_info_table = csv_to_list(self.stream_info_file, ", ")[1:]

        #Columns: DEM_1D_Index Row Col StreamID StreamDirection
        streamid_list_full = np.array([row[3] for row in stream_info_table], dtype=np.int32)
        streamid_list_unique = np.unique(streamid_list_full)
        if streamid_list_unique.size == 0:
            raise Exception("ERROR: No stream id values found in stream info file.")
        
        #Get list of prediction files
        prediction_files = sorted([
            os.path.join(prediction_folder, f)
            for f in os.listdir(prediction_folder)
            if not os.path.isdir(os.path.join(prediction_folder, f))
            and f.lower().endswith('.nc')
        ], reverse=True)
     
     
        print "Finding streamid indices ..."
        with RAPIDDataset(prediction_files[0]) as data_nc:
            reordered_streamid_index_list = data_nc.get_subset_riverid_index_list(streamid_list_unique)[0]

            first_half_size = 40
            if data_nc.size_time == 41 or data_nc.size_time == 61:
                first_half_size = 41
            elif data_nc.size_time == 85 or data_nc.size_time == 125:
                #run at full resolution for all
                first_half_size = 65
        
        print "Extracting Data ..."
        reach_prediciton_array_first_half = np.zeros((len(streamid_list_unique),len(prediction_files),first_half_size))
        reach_prediciton_array_second_half = np.zeros((len(streamid_list_unique),len(prediction_files),20))
        #get information from datasets
        for file_index, prediction_file in enumerate(prediction_files):
            data_values_2d_array = []
            try:
                ensemble_index = int(os.path.basename(prediction_file)[:-3].split("_")[-1])
                #Get hydrograph data from ECMWF Ensemble
                with RAPIDDataset(prediction_file) as data_nc:
                    data_values_2d_array = data_nc.get_qout_index(reordered_streamid_index_list)
    
                    #add data to the main arrays, ordered to match the interim comids
                    if len(data_values_2d_array) > 0:
                        for comid_index in range(len(streamid_list_unique)):
                            if ensemble_index < 52:
                                reach_prediciton_array_first_half[comid_index][file_index] = data_values_2d_array[comid_index][:first_half_size]
                                reach_prediciton_array_second_half[comid_index][file_index] = data_values_2d_array[comid_index][first_half_size:]
                            if ensemble_index == 52:
                                if first_half_size == 65:
                                    #convert to 3hr-6hr
                                    streamflow_1hr = data_values_2d_array[comid_index][:90:3]
                                    # get the time series of 3 hr/6 hr data
                                    streamflow_3hr_6hr = data_values_2d_array[comid_index][90:]
                                    # concatenate all time series
                                    reach_prediciton_array_first_half[comid_index][file_index] = np.concatenate([streamflow_1hr, streamflow_3hr_6hr])
                                elif data_nc.size_time == 125:
                                    #convert to 6hr
                                    streamflow_1hr = data_values_2d_array[comid_index][:90:6]
                                    # calculate time series of 6 hr data from 3 hr data
                                    streamflow_3hr = data_values_2d_array[comid_index][90:109:2]
                                    # get the time series of 6 hr data
                                    streamflow_6hr = data_values_2d_array[comid_index][109:]
                                    # concatenate all time series
                                    reach_prediciton_array_first_half[comid_index][file_index] = np.concatenate([streamflow_1hr, streamflow_3hr, streamflow_6hr])
                                else:
                                    reach_prediciton_array_first_half[comid_index][file_index] = data_values_2d_array[comid_index][:]
                                
            except Exception as e:
                print(e)
     
        print "Analyzing data and writing output ..."
        temp_stream_info_file = "{0}_temp.txt".format(os.path.splitext(self.stream_info_file)[0])
        with open(temp_stream_info_file, 'wb') as outfile:
            writer = csv.writer(outfile, delimiter=" ")
            writer.writerow(["DEM_1D_Index", "Row", "Col", "StreamID", "StreamDirection", "Slope", "Flow"])

            for streamid_index, streamid in enumerate(streamid_list_unique):
                #perform analysis on datasets
                all_data_first = reach_prediciton_array_first_half[streamid_index]
                all_data_second = reach_prediciton_array_second_half[streamid_index]
         
                series = []
         
                if "mean" in method_x:
                    #get mean
                    mean_data_first = np.mean(all_data_first, axis=0)
                    mean_data_second = np.mean(all_data_second, axis=0)
                    series = np.concatenate([mean_data_first,mean_data_second])
                    if "std" in method_x:
                        #get std dev
                        std_dev_first = np.std(all_data_first, axis=0)
                        std_dev_second = np.std(all_data_second, axis=0)
                        std_dev = np.concatenate([std_dev_first,std_dev_second])
                        if method_x == "mean_plus_std":
                            #mean plus std
                            series += std_dev
                        elif method_x == "mean_minus_std":
                            #mean minus std
                            series -= std_dev
         
                elif method_x == "max":
                    #get max
                    max_data_first = np.amax(all_data_first, axis=0)
                    max_data_second = np.amax(all_data_second, axis=0)
                    series = np.concatenate([max_data_first,max_data_second])
                elif method_x == "min":
                    #get min
                    min_data_first = np.amin(all_data_first, axis=0)
                    min_data_second = np.amin(all_data_second, axis=0)
                    series = np.concatenate([min_data_first,min_data_second])
         
                data_val = 0
                if "mean" in method_y:
                    #get mean
                    data_val = np.mean(series)
                    if "std" in method_y:
                        #get std dev
                        std_dev = np.std(series)
                        if method_y == "mean_plus_std":
                            #mean plus std
                            data_val += std_dev
                        elif method_y == "mean_minus_std":
                            #mean minus std
                            data_val -= std_dev
         
                elif method_y == "max":
                    #get max
                    data_val = np.amax(series)
                elif method_y == "min":
                    #get min
                    data_val = np.amin(series)
         
                #get where streamids are in the lookup grid id table
                raster_index_list = np.where(streamid_list_full==streamid)[0]
                for raster_index in raster_index_list:
                    writer.writerow(stream_info_table[raster_index][:6] + [data_val])

        os.remove(self.stream_info_file)
        os.rename(temp_stream_info_file, self.stream_info_file)
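
The method_x/method_y logic above is two chained reductions: method_x collapses the ensemble axis into a single hydrograph, and method_y collapses that hydrograph into one value per stream ID. A standalone sketch of the 'mean_plus_std' / 'max' combination on a toy array (numbers are illustrative only):

# Sketch: method_x='mean_plus_std' over ensembles, then method_y='max' over time.
import numpy as np

toy_q = np.random.default_rng(0).random((52, 60))  # 52 ensembles x 60 timesteps
series = toy_q.mean(axis=0) + toy_q.std(axis=0)    # method_x -> one hydrograph
data_val = float(series.max())                     # method_y -> one value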

Example #6

 def compute_init_flows_from_past_forecast(self, forecasted_streamflow_files):
     """
     Compute initial flows from the past ECMWF forecast ensemble
     """
     if forecasted_streamflow_files:
         #get list of COMIDS
         print("Computing initial flows from the past ECMWF forecast ensemble ...")
         with RAPIDDataset(forecasted_streamflow_files[0]) as qout_nc:
             comid_index_list, reordered_comid_list, ignored_comid_list = qout_nc.get_subset_riverid_index_list(self.stream_id_array)
         print("Extracting data ...")
         reach_prediciton_array = np.zeros((len(self.stream_id_array),len(forecasted_streamflow_files),1))
         #get information from datasets
         for file_index, forecasted_streamflow_file in enumerate(forecasted_streamflow_files):
             try:
                 ensemble_index = int(os.path.basename(forecasted_streamflow_file).split(".")[0].split("_")[-1])
                 try:
                     #Get hydrograph data from ECMWF Ensemble
                     with RAPIDDataset(forecasted_streamflow_file) as predicted_qout_nc:
                         time_length = predicted_qout_nc.size_time
                         if not predicted_qout_nc.is_time_variable_valid():
                             #data is raw rapid output
                             data_values_2d_array = predicted_qout_nc.get_qout_index(comid_index_list, 
                                                                                     time_index=1)
                         else:
                             #Added following comment lines 20190115 CJB
                             #the data is CF compliant and has time=0 added to output
                             #time_index seems to be the multiplier for a 12-hour increment in the data:
                             #HRES (#52) is 1-hourly to start with so 12 x 1 = 12. If HRES (#52) doesn't
                             #have 1-hourly then it is 6-hourly so 2 x 6 = 12.
                             #ENS is 3-hourly so 4 x 3 = 12 otherwise it may be 6-hourly: 2 x 6 = 12 again.
                             #It may be better to use:
                             # time_interval = CreateInflowFileFromECMWFRunoff.dataIdentify(forecasted_streamflow_file)  #Added following lines 20190115 CJB
                             #
                             RAPIDinflowECMWF_tool = CreateInflowFileFromECMWFRunoff()
                             time_interval = RAPIDinflowECMWF_tool.dataIdentify(forecasted_streamflow_file)
                             print(time_interval)
                             if ensemble_index == 52:
                                 if time_interval in ('HRES1', 'HRES13', 'HRES136'):  #was: if time_length == 125 (20190115 CJB)
                                     data_values_2d_array = predicted_qout_nc.get_qout_index(comid_index_list,
                                                                                             time_index=12)
                                 else:  #otherwise it is ENS6
                                     data_values_2d_array = predicted_qout_nc.get_qout_index(comid_index_list,
                                                                                             time_index=2)
                             else:
                                 if time_interval in ('ENS3', 'ENS36'):  #was: if time_length == 85 (20190115 CJB)
                                     data_values_2d_array = predicted_qout_nc.get_qout_index(comid_index_list,
                                                                                             time_index=4)
                                 else:  #otherwise it is ENS6
                                     data_values_2d_array = predicted_qout_nc.get_qout_index(comid_index_list,
                                                                                             time_index=2)
                 except Exception:
                     print("Invalid ECMWF forecast file {0}".format(forecasted_streamflow_file))
                     continue
                 #organize the data
                 for comid_index, comid in enumerate(reordered_comid_list):
                     reach_prediciton_array[comid_index][file_index] = data_values_2d_array[comid_index]
             except Exception as e:
                 print(e)
 
         print("Analyzing data ...")
         for index in range(len(self.stream_segments)):
             try:
                 #get where comids are in netcdf file
                 data_index = np.where(reordered_comid_list==self.stream_segments[index].stream_id)[0][0]
                 self.stream_segments[index].init_flow = np.mean(reach_prediciton_array[data_index])
             except Exception:
                  #stream id not found in list; use zero init flow
                  self.stream_segments[index].init_flow = 0
         
         print("Initialization Complete!")