Пример #1
0
def plot_diff_between_files(file1, file2, i_array, j_array):
    """
    Plot the difference (file2 - file1) between two data files, averaged
    along axis 0.

    :param file1: path of the file that is subtracted
    :param file2: path of the file that is subtracted from
    :param i_array: passed through to plot_data
    :param j_array: passed through to plot_data
    """
    # file1 is read before file2, exactly in this order
    first, second = [data_select.get_data_from_file(a_path) for a_path in (file1, file2)]
    mean_difference = np.mean(second - first, axis = 0)

    figure_title = 'AEX, difference between \n %s \n and \n %s' % (file2, file1)
    plot_data(mean_difference, i_array, j_array, name = 'the_diff', title = figure_title)
def get_significance_and_changes_for_months(months = range(1, 13),
                                            folder_path = "data/streamflows/hydrosheds_euler9"
                                            ):
    """
    Compare the per-year means (over the given months) of the current and the
    future simulations of all members.

    :param months: forwarded to data_select.get_means_over_months_for_each_year
    :param folder_path: folder searched for files whose names start with a member id

    :return: tuple (is_sign, change)
        is_sign - result of p < 0.05, where p comes from stats.ttest_ind
                  applied along axis 0 (True means significant change)
        change  - (future mean - current mean) / current mean * 100,
                  i.e. the change in percent of the current mean
    """
    # member id -> path of the file whose name starts with that id
    # (if several files match, the one listed last is kept)
    member_files = {}
    for member_id in members.all_members:
        for candidate in os.listdir(folder_path):
            if not candidate.startswith(member_id):
                continue
            member_files[member_id] = os.path.join(folder_path, candidate)

    def means_per_year(file_path):
        # values of the dict returned by get_means_over_months_for_each_year
        flows, time_steps, _i, _j = data_select.get_data_from_file(file_path)
        by_year = data_select.get_means_over_months_for_each_year(time_steps, flows, months=months)
        return by_year.values()

    current_samples = []
    future_samples = []
    for current_id in members.all_current:
        current_samples.extend(means_per_year(member_files[current_id]))
        future_id = members.current2future[current_id]
        future_samples.extend(means_per_year(member_files[future_id]))

    current_samples = np.array(current_samples)
    future_samples = np.array(future_samples)

    # single-argument print(...) emits the same text under Python 2 and 3
    print(future_samples.shape)

    t_statistic, p_values = stats.ttest_ind(current_samples, future_samples, axis = 0)
    is_sign = p_values < 0.05  # significant at the 5% level

    current_mean = np.mean(current_samples, axis=0)
    future_mean = np.mean(future_samples, axis=0)

    print(future_mean.shape)
    n_significant = sum(int(flag) for flag in is_sign)
    print("%s %s" % ("number of significant points = ", n_significant))
    return is_sign, (future_mean - current_mean) / current_mean * 100.0
Пример #3
0
def compare_simulations(path1, path2, label1 = '1', label2 = '2', field_name = 'water_discharge'):
    """
    Compare two simulations with three scatter plots (means, minimums and
    maximums, each taken along axis 0 of the data) and save the figure to
    '<label1>_<label2>_scatter.png'.

    :param path1: file of the first simulation (x axis of every panel)
    :param path2: file of the second simulation (y axis of every panel)
    :param label1: x-axis label, also used in the output file name
    :param label2: y-axis label, also used in the output file name
    :param field_name: forwarded to data_select.get_data_from_file
    """
    data1, time1, i_indices, j_indices = data_select.get_data_from_file(path1, field_name)
    data2, time2, i_indices, j_indices = data_select.get_data_from_file(path2, field_name)

    # all statistics are computed before anything is drawn
    minima = [np.min(field, axis = 0) for field in (data1, data2)]
    maxima = [np.max(field, axis = 0) for field in (data1, data2)]
    averages = [np.mean(field, axis = 0) for field in (data1, data2)]

    plt.subplots_adjust(hspace = 0.5)

    panels = (
        ('means', averages),
        ('minimums', minima),
        ('maximums', maxima),
    )
    for panel_no, (panel_title, (values1, values2)) in enumerate(panels, 1):
        plt.subplot(2, 2, panel_no)
        plt.title(panel_title, override)
        plt.scatter(values1, values2, linewidth = 0)
        plt.xlabel(label1)
        plt.ylabel(label2)

        # 1:1 reference line spanning the current x-range of the panel
        x_range = plt.xlim()
        plt.plot(x_range, x_range, color = 'k')
        plt.grid(True)

    plt.savefig('{0}_{1}_scatter.png'.format(label1, label2), bbox_inches = 'tight')
def compare_and_plot(path_to_folder = 'data/streamflows/hydrosheds_euler9'):
    """
    Plot changes in interannual variability for each current/future member pair.

    For every pair the annual means are computed with calculate_annual_means,
    their standard deviations are taken along axis 0, and the ratio
    future / current is drawn in panels a-e. One more panel (f) shows the
    standard deviation of annual means of a single simulation.

    :param path_to_folder: forwarded to get_member_to_path_mapping

    NOTE(review): panel f is described as the control run, but the file read
    for it is the current-climate file of the LAST member of the loop
    (path_c) - confirm which file is intended.
    """
    member_to_path = get_member_to_path_mapping(path_to_folder)
    plot_utils.apply_plot_params(aspect_ratio = 1.5)
    plt.figure()

    panel_marks = ['a', 'b', 'c', 'd', 'e']
    member_panels = list(zip(members.current_ids, panel_marks))
    for panel_no, (current_id, panel_mark) in enumerate(member_panels, 1):
        future_id = members.current2future[current_id]
        path_c = member_to_path[current_id]
        path_f = member_to_path[future_id]

        # the cell indices of the second (future) read are the ones used for plotting
        flow_c, times_c, i_indices, j_indices = data_select.get_data_from_file(path_c)
        flow_f, times_f, i_indices, j_indices = data_select.get_data_from_file(path_f)

        annual_c = calculate_annual_means(times_c, flow_c)
        annual_f = calculate_annual_means(times_f, flow_f)

        sigma_c = np.std(annual_c, axis = 0)
        sigma_f = np.std(annual_f, axis = 0)

        plt.subplot(3, 2, panel_no)
        plot_subplot(i_indices, j_indices, sigma_f / sigma_c, mark = panel_mark)

    # last panel: standard deviation of annual means for the file in path_c
    plt.subplot(3, 2, len(member_panels) + 1)
    flow_c, times_c, i_indices, j_indices = data_select.get_data_from_file(path_c)
    sigma_c = np.std(calculate_annual_means(times_c, flow_c), axis = 0)
    plot_subplot(i_indices, j_indices, sigma_c, mark = 'f')

    super_title = ('a-e: Changes in interannual variability ($\\sigma_{\\rm future}/ \\sigma_{\\rm current}$). \n'
                   'f: Interannual variability of the control simulation')
    plt.suptitle(super_title)

    plt.show()
Пример #5
0
def plot_cv_for_seasonal_mean(folder_path = "data/streamflows/hydrosheds_euler9",
                            member_ids = None,
                            file_name_pattern = "%s_discharge_2041_01_01_00_00.nc",
                            months = range(1,13),
                            out_file_name = "cv_for_annual_mean.png",
                            max_value = None
                            ):
    """
    Plot the coefficient of variation (std / mean across members) of the
    seasonal mean of every member and save the figure to out_file_name.

    The seasonal mean of a member is the mean along axis 0 of its data,
    restricted to the time steps whose month is in ``months`` (all time steps
    when 12 months are given).

    :param folder_path: folder that contains the member files
    :param member_ids: ids inserted into file_name_pattern; nothing is plotted
        (and None is returned) when this is None or empty
    :param file_name_pattern: "%s"-pattern of the file names
    :param months: months that make up the season
    :param out_file_name: path of the saved figure
    :param max_value: upper limit of the color scale (vmax)

    Relies on the module-level names x, y and basemap.
    """
    # Same guard as the sibling plotting functions: without it the default
    # member_ids = None failed in enumerate() after opening an empty figure.
    if member_ids is None:
        return

    seasonal_means = []
    for i, the_id in enumerate( member_ids ):
        fPath = os.path.join(folder_path, file_name_pattern % the_id)
        if not i:
            # the first file also supplies the time axis and the cell indices
            data, times, i_indices, j_indices = data_select.get_data_from_file(fPath)
            interest_x = x[i_indices, j_indices]
            interest_y = y[i_indices, j_indices]
            x_min, x_max, y_min, y_max = _get_limits(interest_x = interest_x, interest_y = interest_y)
        else:
            data = data_select.get_field_from_file(path = fPath)
        assert data is not None, "i = %d " % i

        if len(months) == 12:
            the_seasonal_mean = np.mean(data, axis = 0)
        else:
            # a real list (not a lazy map object) so np.where sees the flags
            in_season = [t.month in months for t in times]
            rows = np.where(in_season)[0]
            the_seasonal_mean = np.mean(data[rows, :], axis = 0)
        seasonal_means.append(the_seasonal_mean)

    # an empty member list leaves nothing to average or plot
    if not seasonal_means:
        return

    # the figure is opened only once the data is available, so a failed
    # read no longer leaves an empty figure behind
    plt.figure()

    seasonal_means = np.array( seasonal_means )
    mu = np.mean(seasonal_means, axis=0)
    sigma = np.std(seasonal_means,axis=0)
    cv = sigma / mu

    cMap = mpl.cm.get_cmap(name = "jet_r", lut = 30)
    cMap.set_over(color = "0.5")

    # scatter the per-cell values onto a fully masked 2D field shaped like x
    to_plot = np.ma.masked_all(x.shape)
    for the_index, (i, j) in enumerate(zip(i_indices, j_indices)):
        to_plot[i, j] = cv[the_index]

    basemap.pcolormesh(x, y, to_plot.copy(), cmap = cMap, vmin = 0, vmax = max_value)
    basemap.drawcoastlines(linewidth = 0.5)
    plt.colorbar(ticks = LinearLocator(numticks = 11), format = "%.1e")
    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)

    plt.savefig(out_file_name, bbox_inches = "tight")
Пример #6
0
 def __init__(self, path = "", spinup_years = None):
     """
     Load the data found at ``path`` and keep it together with the spin-up length.

     :param path: forwarded to data_select.get_data_from_file
     :param spinup_years: spin-up length in years, used to build the label

     NOTE(review): the default spinup_years = None cannot be formatted with
     "%d" below, so a number apparently always has to be supplied - confirm.
     """
     self.spinup_years = spinup_years

     # the loader result is indexed as (data, times, x indices, y indices)
     loaded = data_select.get_data_from_file(path = path)
     self.data, self.times = loaded[0], loaded[1]
     self.x_indices, self.y_indices = loaded[2], loaded[3]

     self.label = "%d years" % spinup_years
     self._select_first_year_data()
Пример #7
0
def get_dispersion_between_members(files):
    """
    Return the spread between members: the standard deviation across the
    files (axis 0 of the stacked data), averaged along the first axis of the
    individual fields.

    :param files: paths read with data_select.get_data_from_file; the shape
        of the first result defines the shape expected from all of them
    :raises IndexError: when files is empty
    """
    member_fields = [data_select.get_data_from_file(a_path) for a_path in files]

    n_rows, n_cells = member_fields[0].shape
    stacked = np.zeros((len(member_fields), n_rows, n_cells))
    for member_no, field in enumerate(member_fields):
        stacked[member_no, :, :] = field[:, :]

    spread = np.std(stacked, axis = 0)
    return np.mean(spread, axis = 0)
def plot_mean_hydrograph_with_gw_ouflow(data_path = 'data/streamflows/hydrosheds_euler10_spinup100yrs'):
    """
    Unfinished (see TODO): looks up the basin named 'RDO' and loads the
    'gw_outflow' and 'surface_runoff' fields from data_path, but does not
    plot anything yet.
    """
    #TODO: Implement
    basins = infocell.get_basins_with_cells_connected_using_hydrosheds_data()

    basinName = 'RDO'
    # first basin with the requested name, None when there is none
    theBasin = next((a_basin for a_basin in basins if a_basin.name == basinName), None)

    loaded = data_select.get_data_from_file(path = data_path, field_name = 'gw_outflow')
    gw_outflow, times = loaded[0], loaded[1]
    x_index, y_index = loaded[2], loaded[3]

    loaded = data_select.get_data_from_file(path = data_path, field_name = 'surface_runoff')
    surface_runoff = loaded[0]

    # placeholder loop over the time steps, to be filled in
    for a_time in times:
        pass
Пример #9
0
def do_bootstrap_for_simulation_mean(sim_id = "aet", folder_path = "data/streamflows/hydrosheds_euler9",
                                     months = range(1, 13), n_samples = 1000):

    """
    Bootstrap the per-year means of one simulation.

    The years are resampled with replacement n_samples times; for every
    resample the mean over the drawn years is computed, and the standard
    deviation of these means (along axis 0) is stored next to the mean of
    the original per-year means. The result is cached with pickle in the
    file given by _get_cache_file_path and reused on later calls.

    :param sim_id: prefix of the data file name inside folder_path
    :param folder_path: folder searched for the data file (first match is used)
    :param months: forwarded to data_select.get_means_over_months_for_each_year
    :param n_samples: number of bootstrap resamples
    :return: MeansAndDeviation object holding the means for the domain and
        the bootstrap standard deviations
    """
    cache_file = _get_cache_file_path(sim_id=sim_id, months=months)
    if os.path.isfile(cache_file):
        # NOTE: pickle executes whatever the file contains - only load cache
        # files that this function wrote itself.
        # Binary mode + "with": the handle is closed deterministically and
        # the file is read as the pickle documentation recommends.
        with open(cache_file, "rb") as cache:
            return pickle.load(cache)

    #determine path to the file with data
    filePath = None
    for f in os.listdir(folder_path):
        if f.startswith(sim_id):
            filePath = os.path.join(folder_path, f)
            break

    streamflow, times, i_indices, j_indices = data_select.get_data_from_file(filePath)

    #for each year and for each gridcell get mean value for the period
    means_dict = data_select.get_means_over_months_for_each_year(times, streamflow, months = months)

    # one row per key of means_dict, in ascending key order
    data_matrix = np.array([means_dict[key] for key in sorted(means_dict)])
    # single-argument print(...) emits the same text under Python 2 and 3
    print("%s %s" % ("data_matrix.shape = ", data_matrix.shape))

    # Draw the row indices uniformly from 0..n_rows-1. Rounding
    # rand() * (n_rows - 1), as done before, picked the first and the last
    # row only half as often as the others and biased the bootstrap.
    n_rows = data_matrix.shape[0]
    index_matrix = np.random.randint(0, n_rows, size = (n_samples, n_rows))

    means_matrix = np.zeros((n_samples, streamflow.shape[1])) #n_samples x n_points
    for sample_no, drawn_rows in enumerate(index_matrix):
        means_matrix[sample_no, :] = np.mean(data_matrix[drawn_rows, :], axis = 0)

    m_holder = MeansAndDeviation(sim_id=sim_id,
                                 means_for_domain=np.mean(data_matrix, axis = 0),
                                 standard_devs_for_domain=np.std(means_matrix, axis = 0))

    # closed (and therefore flushed) before the function returns
    with open(cache_file, "wb") as cache:
        pickle.dump(m_holder, cache)
    return m_holder
Пример #10
0
def plot_annual_extremes(data_path = 'data/streamflows/VplusFmask_newton/aex_discharge_1970_01_01_00_00.nc',
                         start_date = datetime(1970, 1,1, 0,0,0),
                         end_date = datetime(2000, 1,1, 0,0,0),
                         ):
    """
    Plot two maps of streamflow extremes for the period start_date..end_date:
    the minima requested for months 1-12 with duration_days = 1 and the
    maxima requested for months 4-6 with duration_days = 7.

    :param data_path: file read with data_select.get_data_from_file
    :param start_date: forwarded to the data_select extreme-value functions
    :param end_date: forwarded to the data_select extreme-value functions
    """
    streamflows, times, i_array, j_array = data_select.get_data_from_file(data_path)

    def draw_map(values, name, title):
        # both maps share every plotting option except the name and the title
        plot_data_2d.plot_data(values, i_array, j_array, name = name, title = title,
                               digits = 1,
                               color_map = mpl.cm.get_cmap("OrRd", 10),
                               minmax = (None, None),
                               units = "m**3/s")

    period = dict(start_date = start_date, end_date = end_date)

    the_minima = data_select.get_minimums_for_domain(streamflows, times,
                                                     start_month = 1, end_month = 12,
                                                     duration_days = 1, **period)
    draw_map(the_minima, "minima", "min")

    the_maximums = data_select.get_maximums_for_domain(streamflows, times,
                                                       start_month = 4, end_month = 6,
                                                       duration_days = 7, **period)
    draw_map(the_maximums, "maxima", "max")
Пример #11
0
def get_meanof_means_and_stds_from_files(files):
    """
    Average, over all files, the per-file mean and the per-file standard
    deviation (both taken along axis 0 of the data of a file).

    :param files: paths read with data_select.get_data_from_file
    :return: tuple (mean, stdevs); None when files is empty
    """
    if len(files) == 0:
        return

    mean_sum = None
    std_sum = None
    for a_path in files:
        data = data_select.get_data_from_file(a_path)
        if mean_sum is None:
            # the accumulators are sized from the first file
            n_columns = data.shape[1]
            mean_sum = np.zeros(n_columns)
            std_sum = np.zeros(n_columns)

        mean_sum += np.mean(data, axis = 0)
        std_sum += np.std(data, axis = 0)

    n_files = float(len(files))
    mean_sum /= n_files
    std_sum /= n_files

    # single-argument print(...) emits the same text under Python 2 and 3
    print("%s %s" % ('max deviation: ', np.max(std_sum)))
    assert mean_sum.shape[0] == data.shape[1]
    return mean_sum, std_sum
def plot_ratio(path = 'data/streamflows/hydrosheds_euler10_spinup100yrs/aex_discharge_1970_01_01_00_00.nc',
                min_lon = None, max_lon = None, min_lat = None, max_lat = None):
    """
    Plot the mean annual cycle of the spatially averaged surface runoff and
    total runoff (surface + subsurface) read from ``path``.

    :param path: file with the 'surface_runoff' and 'subsurface_runoff' fields
        (and 'longitude' / 'latitude' when a box is requested)
    :param min_lon: when not None, only the cells inside the box
        [min_lon, max_lon] x [min_lat, max_lat] are averaged; all four bounds
        are expected in that case
    :param max_lon: see min_lon
    :param min_lat: see min_lon
    :param max_lat: see min_lon
    """
    res = data_select.get_data_from_file(path = path, field_name = 'surface_runoff')
    surf_runoff, times, x_indices, y_indices = res
    subsurface_runoff = data_select.get_field_from_file(path, field_name = 'subsurface_runoff')
    total_runoff = surf_runoff + subsurface_runoff

    if min_lon is not None:
        lons = data_select.get_field_from_file(path, field_name = 'longitude')
        lats = data_select.get_field_from_file(path, field_name = 'latitude')

        lons_1d = lons[x_indices, y_indices]
        lats_1d = lats[x_indices, y_indices]

        # A cell is kept only when it satisfies BOTH the longitude and the
        # latitude range. The latitude test used to overwrite the longitude
        # test, so min_lon / max_lon were silently ignored.
        inside_box = ((lons_1d >= min_lon) & (lons_1d <= max_lon) &
                      (lats_1d >= min_lat) & (lats_1d <= max_lat))
        surf_runoff = surf_runoff[:, inside_box]
        total_runoff = total_runoff[:, inside_box]

    mean_surf_runoff = np.mean(surf_runoff, axis = 1) #mean in space
    mean_total_runoff = np.mean(total_runoff, axis = 1)

    # a list, so the dates can be traversed more than once
    stamp_dates = [swe.toStampYear(d, stamp_year = 2000) for d in times]

    t1, v1 = get_mean_for_day_of_year(stamp_dates, mean_surf_runoff)
    plt.plot(t1, v1, label = 'surface runoff', linewidth = 3)

    t2, v2 = get_mean_for_day_of_year(stamp_dates, mean_total_runoff)
    plt.plot(t2, v2, label = 'total runoff', linewidth = 3)
    plt.legend()
    plt.gca().xaxis.set_major_formatter(mpl.dates.DateFormatter('%b'))
    plt.show()
def compare_means(member_id = 'aex' ,data_folder1 = '', label1 = '', data_folder2 = '', label2 = ''):
    """
    Plot, basin by basin, the hydrographs of one member computed from two
    data folders and save the figure to '<member_id>_hydrographs.png'.

    For every cell pe.average_for_each_day_of_year is evaluated for both
    datasets; the per-cell results are summed over the cells whose basin mask
    equals 1 and divided by basin.get_number_of_cells().

    :param member_id: prefix (compared against the lower-cased file name) of
        the data file in each folder
    :param data_folder1: folder of the first dataset (blue line)
    :param label1: legend label of the first dataset
    :param data_folder2: folder of the second dataset (red line)
    :param label2: legend label of the second dataset

    NOTE(review): when a folder has no matching file, or no basin contains a
    cell, the names used further down are never bound and a NameError follows.
    """
    basin_indices = lowflow.read_basin_indices('data/infocell/amno180x172_basins.nc')

    # in each folder the last matching file wins
    for file_name in os.listdir(data_folder1):
        if file_name.lower().startswith(member_id):
            path1 = os.path.join(data_folder1, file_name)

    for file_name in os.listdir(data_folder2):
        if file_name.lower().startswith(member_id):
            path2 = os.path.join(data_folder2, file_name)

    discharge_1, times1, i_list, j_list = data_select.get_data_from_file(path1, 'water_discharge')
    discharge_2, times2, i_list, j_list = data_select.get_data_from_file(path2, 'water_discharge')

    cell_cycles_1 = []
    cell_cycles_2 = []
    for cell_no in range(discharge_1.shape[1]):
        dates_1, cycle = pe.average_for_each_day_of_year(times1, discharge_1[:, cell_no], year = 2000)
        cell_cycles_1.append(np.array(cycle))

        # the dates of this second call are the ones used on the x axis below
        dates_2, cycle = pe.average_for_each_day_of_year(times2, discharge_2[:, cell_no], year = 2000)
        cell_cycles_2.append(np.array(cycle))

    basin_cycle_1 = {}
    basin_cycle_2 = {}
    cells = list(zip(i_list, j_list, cell_cycles_1, cell_cycles_2))
    for basin in basin_indices:
        for i, j, cycle_1, cycle_2 in cells:
            if not (basin.mask[i, j] == 1):
                continue
            if basin not in basin_cycle_1:
                # NOTE(review): stores the per-cell array itself, which the
                # in-place += and /= below then modify
                basin_cycle_1[basin] = cycle_1
                basin_cycle_2[basin] = cycle_2
            else:
                basin_cycle_1[basin] += cycle_1
                basin_cycle_2[basin] += cycle_2

    # turn the per-basin sums into means
    for basin in list(basin_cycle_1):
        n_cells = float(basin.get_number_of_cells())
        basin_cycle_1[basin] /= n_cells
        basin_cycle_2[basin] /= n_cells

    plt.figure()
    for panel_no, (basin, cycle_1) in enumerate(basin_cycle_1.items(), 1):
        plt.subplot(7, 3, panel_no)
        plt.title(basin.name)
        line_1 = plt.plot(dates_2, cycle_1, linewidth = 2, color = 'b')
        line_2 = plt.plot(dates_2, basin_cycle_2[basin], linewidth = 2, color = 'r')

        # month abbreviations on every second month
        axis = plt.gca()
        axis.xaxis.set_major_locator(mpl.dates.MonthLocator(bymonth = range(2,13,2)))
        axis.xaxis.set_major_formatter(mpl.dates.DateFormatter('%b'))

    # the legend uses the line handles of the last basin drawn
    plt.figlegend([line_1, line_2], [label1, label2], 'upper right')
    plt.savefig('{0}_hydrographs.png'.format(member_id), bbox_inches = 'tight')
def plot_basin_mean_hydrograph(current_id = 'aex', future_id = None,
                                data_folder = 'data/streamflows/hydrosheds_euler7',
                                current_start_date = None, current_end_date = None,
                                future_start_date = None, future_end_date = None):
    """
    Plot, basin by basin, the hydrographs of a current and a future
    simulation and save the figure to '<current_id>_<future_id>_hydrographs.png'.

    For every cell pe.average_for_each_day_of_year is evaluated for both
    simulations; the per-cell results are summed over the cells whose basin
    mask equals 1 and divided by basin.get_number_of_cells().

    :param current_id: file-name prefix of the current simulation (blue line)
    :param future_id: file-name prefix of the future simulation (red line)
    :param data_folder: folder searched for both files
    :param current_start_date: forwarded to pe.average_for_each_day_of_year
    :param current_end_date: forwarded to pe.average_for_each_day_of_year
    :param future_start_date: forwarded to pe.average_for_each_day_of_year
    :param future_end_date: forwarded to pe.average_for_each_day_of_year

    NOTE(review): the default future_id = None cannot be passed to
    str.startswith, so a future id apparently always has to be given - confirm.
    """
    basin_indices = lowflow.read_basin_indices('data/infocell/amno180x172_basins.nc')

    # for each id the last matching file of the folder wins
    for file_name in os.listdir(data_folder):
        lowered = file_name.lower()
        if lowered.startswith(current_id):
            path_current = os.path.join(data_folder, file_name)
        if lowered.startswith(future_id):
            path_future = os.path.join(data_folder, file_name)

    discharge_current, times_current, i_list, j_list = data_select.get_data_from_file(path_current, 'water_discharge')
    discharge_future, times_future, i_list, j_list = data_select.get_data_from_file(path_future, 'water_discharge')

    cell_cycles_current = []
    cell_cycles_future = []
    for cell_no in range(discharge_current.shape[1]):
        dates_current, cycle = pe.average_for_each_day_of_year(
            times_current, discharge_current[:, cell_no],
            start_date = current_start_date, end_date = current_end_date, year = 2000)
        cell_cycles_current.append(np.array(cycle))

        # the dates of this second call are the ones used on the x axis below
        dates_future, cycle = pe.average_for_each_day_of_year(
            times_future, discharge_future[:, cell_no],
            start_date = future_start_date, end_date = future_end_date, year = 2000)
        cell_cycles_future.append(np.array(cycle))

    basin_cycle_current = {}
    basin_cycle_future = {}
    cells = list(zip(i_list, j_list, cell_cycles_current, cell_cycles_future))
    for basin in basin_indices:
        for i, j, cycle_current, cycle_future in cells:
            if not (basin.mask[i, j] == 1):
                continue
            if basin not in basin_cycle_current:
                # NOTE(review): stores the per-cell array itself, which the
                # in-place += and /= below then modify
                basin_cycle_current[basin] = cycle_current
                basin_cycle_future[basin] = cycle_future
            else:
                basin_cycle_current[basin] += cycle_current
                basin_cycle_future[basin] += cycle_future

    # turn the per-basin sums into means
    for basin in list(basin_cycle_current):
        n_cells = float(basin.get_number_of_cells())
        basin_cycle_current[basin] /= n_cells
        basin_cycle_future[basin] /= n_cells

    plt.figure()
    plt.subplots_adjust(hspace = 0.5)
    for panel_no, (basin, cycle_current) in enumerate(basin_cycle_current.items(), 1):
        plt.subplot(7, 3, panel_no)
        plt.title(basin.name)

        line_current = plt.plot(dates_future, cycle_current, linewidth = 2, color = 'b')
        line_future = plt.plot(dates_future, basin_cycle_future[basin], linewidth = 2,
                               color = 'r')

        plt.ylabel('${\\rm m^3/s}$')

        # month abbreviations on every second month
        axis = plt.gca()
        axis.xaxis.set_major_locator(mpl.dates.MonthLocator(bymonth = range(2,13,2)))
        axis.xaxis.set_major_formatter(mpl.dates.DateFormatter('%b'))

    # the legend uses the line handles of the last basin drawn
    plt.figlegend([line_current, line_future], ['current', 'future'], 'upper right')
    plt.savefig('{0}_{1}_hydrographs.png'.format(current_id, future_id), bbox_inches = 'tight')
Пример #15
0
def plot_seasonal_mean_streamflows(folder_path = "data/streamflows/hydrosheds_euler9",
                                 member_ids = None,
                                 file_name_pattern = "%s_discharge_1970_01_01_00_00.nc",
                                 months = range(1,13),
                                 out_file_name = "annual_means.png"
                                 ):
    """
    Draw one map per member with the seasonal mean of its data and save all
    maps in a single figure (out_file_name).

    The seasonal mean is the mean along axis 0, restricted to the time steps
    whose month is in ``months`` (all time steps when 12 months are given).
    All maps share one color scale from 0 to the largest seasonal mean.

    :param folder_path: folder that contains the member files
    :param member_ids: ids inserted into file_name_pattern; the function
        returns right after printing ``months`` when this is None
    :param file_name_pattern: "%s"-pattern of the file names
    :param months: months that make up the season
    :param out_file_name: path of the saved figure

    Relies on the module-level names x, y and basemap. The debug output is
    written with single-argument print(...), which emits the same text under
    Python 2 and 3.
    """
    print(months)

    if member_ids is None:
        return

    times = i_indices = j_indices = None
    x_min = x_max = y_min = y_max = None
    member_means = []
    for member_no, the_id in enumerate(member_ids):
        fPath = os.path.join(folder_path, file_name_pattern % the_id)
        print(fPath)
        data, times, i_indices, j_indices = data_select.get_data_from_file(fPath)
        if member_no == 0:
            # the plot limits are derived from the cells of the first member
            interest_x = x[i_indices, j_indices]
            interest_y = y[i_indices, j_indices]
            x_min, x_max, y_min, y_max = _get_limits(interest_x = interest_x, interest_y = interest_y)

        assert data is not None, "i = %d " % member_no
        if len(months) != 12:
            # keep only the time steps of the months of interest
            in_season = [t.month in months for t in times]
            indices = np.where(in_season)
            print(indices[0].shape)
            print(len(indices))

            seasonal_mean = np.mean(data[indices[0], :], axis = 0)
            print(data.shape)
            print("%s %s" % ("data = ", data[indices[0], :].shape))
            print("%s %s" % ("mean = ", seasonal_mean.shape))
            print(sum(int(flag) for flag in in_season))
        else:
            seasonal_mean = np.mean(data, axis = 0)
        member_means.append(seasonal_mean)

    print(np.array(member_means).shape)

    plot_utils.apply_plot_params(aspect_ratio=0.8)
    plt.figure()
    plt.subplots_adjust(hspace = 0.1, wspace = 0.3)
    max_value = np.array(member_means).max()

    cMap = mpl.cm.get_cmap(name = "jet_r", lut = 18)

    n_columns = len(member_ids) // 2 + 1
    for panel_no, a_mean in enumerate(member_means, 1):
        plt.subplot(2, n_columns, panel_no)

        # scatter the per-cell values onto a fully masked 2D field shaped like x
        to_plot = np.ma.masked_all(x.shape)
        for cell_no, (i, j) in enumerate(zip(i_indices, j_indices)):
            to_plot[i, j] = a_mean[cell_no]

        basemap.pcolormesh(x, y, to_plot.copy(), cmap = cMap,
                           vmin = 0, vmax = max_value)
        basemap.drawcoastlines(linewidth = 0.5)
        plt.colorbar(ticks = LinearLocator(numticks = 7), format = "%d")
        plt.xlim(x_min, x_max)
        plt.ylim(y_min, y_max)

    plt.savefig(out_file_name)
Пример #16
0
def plot_mean_extreme_flow(folder_path = "data/streamflows/hydrosheds_euler9",
                                 member_ids = None,
                                 file_name_pattern = "%s_discharge_1970_01_01_00_00.nc",
                                 out_file_name = "annual_means.png",
                                 high = True,
                                 start_month = 1, end_month = 12
                                 ):
    """
    Plot the mean annual extreme flow of every member and, in a second
    figure, the coefficient of variation of these means across the members.

    With high = True the annual maximums are used, otherwise the annual
    minimums requested with an event duration of 15 days. The member maps go
    to out_file_name, the coefficient of variation to "cv_" + out_file_name.

    :param folder_path: folder that contains the member files
    :param member_ids: ids inserted into file_name_pattern; the function
        returns immediately when this is None
    :param file_name_pattern: "%s"-pattern of the file names
    :param out_file_name: path of the figure with the member maps
    :param high: True for maximums, False for minimums
    :param start_month: forwarded to the data_select extreme-value functions
    :param end_month: forwarded to the data_select extreme-value functions

    Relies on the module-level names x, y and basemap.
    """
    if member_ids is None:
        return

    times = i_indices = j_indices = None
    x_min = x_max = y_min = y_max = None
    mean_extremes = []
    for member_no, the_id in enumerate(member_ids):
        fPath = os.path.join(folder_path, file_name_pattern % the_id)
        if member_no:
            data = data_select.get_field_from_file(path = fPath)
        else:
            # the first file also supplies the time axis, the cell indices
            # and the plot limits
            data, times, i_indices, j_indices = data_select.get_data_from_file(fPath)
            interest_x = x[i_indices, j_indices]
            interest_y = y[i_indices, j_indices]
            x_min, x_max, y_min, y_max = _get_limits(interest_x = interest_x, interest_y = interest_y)

        assert data is not None, "i = %d " % member_no

        month_range = dict(start_month = start_month, end_month = end_month)
        if not high:
            extremes = data_select.get_list_of_annual_minimums_for_domain(
                data, times, event_duration = timedelta(days = 15), **month_range)
        else:
            extremes = data_select.get_list_of_annual_maximums_for_domain(
                data, times, **month_range)

        mean_extremes.append(np.mean(extremes, axis = 0))

    # single-argument print(...) emits the same text under Python 2 and 3
    print("%s %s" % ("shape of extremes list ", np.array(mean_extremes).shape))

    def on_grid(cell_values):
        # scatter per-cell values onto a fully masked 2D field shaped like x
        field = np.ma.masked_all(x.shape)
        for cell_no, (i, j) in enumerate(zip(i_indices, j_indices)):
            field[i, j] = cell_values[cell_no]
        return field

    plot_utils.apply_plot_params(aspect_ratio=0.8)
    plt.figure()
    plt.subplots_adjust(hspace = 0.1, wspace = 0.3)

    n_columns = len(member_ids) // 2 + 1
    for panel_no, member_mean in enumerate(mean_extremes, 1):
        plt.subplot(2, n_columns, panel_no)
        to_plot = on_grid(member_mean)

        basemap.pcolormesh(x, y, to_plot.copy(), cmap = mpl.cm.get_cmap(name = "jet_r", lut = 18),
                           vmin = 0, vmax = 1.5)
        basemap.drawcoastlines(linewidth = 0.5)
        plt.colorbar(ticks = LinearLocator(numticks = 7), format = "%.2e")
        plt.xlim(x_min, x_max)
        plt.ylim(y_min, y_max)

    plt.savefig(out_file_name)

    # coefficient of variation of the extremes; done here so that the
    # extremes do not have to be fetched a second time
    max_value = 0.1
    plot_utils.apply_plot_params(width_pt=600)
    plt.figure()
    extreme_means = np.array(mean_extremes)
    mu = np.mean(extreme_means, axis=0)
    sigma = np.std(extreme_means, axis=0)
    to_plot = on_grid(sigma / mu)

    basemap.pcolormesh(x, y, to_plot.copy(), cmap = mpl.cm.get_cmap(name = "jet_r", lut = 30),
                       vmin = 0, vmax = max_value)
    basemap.drawcoastlines(linewidth = 0.5)
    plt.colorbar(ticks = LinearLocator(numticks = 11), format = "%.1e")
    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)
    plt.savefig("cv_" + out_file_name)
def main():
    """
    Compare modelled daily streamflow with gauging station observations.

    Reads the simulation addressed by members.control_id, matches grid cells
    to stations with get_corresponding_station, computes the mean value for
    each day of a 365-day stamp year over the years with gap-free
    observations, and plots the modelled (blue) and observed (red) curves,
    one subplot per station, annotated with the Nash-Sutcliffe coefficient
    and the correlation coefficient. The figure is saved to
    'performance_error.png'; afterwards calculate_skills and
    plot_selected_stations are called with the selected stations.

    Takes no arguments and returns nothing. The station list is cached in
    the pickle file 'stations_dump.bin' (relative path).

    NOTE(review): selected_station_ids is not defined inside this function,
    presumably it is a module-level collection - confirm.
    NOTE(review): if no station passes the filters, lines_for_mems is still
    None when the legend is assembled and lines.extend(lines_for_mems)
    raises TypeError.
    """
    skip_ids = ['081007', '081002', "042607", "090605"]

    #comment to plot for all ensemble members
    #(with an empty list every loop over members.current_ids below is a no-op)
    members.current_ids = []


    #pylab.rcParams.update(params)
    path_format = 'data/streamflows/hydrosheds_euler9/%s_discharge_1970_01_01_00_00.nc'
    #path_format = "data/streamflows/hydrosheds_rk4_changed_partiotioning/%s_discharge_1970_01_01_00_00.nc"
    #path_format = "data/streamflows/piloted_by_ecmwf/ecmwf_nearest_neighbor_discharge_1970_01_01_00_00.nc"
    path_to_analysis_driven = path_format % members.control_id

    #data and time axes of the ensemble members, keyed by simulation id
    simIdToData = {}
    simIdToTimes = {}
    for the_id in members.current_ids:
        thePath = path_format % the_id
        [simIdToData[the_id], simIdToTimes[the_id], i_list, j_list] = data_select.get_data_from_file(thePath)


    old = True #in the old version drainage and lon,lats in the file are 1D


    #data of the control simulation; it is indexed below as data[time_index, cell_index]
    [ data, times, i_list, j_list ] = data_select.get_data_from_file(path_to_analysis_driven)

    cell_list = []
    ij_to_cell = {}
    prev_cell_indices = []
    tot_rof = None
    if old:
        #surf_rof = data_select.get_data_from_file(path_format % ("aex",), field_name="")
        the_path = path_format % ("aex")
        static_data_path = "data/streamflows/hydrosheds_euler9/infocell9.nc"
        #ntimes x ncells
        tot_rof = data_select.get_field_from_file(the_path, field_name="total_runoff")
        cell_areas = data_select.get_field_from_file(static_data_path, field_name="AREA")

        #convert the runoff to m^3/s
        #(presumably AREA is in km^2 and the runoff in mm/s - TODO confirm the units)
        tot_rof *= 1.0e6 * cell_areas[i_list, j_list] / 1.0e3


        flow_dir_values = data_select.get_field_from_file(static_data_path,
            field_name="flow_direction_value")[i_list, j_list]

        #one Cell per grid point, the cell id is the position of the point in i_list/j_list
        cell_list = map(lambda i, j, the_id: Cell(id = the_id, ix = i, jy = j),
                                i_list, j_list, xrange(len(i_list)))


        ij_to_cell = dict( zip( zip(i_list, j_list), cell_list ))


        #link each cell to the cell its flow direction points to,
        #or to None when that neighbour is not among the listed points
        for ix, jy, aCell, dir_val in zip( i_list, j_list, cell_list, flow_dir_values):
            i_next, j_next = direction_and_value.to_indices(ix, jy, dir_val)
            the_key = (i_next, j_next)
            if ij_to_cell.has_key(the_key):
                next_cell = ij_to_cell[the_key]
            else:
                next_cell = None
            assert isinstance(aCell, Cell)
            aCell.set_next(next_cell)

        #determine list of indices of the previous cells for each cell
        #in this case they are equal to the ids
        #(the id of the cell itself is appended as the last element)

        for aCell in cell_list:
            assert isinstance(aCell, Cell)
            prev_cells = aCell.get_upstream_cells()
            prev_cell_indices.append(map(lambda c: c.id, prev_cells))
            prev_cell_indices[-1].append(aCell.id)



    #2D fields of longitudes, latitudes and drainage areas
    if not old:
        da_2d = data_select.get_field_from_file(path_to_analysis_driven, 'accumulation_area')
        lons = data_select.get_field_from_file(path_to_analysis_driven, field_name = 'longitude')
        lats = data_select.get_field_from_file(path_to_analysis_driven, field_name = 'latitude')
    else:
        lons = polar_stereographic.lons
        lats = polar_stereographic.lats
        #scatter the 1D drainage values onto the 2D grid, cells not listed stay 0
        da_2d = np.zeros(lons.shape)
        drainage = data_select.get_field_from_file(path_to_analysis_driven, 'drainage')
        for i, j, theDa in zip(i_list, j_list, drainage):
            da_2d[i, j] = theDa




    data_step = timedelta(days = 1)


    #station data are read once and then cached in a pickle file
    #NOTE(review): the files passed to pickle are never closed explicitly
    stations_dump = 'stations_dump.bin'
    if os.path.isfile(stations_dump):
        print 'unpickling'
        stations = pickle.load(open(stations_dump))
    else:
        stations = read_station_data()
        pickle.dump(stations, open(stations_dump, 'w'))

#   Did this to solve text encoding issues
#    reload(sys)
#    sys.setdefaultencoding('iso-8859-1')


    #results accumulated over the selected stations (parallel lists)
    selected_stations = []
    selected_model_values = []
    selected_station_values = []

    grid_drainages = []
    grid_lons = []
    grid_lats = []
    plot_utils.apply_plot_params(width_pt= None, font_size=9, aspect_ratio=2.5)
    #plot_utils.apply_plot_params(font_size=9, width_pt=None)
    #the grid provides 5 x ncols = 10 subplot slots
    #NOTE(review): an 11th selected station would presumably fail at gs[row, col] - confirm
    ncols = 2
    gs = gridspec.GridSpec(5, ncols)
    fig = plt.figure()

    assert isinstance(fig, Figure)

    current_subplot = 0

    #legend entries; the line handles are taken from the last plotted station
    label1 = "modelled"
    label2 = "observed"
    line1 = None
    line2 = None
    lines_for_mems = None
    labels_for_mems = None
    #fig.subplots_adjust(hspace = 0.9, wspace = 0.4, top = 0.9)




    index_objects = []
    for index, i, j in zip( range(len(i_list)) , i_list, j_list):
        index_objects.append(IndexObject(positionIndex = index, i = i, j = j))

    #sort by the j index, largest first
    #(the original note says "sort by latitude"; presumably j grows with latitude - TODO confirm)
    index_objects.sort( key = lambda x: x.j, reverse = True)

    #simulation id to continuous data map
    #NOTE(review): the keys come from members.all_current while the lookups below
    #use members.current_ids - harmless as long as current_ids is empty
    simIdToContData = {}
    for the_id in members.all_current:
        simIdToContData[the_id] = {}

    for indexObj in index_objects:
        i = indexObj.i
        j = indexObj.j
        # @type indexObj IndexObject
        index = indexObj.positionIndex
        station = get_corresponding_station(lons[i, j], lats[i, j], da_2d[i, j], stations)


        #no station for this cell, or the station was already taken by another cell
        if station is None or station in selected_stations:
            continue

        #if you want to compare with stations add their ids to the selected
        if station.id not in selected_station_ids:
            continue


        #skip some stations
        if station.id in skip_ids:
            continue


        #try now to find the point with the closest drainage area
#        current_diff = np.abs(station.drainage_km2 - da_2d[i, j])
#        for di in xrange(-1,2):
#            for dj in xrange(-1,2):
#                the_diff = np.abs(station.drainage_km2 - da_2d[i + di, j + dj])
#                if the_diff < current_diff: #select different grid point
#                    current_diff = the_diff
#                    i = i + di
#                    j = j + dj
#                    indexObj.i = i
#                    indexObj.j = j




        #found station plot data
        print station.name


        #period covered by both the model and the station
        start_date = max( np.min(times), np.min(station.dates))
        end_date = min( np.max(times),  np.max(station.dates))

        #use whole calendar years only: unless the period starts on 1 January,
        #start on 1 January of the following year ...
        if start_date.day > 1 or start_date.month > 1:
            start_date = datetime(start_date.year + 1, 1, 1,0,0,0)

        #... and unless it ends on 31 December, end on 31 December of the previous year
        if end_date.day < 31 or end_date.month < 12:
            end_date = datetime(end_date.year - 1, 12, 31,0,0,0)



        #no complete common year
        if end_date < start_date:
            continue


        #select data for years that do not have gaps
        start_year = start_date.year
        end_year = end_date.year
        continuous_station_data = {}
        continuous_model_data = {}
        num_of_continuous_years = 0
        for year in xrange(start_year, end_year + 1):
            # @type station Station
            station_data = station.get_continuous_dataseries_for_year(year)
            #a year is used when it has at least 365 values
            if len(station_data) >= 365:
                num_of_continuous_years += 1

                #save station data
                for d, v in station_data.iteritems():
                    continuous_station_data[d] = v

                #save model data
                #(the break presumably relies on times being in ascending order - TODO confirm)
                for t_index, t in enumerate(times):
                    if t.year > year: break
                    if t.year < year: continue
                    continuous_model_data[t] = data[t_index, index]
                #fill the map sim id to cont model data
                for the_id in members.current_ids:
                    #save model data
                    for t_index, t in enumerate(simIdToTimes[the_id]):
                        if t.year > year: break
                        if t.year < year: continue
                        simIdToContData[the_id][t] = simIdToData[the_id][t_index, index]


        #if the length of continuous observation is less than 10 years, skip
        if len(continuous_station_data) < 3650: continue

        print 'Number of continuous years for station %s is %d ' % (station.id, num_of_continuous_years)

        #skip stations with less than 20 years of usable data
        #if num_of_continuous_years < 2:
        #    continue

        selected_stations.append(station)

#        plot_total_precip_for_upstream(i_index = i, j_index = j, station_id = station.id,
#                                        subplot_count = current_subplot,
#                                        start_date = datetime(1980,01,01,00),
#                                        end_date = datetime(1996,12,31,00)
#                                        )

        #tmp (if do not need to replot streamflow)
#        current_subplot += 1
#        continue

        ##Calculate means for each day of year,
        ##as a stamp year we use 2001, ignoring the leap year
        stamp_year = 2001
        start_day = datetime(stamp_year, 1, 1, 0, 0, 0)
        stamp_dates = []
        mean_data_model = []
        mean_data_station = []
        simIdToMeanModelData = {}
        for the_id in members.all_current:
            simIdToMeanModelData[the_id] = []

        for day_number in xrange(365):
            the_day = start_day + day_number * data_step
            stamp_dates.append(the_day)

            model_data_for_day = []
            station_data_for_day = []

            #select model data for each simulation, day
            #and then save mean for each day
            simIdToModelDataForDay = {}
            for the_id in members.current_ids:
                simIdToModelDataForDay[the_id] = []

            #gather the values of this calendar day from every year that has observations
            #NOTE(review): continuous_model_data[the_date] raises KeyError if the model
            #has no value stamped exactly at the observation date
            for year in xrange(start_year, end_year + 1):
                the_date = datetime(year, the_day.month, the_day.day, the_day.hour, the_day.minute, the_day.second)
                if continuous_station_data.has_key(the_date):
                    model_data_for_day.append(continuous_model_data[the_date])
                    station_data_for_day.append(continuous_station_data[the_date])
                    for the_id in members.current_ids:
                        simIdToModelDataForDay[the_id].append(simIdToContData[the_id][the_date])

            assert len(station_data_for_day) > 0
            mean_data_model.append(np.mean(model_data_for_day))
            mean_data_station.append(np.mean(station_data_for_day))
            for the_id in members.current_ids:
                simIdToMeanModelData[the_id].append(np.mean(simIdToModelDataForDay[the_id]))


        #skip stations with small discharge
        #if np.max(mean_data_station) < 300:
        #    continue

        #subplots are filled row by row
        row = current_subplot// ncols
        col = current_subplot % ncols
        ax = fig.add_subplot(gs[row, col])
        assert isinstance(ax, Axes)
        current_subplot += 1

        #put "Streamflow label on the y-axis"
        #(a single label for the whole figure, attached to the first subplot)
        if row == 0 and col == 0:
            ax.annotate("Streamflow (${\\rm m^3/s}$)", (0.025, 0.7) , xycoords = "figure fraction",
                rotation = 90, va = "top", ha = "center")

        #unrouted runoff summed over the upstream cells, reduced to values for the stamp dates;
        #it only feeds ns_unr, corr_unr and upper_unr below, the curve itself is not plotted
        selected_dates = sorted( continuous_station_data.keys() )
        unrouted_stfl = get_unrouted_streamflow_for(selected_dates = selected_dates,
            all_dates=times, tot_runoff=tot_rof, cell_indices=prev_cell_indices[index])

        unrouted_daily_normals = data_select.get_means_for_stamp_dates(stamp_dates, all_dates= selected_dates,
            all_data=unrouted_stfl)

        #Calculate Nash-Sutcliffe coefficient
        mean_data_model = np.array(mean_data_model)
        mean_data_station = np.array( mean_data_station )

        #mod = _get_monthly_means(stamp_dates, mean_data_model)
        #sta = _get_monthly_means(stamp_dates, mean_data_station)

        #only needed by the commented-out monthly plots below
        month_dates = [ datetime(stamp_year, m, 1) for m in xrange(1,13) ]


        line1, = ax.plot(stamp_dates, mean_data_model, linewidth = 3, color = "b")
        #line1, = ax.plot(month_dates, mod, linewidth = 3, color = "b")
        upper_model = np.max(mean_data_model)

        line2, = ax.plot(stamp_dates, mean_data_station, linewidth = 3, color = "r")
        #line2, = ax.plot(month_dates, sta, linewidth = 3, color = "r")

        #line3, = ax.plot(stamp_dates, unrouted_daily_normals, linewidth = 3, color = "y")


        mod = mean_data_model
        sta = mean_data_station

        #ns = 1 - sum((model - obs)^2) / sum((obs - mean(obs))^2)
        ns = 1.0 - np.sum((mod - sta) ** 2) / np.sum((sta - np.mean(sta)) ** 2)

        #snap very small values to 0, presumably so that the label does not show "-0.00"
        if np.abs(ns) < 0.001:
            ns = 0

        corr_coef = np.corrcoef([mod, sta])[0,1]
        #the same two scores for the unrouted runoff (calculated but not displayed)
        ns_unr = 1.0 - np.sum((unrouted_daily_normals - sta) ** 2) / np.sum((sta - np.mean(sta)) ** 2 )
        corr_unr = np.corrcoef([unrouted_daily_normals, sta])[0, 1]

        #relative difference between model and station drainage areas, in percent
        da_diff = (da_2d[i, j] - station.drainage_km2) / station.drainage_km2 * 100
        ax.annotate("ns = %.2f\nr = %.2f"
                  % (ns, corr_coef), (0.95, 0.90), xycoords = "axes fraction",
            va = "top", ha = "right",
            font_properties = FontProperties(size = 9)
        )



        #plot member simulation data
        lines_for_mems = []
        labels_for_mems = []

        #lines_for_mems.append(line3)
        #labels_for_mems.append("Unrouted total runoff")


        for the_id in members.current_ids:
            the_line, = ax.plot(stamp_dates, simIdToMeanModelData[the_id], "--", linewidth = 3)
            lines_for_mems.append(the_line)
            labels_for_mems.append(the_id)


        ##calculate mean error
        #(means_for_members is filled here but not used afterwards)
        means_for_members = []
        for the_id in members.current_ids:
            means_for_members.append(np.mean(simIdToMeanModelData[the_id]))





        upper_station = np.max(mean_data_station)
        upper_unr = np.max(unrouted_daily_normals)

        #y ticks at 0, half and upper; upper is the larger of the two maxima
        #rounded to a multiple of 100, with 100 as the smallest axis maximum
        upper = np.max([upper_model, upper_station])
        upper = round(upper / 100 ) * 100
        half = round( 0.5 * upper / 100 ) * 100
        if upper <= 100:
            upper = 100
            half = upper / 2

        print half, upper
        print 10 * '='

        ax.set_yticks([0, half , upper])
        assert isinstance(station, Station)

        print("i = {0}, j = {1}".format(indexObj.i, indexObj.j))
        print(lons[i,j], lats[i,j])
        print("id = {0}, da_sta = {1}, da_mod = {2}, diff = {3} %".format(station.id ,station.drainage_km2, da_2d[i,j], da_diff))

        #remember what was plotted, these lists are passed to calculate_skills
        grid_drainages.append(da_2d[i, j])
        grid_lons.append(lons[i, j])
        grid_lats.append(lats[i, j])

        selected_station_values.append(mean_data_station)
        selected_model_values.append(mean_data_model)



        #plot_swe_for_upstream(i_index = i, j_index = j, station_id = station.id)




        #plt.ylabel("${\\rm m^3/s}$")
        #subplot title: station id and its coordinates with hemisphere letters
        west_east = 'W' if station.longitude < 0 else 'E'
        north_south = 'N' if station.latitude > 0 else 'S'
        title_data = (station.id, np.abs(station.longitude), west_east,
                                  np.abs(station.latitude), north_south)
        ax.set_title('%s: (%3.1f%s, %3.1f%s)' % title_data)


        #two ticks per month: on the 1st and 15 days later
        date_ticks = []
        for month in xrange(1,13):
            the_date = datetime(stamp_year, month, 1)
            date_ticks.append(the_date)
            date_ticks.append(the_date + timedelta(days = 15))
        ax.xaxis.set_ticks(date_ticks)



        major_ticks = ax.xaxis.get_major_ticks()


        #tick marks are drawn only for the first day of a month (even indices),
        #labels only for the mid-month tick of every second month (indices 1, 5, 9, ...)
        #NOTE(review): label1On is an attribute of older matplotlib Tick objects -
        #confirm that the installed matplotlib version still honours it
        for imtl, mtl in enumerate(major_ticks):
            mtl.tick1line.set_visible(imtl % 2 == 0)
            mtl.tick2line.set_visible(imtl % 2 == 0)
            mtl.label1On = (imtl % 4 == 1)

#        ax.xaxis.set_major_locator(
#            mpl.dates.MonthLocator(bymonth = range(2,13,2))
#        )


        #abbreviated month names as tick labels
        ax.xaxis.set_major_formatter(
            mpl.dates.DateFormatter('%b')
        )





    #figure legend: modelled line, member lines (if any), observed line;
    #the handles are those of the last station plotted in the loop above
    #NOTE(review): raises TypeError here when no station was plotted (lines_for_mems is None)
    lines = [line1]
    lines.extend(lines_for_mems)
    lines.append(line2)
    lines = tuple( lines )


    labels = [label1]
    labels.extend(labels_for_mems)
    labels.append(label2)
    labels = tuple(labels)

    fig.legend(lines, labels, 'lower right', ncol = 1)
#    fig.text(0.05, 0.5, "Streamflow (${\\rm m^3/s}$)",
#                  rotation=90,
#                  ha = 'center', va = 'center'
#                  )


    fig.tight_layout(pad = 2)
    fig.savefig('performance_error.png')




    
   # assert len(selected_dates_with_gw[0]) == len(selected_station_dates[0])

    #stamp_dates passed here is the list built for the last processed station
    do_skill_calculation = True
    if do_skill_calculation:
        calculate_skills(selected_stations,
                        stamp_dates, selected_station_values,
                        selected_model_values,
                        grid_drainages,
                        grid_lons, grid_lats)


    do_plot_selected_stations = True
    if do_plot_selected_stations:
        plot_selected_stations(selected_stations, use_warpimage=False, plot_ts = False,
                               i_list = i_list, j_list = j_list)
def get_station_and_corresponding_model_data(path = 'data/streamflows/hydrosheds_euler10_spinup100yrs/aex_discharge_1970_01_01_00_00.nc'):
    """
    Return a dictionary {station: ModelPoint} with the model time series of
    the grid cell matched to each station.

    If the file 'selected_stations_and_model_data.bin' exists, the dictionary
    is unpickled from it and path is not read at all. Otherwise the data are
    read from path, every grid cell is matched to a station with
    get_corresponding_station (a station is used only once, for the first
    cell it is matched to) and the result is pickled to that file.

    path -- path to the file with the model data, used only when there is no
    cached result

    Raises AssertionError if the resulting dictionary is empty.

    NOTE: pickle must only be used on trusted files, both caches read here are
    expected to be produced by this module.
    """
    saved_selected_stations_file = 'selected_stations_and_model_data.bin'
    if os.path.isfile(saved_selected_stations_file):
        #the cache is written in binary mode below, so it is read the same way
        with open(saved_selected_stations_file, 'rb') as cache_file:
            result = pickle.load(cache_file)
    else:
        result = {}
        print('getting data from file  %s' % path)

        [data, times, i_list, j_list] = data_select.get_data_from_file(path)
        drainage_area = data_select.get_field_from_file(path, field_name = 'accumulation_area')

        if drainage_area is not None:
            #the file contains 2D fields
            lons = data_select.get_field_from_file(path, field_name = 'longitude')
            lats = data_select.get_field_from_file(path, field_name = 'latitude')
            da_2d = drainage_area
        else:
            #only a 1D drainage field is available: scatter it onto the 2D grid,
            #the cells that are not listed stay 0
            drainage_area = data_select.get_field_from_file(path, field_name = 'drainage')
            da_2d = np.zeros(polar_stereographic.xs.shape)
            lons = polar_stereographic.lons
            lats = polar_stereographic.lats
            for index, (i, j) in enumerate(zip(i_list, j_list)):
                da_2d[i, j] = drainage_area[index]

        #the station list is cached as well; the text mode is kept on purpose,
        #because the same dump is written in text mode elsewhere in this file
        stations_dump = 'stations_dump.bin'
        if os.path.isfile(stations_dump):
            print('unpickling')
            with open(stations_dump) as dump_file:
                stations = pickle.load(dump_file)
        else:
            stations = read_station_data()
            with open(stations_dump, 'w') as dump_file:
                pickle.dump(stations, dump_file)

        #Python 2 only workaround for text encoding issues (the default
        #encoding is changed for the whole process)
        reload(sys)
        sys.setdefaultencoding('iso-8859-1')

        selected_stations = []
        for index, (i, j) in enumerate(zip(i_list, j_list)):
            station = get_corresponding_station(lons[i, j], lats[i, j], da_2d[i, j], stations)
            if station is None or station in selected_stations:
                continue
            selected_stations.append(station)
            result[station] = ModelPoint(times, data[:, index])

            #report the station that was found
            print('=' * 20)
            print('%s %s' % (station.get_timeseries_length(), station.id))
            print(station.name)
            print(station.id)

        with open(saved_selected_stations_file, 'wb') as cache_file:
            pickle.dump(result, cache_file)

#    for station, point in result.iteritems():
#        plt.plot(station.dates, station.values, label = station.name)
#    plt.legend()
#    plt.show()
    assert len(result) > 0
    return result
Пример #19
0
def compare_means(member = 'aet', my_data_path = '',
                  start_date = datetime(1961, 1, 1, 0, 0),
                  end_date = datetime(1990, 12, 31, 0, 0)):
    """
    Compare the annual maximums selected from the file my_data_path with the
    values read by VincentMaximumsReader for the given member.

    For every grid point the minimum, maximum and mean of both data sets are
    printed (labelled 'Sasha' and 'Vincent'). Then three scatter plots
    (means, minimums and maximums per grid point, Vincent on the x axis) are
    drawn with a 1:1 line and saved to '<member>_scatter_max.png'.

    member -- simulation id, used in the name of the reference file and of
    the saved figure
    my_data_path -- path passed to data_select.get_data_from_file
    start_date, end_date -- period passed to the annual maximum selection
    """
    streamflows, times, i_array, j_array = data_select.get_data_from_file(my_data_path)

    #annual maximums of 1-day events over the whole year,
    #the second axis of the result runs over the grid points
    my_data = data_select.get_list_of_annual_maximums_for_domain(
        streamflows, times,
        start_date = start_date, end_date = end_date,
        start_month = 1, end_month = 12,
        event_duration = timedelta(days = 1))

    v = VincentMaximumsReader(
        data_path = 'data/streamflows/Vincent_annual_max/mapHIGH_{0}.txt'.format(member))

    report = '{0}: i = {1}, j = {2}, min = {3}, max = {4}, mean = {5}'
    vmeans, vmins, vmaxs = [], [], []
    for i, j, column in zip(i_array, j_array, range(my_data.shape[1])):
        own = my_data[:, column]
        print(report.format('Sasha', i, j, np.min(own), np.max(own), np.mean(own)))

        #the reader is queried with indices shifted by one
        #(presumably it is 1-based - TODO confirm)
        reference = v.get_data_at(i + 1, j + 1)
        ref_min = np.min(reference)
        ref_max = np.max(reference)
        ref_mean = np.mean(reference)
        print(report.format('Vincent', i, j, ref_min, ref_max, ref_mean))

        vmeans.append(ref_mean)
        vmins.append(ref_min)
        vmaxs.append(ref_max)
        print('=' * 30)

    #one scatter plot per statistic: (title, Vincent's values, reduction applied to my_data)
    plt.subplots_adjust(hspace = 0.5)
    panels = (
        ('annual maximums, \n average for each grid point', vmeans, np.mean),
        ('annual maximums, \n minimum for each grid point', vmins, np.min),
        ('annual maximums, \n maximum for each grid point', vmaxs, np.max),
    )
    for panel_number, (caption, vincent_values, reduce_my_data) in enumerate(panels, 1):
        plt.subplot(2, 2, panel_number)
        plt.title(caption, override)
        plt.scatter(vincent_values, reduce_my_data(my_data, axis = 0), linewidth = 0)
        plt.xlabel('Vincent')
        plt.ylabel('Sasha')

        #1:1 line over the current x range
        x_range = plt.xlim()
        plt.plot(x_range, x_range, color = 'k')
        plt.grid(True)

    plt.savefig('{0}_scatter_max.png'.format(member), bbox_inches = 'tight')
Пример #20
0
    def init_from_path(self, path = ''):
        """
        Read the file at path with data_select.get_data_from_file and store
        the four returned items in self._data, self.times, self.i_indices
        and self.j_indices (in this order).
        """
        loaded = data_select.get_data_from_file(path)
        self._data, self.times, self.i_indices, self.j_indices = loaded
Пример #21
0
def main(data_path = DEFAULT_PATH):
    """
    Plot the natural logarithm of the streamflow averaged over the first
    axis of the data read from data_path, using PyNGL.

    The result is drawn as a raster-filled contour map in the Lambert
    conformal projection on a "ps" workstation named "test_pyngl". The
    minimum and maximum longitude of the data points are printed.
    """
    #get data to memory
    [data, times, x_indices, y_indices] = data_select.get_data_from_file(data_path)
    the_mean = np.mean(data, axis = 0)

    lons2d = polar_stereographic.lons
    lats2d = polar_stereographic.lats
    lons = lons2d[x_indices, y_indices]
    lats = lats2d[x_indices, y_indices]

    #workstation with the color map
    wres = Ngl.Resources()
    wres.wkColorMap = "BlGrYeOrReVi200"
    wks = Ngl.open_wks("ps", "test_pyngl", wres)

    #plot resources, assigned in the listed order
    plot_settings = [
        ("cnFillMode", "RasterFill"),
        #("cnFillOn", True),             Turn on contour fill
        #("cnMonoFillPattern", True),    Turn solid fill back on.
        #("cnMonoFillColor", False),     Use multiple colors.
        ("cnLineLabelsOn", False),             # Turn off line labels.
        ("cnInfoLabelOn", False),              # Turn off informational
        ("pmLabelBarDisplayMode", "Always"),   # Turn on label bar.
        ("cnLinesOn", False),                  # Turn off contour lines.
        ("mpProjection", "LambertConformal"),
        ("mpDataBaseVersion", "MediumRes"),
        #map limits (disabled):
        #("mpLimitMode", "LatLon"),      limit map via lat/lon
        #("mpMinLatF", np.min(lats)),    map area
        #("mpMaxLatF", np.max(lats)),    latitudes
        #("mpMinLonF", np.min(lons)),    and
        #("mpMaxLonF", np.max(lons)),    longitudes
        ("tiMainFont", 26),
        ("tiXAxisFont", 26),
        ("tiYAxisFont", 26),
        ("sfXArray", lons2d),
        ("sfYArray", lats2d),
        # Set title resources.
        ("tiMainString", "Logarithm of mean annual streamflow m**3/s"),
    ]
    res = Ngl.Resources()
    for resource_name, resource_value in plot_settings:
        setattr(res, resource_name, resource_value)

    print("%s %s" % (np.min(lons), np.max(lons)))

    #2D field that is masked everywhere except at the data points
    log_mean_field = np.ma.masked_all(lons2d.shape)
    log_mean_field[x_indices, y_indices] = np.log(the_mean[:])
    Ngl.contour_map(wks, log_mean_field[:, :], res)
    Ngl.end()
def get_std_and_mean_using_bootstrap_for_merged_means(sim_ids = None, folder_path = "data/streamflows/hydrosheds_euler9",
                                     months = range(1, 13), n_samples = 1000):

    """
    Return the pair (std_result, mean_result), one value per grid point:

    std_result -- standard deviation over the n_samples bootstrap samples of
    the bootstrapped means averaged over the simulations
    mean_result -- mean of the modelled means over the simulations and over
    the rows of the per-simulation matrices

    sim_ids -- ids of the simulations to merge; a file in folder_path belongs
    to a simulation when the part of its name before the first "_" is in
    sim_ids. It has to be a container, the default None fails in the
    membership test with TypeError.
    folder_path -- folder with the data files
    months -- months passed to data_select.get_means_over_months_for_each_year
    n_samples -- number of bootstrap samples drawn for each simulation

    The result is cached in the file given by _get_cache_file_path; when that
    file exists its content (the pickled list [std_result, mean_result]) is
    returned without reading any data.

    NOTE: pickle must only be used on trusted files, the cache is expected to
    be written by this function.
    """
    cache_file = _get_cache_file_path(months=months, sim_ids = sim_ids)
    if os.path.isfile(cache_file):
        #text mode is kept so that the caches written before stay readable
        with open(cache_file) as cache:
            return pickle.load(cache)

    #determine paths to the files with data
    file_paths = [os.path.join(folder_path, f) for f in os.listdir(folder_path)
                  if f.split("_")[0] in sim_ids]

    real_means = []
    members_boot_means = []
    for file_path in file_paths:
        streamflow, times, i_indices, j_indices = data_select.get_data_from_file(file_path)

        #for each year and for each gridcell get mean value for the period
        means_dict = data_select.get_means_over_months_for_each_year(times, streamflow, months = months)

        #rows follow the sorted keys of means_dict
        data_matrix = np.array([means_dict[key] for key in sorted(means_dict)])

        real_means.append(data_matrix) #save modelled means, in order to calculate mean of the merged data

        n_rows = data_matrix.shape[0]
        boot_means = []
        for _ in range(n_samples):
            #resample the rows with replacement and average them
            index_vector = np.random.randint(0, n_rows, n_rows)
            boot_means.append( np.mean(data_matrix[index_vector, :], axis = 0) )

        members_boot_means.append( boot_means )

    #take average over members
    boot_means = np.array(members_boot_means)
    print(boot_means.shape)
    boot_means = boot_means.mean(axis = 0) #nsamples x npoints

    #the prints of the hard-coded columns 499 and 19 were removed: they were
    #debugging leftovers that raised IndexError for domains with fewer grid points
    assert boot_means.shape[0] == n_samples, boot_means.shape

    print("boot_means.shape =  %s" % (boot_means.shape,))
    std_result = np.std(boot_means, axis = 0)
    mean_result = np.array(real_means).mean(axis = 0).mean(axis = 0)
    with open(cache_file, mode="w") as cache:
        pickle.dump([std_result, mean_result], cache)
    return std_result, mean_result