def read_station_data(folder = 'data/cehq_measure_data'):
    """
    Build one Station per text file found directly inside ``folder``.

    Every directory entry whose name contains '.txt' is handed to
    Station.parse_from_cehq; all other entries are ignored.

    :param folder: directory to scan
    :return: list of the parsed Station objects, in os.listdir order
    """
    txt_names = [name for name in os.listdir(folder) if '.txt' in name]

    parsed = []
    for name in txt_names:
        station = Station()
        station.parse_from_cehq(os.path.join(folder, name))
        parsed.append(station)
    return parsed
# Example #2
# 0
def plot_subsurf_runoff(ax, station, sim_name_to_station_to_model_point, sim_names=None, day_stamps=None):
    """
    Plot the daily climatology of subsurface runoff (model variable "TDRA")
    for every model point matched to ``station``, one line per model point.

    :param ax: matplotlib Axes to draw on
    :param station: key used to look up the model points of each simulation
    :param sim_name_to_station_to_model_point: nested dictionary
        {simulation name: {station: [mp1, mp2, ..., mpN]}}
    :param sim_names: names of the simulations to plot, in plotting order;
        None means every simulation present in sim_name_to_station_to_model_point
    :param day_stamps: dates passed to the model points as ``stamp_dates``;
        defaults to Station.get_stamp_days(2001)
    :return: tuple (labels, handles) of the plotted lines
    """
    assert isinstance(ax, Axes)

    #initialize day stamps if it is not passed
    if day_stamps is None:
        day_stamps = Station.get_stamp_days(2001)

    # None used to be iterated directly (TypeError); treat it as "all simulations"
    if sim_names is None:
        sim_names = list(sim_name_to_station_to_model_point)

    ax.plot(day_stamps, [0] * len(day_stamps), "k", lw=0) #so the colors correspond to the same simulation on all panels

    handles = []
    labels = []

    ax.set_title("Subsurface runoff (${\\rm m^3/s}$)")
    coef = 1.0e-3 #to convert mm to meters
    for sim_name in sim_names:
        station_to_mps = sim_name_to_station_to_model_point[sim_name]
        if station not in station_to_mps:
            continue

        for mp in station_to_mps[station]:
            # presumably accumulation_area is in km^2, converted here to m^2 - TODO confirm
            the_area = mp.accumulation_area * 1.0e6
            dates, values = mp.get_daily_climatology_for_complete_years(stamp_dates=day_stamps, varname="TDRA")
            label = "{0}".format(sim_name)
            h = ax.plot(dates, values * the_area * coef, label=label, lw=3)

            handles.append(h[0])
            labels.append(label)

            #ax.xaxis.set_major_formatter(DateFormatter("%d\n%b"))
            #ax.xaxis.set_major_locator(MonthLocator(bymonth=range(1,13,3), bymonthday=15 ))

    return labels, handles
# Example #3
# 0
def get_station_objects(start_year=1980, end_year=2010, sel_names=None):
    """
    Build Station objects holding observed ice depth for the placemarks listed
    in the KML file STATION_COORDS_FILE.

    A placemark is kept when
      * its name contains one of ``sel_names`` (case-insensitive), if given,
      * its coordinates fall inside the hard-coded lon/lat box, and
      * at least ``nvals_min`` observation rows, whose station name starts with
        the placemark name, fall within [start_year, end_year].

    :param start_year: first year of observations to keep (inclusive)
    :param end_year: last year of observations to keep (inclusive)
    :param sel_names: optional iterable of name fragments; None disables the name filter
    :return: list of Station objects (id, lon, lat, {date: ice depth})
    """
    # read ice depth values
    df = get_obs_data()

    lon_min, lon_max = -100, 0
    lat_min, lat_max = 40, 90

    nvals_min = 100

    # parse inside a with-block so that the file handle is not leaked
    with STATION_COORDS_FILE.open() as kml_file:
        root = parser.parse(kml_file).getroot()

    station_elts = root.Document.Placemark

    # lower-case once instead of once per placemark
    obs_names_lower = df.station_name.str.lower()
    sel_names_lower = None if sel_names is None else [name.lower() for name in sel_names]

    stations = []
    for el in station_elts:

        lon, lat, _ = [float(c.strip()) for c in el.Point.coordinates.text.split(",")]

        el_name_lower = el.name.text.lower()

        # keep only the placemarks matching one of the requested names
        if sel_names_lower is not None and not any(name in el_name_lower for name in sel_names_lower):
            continue

        if not ((lon_min <= lon <= lon_max) and (lat_min <= lat <= lat_max)):
            continue

        print("{}: {}".format(el.name, el.Point.coordinates))

        df_s = df.loc[obs_names_lower.str.startswith(el_name_lower)]

        df_s = df_s.loc[(df_s.year >= start_year) & (df_s.year <= end_year)]

        # too few observations in the requested period
        if len(df_s) < nvals_min:
            continue

        print(len(df_s))
        d_to_v = dict(zip(df_s["Date"][:], df_s["ice_depth"][:]))

        stations.append(Station(st_id=df_s.station_name.iloc[0], lon=lon, lat=lat, date_to_value=d_to_v))

    return stations
# Example #4
# 0
def plot_hydrographs(ax, station, sim_name_to_station_to_model_point,
                     day_stamps=None, sim_names=None):
    """
    Plot climatological hydrographs: the observed daily climatology of the
    station (black line) and the modelled streamflow ("STFL") of every model
    point matched to the station in each simulation.

    :param ax: matplotlib Axes to draw on
    :param station: Station with the observed data
    :param sim_name_to_station_to_model_point: nested dictionary
        {simulation name: {station: [mp1, mp2, ..., mpN]}}
    :param day_stamps: dates passed as ``stamp_dates`` to the climatology
        methods; defaults to Station.get_stamp_days(2001)
    :param sim_names: names of the simulations to plot, in plotting order;
        None means every simulation present in sim_name_to_station_to_model_point
    :return: tuple (labels, handles) of the plotted lines; None when the station
        has fewer than 6 complete years or when no model point was plotted
    """
    assert isinstance(station, Station)
    assert isinstance(ax, Axes)

    years = station.get_list_of_complete_years()

    #initialize day stamps if it is not passed
    if day_stamps is None:
        day_stamps = Station.get_stamp_days(2001)

    if len(years) < 6:
        return

    # None used to be iterated directly (TypeError); treat it as "all simulations"
    if sim_names is None:
        sim_names = list(sim_name_to_station_to_model_point)

    handles = []
    labels = []
    dates, obs_data = station.get_daily_climatology_for_complete_years_with_pandas(stamp_dates=day_stamps, years=years)
    obs_ann_mean = np.mean(obs_data)
    label = "Obs: ann.mean = {0:.1f}".format(obs_ann_mean)
    h = ax.plot(dates, obs_data, "k", lw=2, label=label)

    handles.append(h[0])
    labels.append(label)

    mp = None

    for sim_name in sim_names:
        station_to_mps = sim_name_to_station_to_model_point[sim_name]

        # skip the simulations which have no model point for this station
        # (the test was inverted before: matched stations were skipped and
        # unmatched ones raised a KeyError on the lookup below)
        if station not in station_to_mps:
            continue

        for mp in station_to_mps[station]:
            assert isinstance(mp, ModelPoint)
            dates, values = mp.get_daily_climatology_for_complete_years(stamp_dates=day_stamps, varname="STFL")

            label = "{0}: {1:.2f} \n ann.mean = {2:.1f}".format(sim_name,
                                                                mp.mean_upstream_lake_fraction, np.mean(values))
            h = ax.plot(dates, values, label=label, lw=3)

            handles.append(h[0])
            labels.append(label)

    ax.xaxis.set_major_formatter(DateFormatter("%d\n%b"))
    ax.xaxis.set_major_locator(MonthLocator(bymonth=list(range(1, 13, 3)), bymonthday=15))

    # no model point was plotted, so there is nothing to put into the title
    if mp is None:
        return

    # the title uses the lake fraction of the last plotted model point
    ax.set_title("{0}: point lake fr.={1:.2f}".format(station.id, mp.lake_fraction))
    return labels, handles
def dotest(sim_name_to_station_to_model_point):
    """
    Debugging helper: show the daily climatology of the first model point
    found in the given {simulation name: {station: model point}} mapping,
    then abort by raising an Exception.
    """
    stamp_days = Station.get_stamp_days(2001)

    for station_to_point in sim_name_to_station_to_model_point.values():
        for the_station, model_point in station_to_point.items():
            assert isinstance(model_point, ModelPoint)

            complete_years = the_station.get_list_of_complete_years()
            clim_dates, clim_values = model_point.get_daily_climatology_for_complete_years_with_pandas(
                stamp_dates=stamp_days, years=complete_years)

            plt.plot(clim_dates, clim_values)
            plt.show()

            # stop right after the first plot
            raise Exception()
# Example #6
# 0
def plot_swe_1d_compare_with_obs(ax, station, sim_name_to_station_to_model_point,
                                 day_stamps=None, sim_names=None):
    """
    Plot climatological swe over upstream points, to the model point corresponding to the station,
    and compare with the analysis by Ross Brown.

    The observed curve is read from station.mean_swe_upstream_daily_clim, the modelled
    curves are the daily climatologies of the variable "I5" of the model points.

    :param ax: matplotlib Axes to draw on
    :param station: Station holding the observed upstream swe climatology
    :param sim_name_to_station_to_model_point: a complex dictionary:
        {simulation name: { station: [mp1, mp2, ..., mpN] }}
    :param day_stamps: dates passed as ``stamp_dates`` to the model points;
        defaults to Station.get_stamp_days(2001)
    :param sim_names: names of the simulations to plot, in plotting order;
        None means every simulation present in sim_name_to_station_to_model_point
    :return: tuple (labels, handles) of the plotted lines; an empty dict when the
        station has fewer than 6 complete years (kept as is for existing callers)
    """
    assert isinstance(station, Station)

    assert isinstance(ax, Axes)

    years = station.get_list_of_complete_years()

    #initialize day stamps if it is not passed
    if day_stamps is None:
        day_stamps = Station.get_stamp_days(2001)

    if len(years) < 6:
        return {}

    # None used to be iterated directly (TypeError); treat it as "all simulations"
    if sim_names is None:
        sim_names = list(sim_name_to_station_to_model_point)

    #suppose here that values and times are ordered accordingly in pandas.Timeseries
    obs_data = station.mean_swe_upstream_daily_clim.values
    time = station.mean_swe_upstream_daily_clim.index.to_pydatetime()
    obs_ann_mean = np.mean(obs_data)

    handles = []
    labels = []
    label = "Obs: ann.mean = {0:.1f}".format(obs_ann_mean)
    h = ax.plot(time, obs_data, "k", label=label, lw=2)

    handles.append(h[0])
    labels.append(label)

    ax.set_title("SWE (mm)")
    for sim_name in sim_names:
        station_to_mps = sim_name_to_station_to_model_point[sim_name]
        if station not in station_to_mps:
            continue

        for mp in station_to_mps[station]:
            assert isinstance(mp, ModelPoint)

            dates, values = mp.get_daily_climatology_for_complete_years(stamp_dates=day_stamps, varname="I5")
            label = "{0}: {1:.2f} \n ann.mean = {2:.1f}".format(sim_name, mp.mean_upstream_lake_fraction,
                                                                np.mean(values))
            h = ax.plot(dates, values, label=label, lw=3)

            handles.append(h[0])
            labels.append(label)

            #ax.xaxis.set_major_formatter(DateFormatter("%d\n%b"))
            #ax.xaxis.set_major_locator(MonthLocator(bymonth=range(1,13,3), bymonthday=15 ))

    return labels, handles
# Example #7
# 0
def validate_daily_climatology():
    """
    Compare the simulations listed in ``sim_name_list`` with station observations
    and write one diagnostic pdf ("nc_diagnose_<station id>.pdf") per station.

    Steps:
      1. read the CEHQ and HYDAT station data for the period start_year..end_year,
      2. for each simulation find the model points corresponding to the stations
         and compute, once per station, the observed upstream swe / temperature /
         precipitation climatologies,
      3. for every station with at least 6 complete years save two pages: a panel
         of 1d comparisons and a map of flow directions and basin boundaries.

    All the paths and simulation names are hard-coded; the function returns nothing.
    """
    #years are inclusive
    start_year = 1979
    end_year = 1988

    #sim_name_list = ["crcm5-r",  "crcm5-hcd-r", "crcm5-hcd-rl"]
    sim_name_list = ["crcm5-hcd-rl", "crcm5-hcd-rl-intfl"]

    rpn_folder_paths = [
        "/home/huziy/skynet3_rech1/from_guillimin/new_outputs/quebec_0.1_{0}_spinup".format(sim_name_list[0]),
        "/home/huziy/skynet3_rech1/from_guillimin/new_outputs/quebec_0.1_{0}_spinup2/Samples_all_in_one_folder".format(
            sim_name_list[1])
    ]

    nc_db_folder = "/home/huziy/skynet3_rech1/crcm_data_ncdb"

    #select stations: None means no restriction, use a list of ids (e.g. ["074903", ])
    #in order to limit the comparison to a few stations
    selected_ids = None

    start_date = datetime(start_year, 1, 1)
    end_date = datetime(end_year, 12, 31)

    stations = cehq_station.read_station_data(selected_ids=selected_ids,
                                              start_date=start_date, end_date=end_date
    )

    stations_hydat = cehq_station.read_hydat_station_data(folder_path="/home/huziy/skynet3_rech1/HYDAT",
                                                          start_date=start_date, end_date=end_date)

    stations.extend(stations_hydat)

    varname = "STFL"
    sim_name_to_manager = {}
    sim_name_to_station_to_model_point = {}

    day_stamps = Station.get_stamp_days(2001)
    sweManager = SweDataManager(var_name="SWE")
    cruTempManager = CRUDataManager(lazy=True)
    cruPreManager = CRUDataManager(var_name="pre", lazy=True,
                                   path="data/cru_data/CRUTS3.1/cru_ts_3_10.1901.2009.pre.dat.nc")

    #common lake fractions when comparing simulations on the same grid
    all_model_points = []

    cell_manager = None

    for sim_name, rpn_folder in zip(sim_name_list, rpn_folder_paths):

        dmManager = Crcm5ModelDataManager(samples_folder_path=rpn_folder, file_name_prefix="dm",
                                          all_files_in_samples_folder=True, need_cell_manager=cell_manager is None)

        #here using the fact that all the simulations are on the same grid
        if cell_manager is None:
            cell_manager = dmManager.cell_manager
        else:
            dmManager.cell_manager = cell_manager

        #determine comon lake fractions, so it is not taken from the trivial case lf = 0, but note
        #this has only sense when all the simulations were performed on the same grid
        sim_name_to_manager[sim_name] = dmManager

        nc_sim_folder = os.path.join(nc_db_folder, sim_name)
        nc_path = os.path.join(nc_sim_folder, "{0}_all.nc4".format(varname))

        #In general there are several model points corresponding to a given station
        st_to_mp = dmManager.get_model_points_for_stations(stations, sim_name=sim_name,
                                                           nc_path=nc_path,
                                                           nc_sim_folder=nc_sim_folder,
                                                           set_data_to_model_points=True)

        print("got model points for stations")

        sim_name_to_station_to_model_point[sim_name] = st_to_mp

        #save model points to a list of all points
        for s, mps in st_to_mp.items():
            assert isinstance(s, Station)
            for mp in mps:
                assert isinstance(mp, ModelPoint)
                #calculate upstream swe if needed
                if s.mean_swe_upstream_daily_clim is None:
                    s.mean_swe_upstream_daily_clim = sweManager.get_mean_upstream_timeseries_daily(mp, dmManager,
                                                                                                   stamp_dates=day_stamps)
                    #These are taken from CRU dataset, only monthly data are available
                    s.mean_temp_upstream_monthly_clim = cruTempManager.get_mean_upstream_timeseries_monthly(mp,
                                                                                                            dmManager)
                    s.mean_prec_upstream_monthly_clim = cruPreManager.get_mean_upstream_timeseries_monthly(mp,
                                                                                                           dmManager)

                    print("Calculated observed upstream mean values...")
            all_model_points.extend(mps)

    print("imported input data successfully, plotting ...")

    #for tests
    #test(sim_name_to_station_to_model_point)

    #select only stations which have corresponding model points
    stations = list(sim_name_to_station_to_model_point[sim_name_list[0]].keys())

    from matplotlib.backends.backend_pdf import PdfPages

    for s in stations:
        years = s.get_list_of_complete_years()
        if len(years) < 6:
            continue #skip stations with less than 6 continuous years of data

        pp = PdfPages("nc_diagnose_{0}.pdf".format(s.id))
        try:
            #plot hydrographs
            fig = plt.figure()
            gs = gridspec.GridSpec(3, 3, left=0.05, hspace=0.3, wspace=0.2)
            ax_stfl = fig.add_subplot(gs[0, 0])
            hydro_result = plot_hydrographs(ax_stfl, s, sim_name_to_station_to_model_point,
                                            day_stamps=day_stamps, sim_names=sim_name_list
            )
            plt.setp(ax_stfl.get_xticklabels(), visible=False) #do not show ticklabels for upper rows

            #plot_hydrographs returns None when it has nothing to report for the legend
            if hydro_result is not None:
                labels, handles = hydro_result
                #loc is passed by keyword: the positional form is deprecated in recent matplotlib
                fig.legend(handles, labels, loc="lower right")

            #plot swe 1d compare with obs
            ax_swe = fig.add_subplot(gs[1, 0], sharex=ax_stfl)
            plot_swe_1d_compare_with_obs(ax_swe, s, sim_name_to_station_to_model_point,
                                         day_stamps=day_stamps, sim_names=sim_name_list)

            #plot mean temp 1d compare with obs   -- here plot biases directly...??
            ax_temp = fig.add_subplot(gs[0, 2])
            plot_temp_1d_compare_with_obs(ax_temp, s, sim_name_to_station_to_model_point, sim_names=sim_name_list)
            plt.setp(ax_temp.get_xticklabels(), visible=False) #do not show ticklabels for upper rows

            #plot mean precip 1d compare with obs   -- here plot biases directly...??
            ax = fig.add_subplot(gs[1, 2], sharex=ax_temp)
            plot_precip_1d_compare_with_obs(ax, s, sim_name_to_station_to_model_point, sim_names=sim_name_list)

            #plot mean Surface and subsurface runoff
            ax = fig.add_subplot(gs[0, 1], sharex=ax_stfl)
            plot_surf_runoff(ax, s, sim_name_to_station_to_model_point, sim_names=sim_name_list)
            plt.setp(ax.get_xticklabels(), visible=False) #do not show ticklabels for upper rows

            ax = fig.add_subplot(gs[1, 1], sharex=ax_stfl)
            plot_subsurf_runoff(ax, s, sim_name_to_station_to_model_point, sim_names=sim_name_list)
            plt.setp(ax.get_xticklabels(), visible=False) #do not show ticklabels for upper rows

            ax = fig.add_subplot(gs[2, 1], sharex=ax_stfl)
            plot_total_runoff(ax, s, sim_name_to_station_to_model_point, sim_names=sim_name_list)

            pp.savefig(fig)
            #release the figure, otherwise figures accumulate for each station
            plt.close(fig)

            #plot flow direction and basin boundaries
            fig = plt.figure()
            gs = gridspec.GridSpec(1, 2, right=0.99, bottom=0.001)
            ax = fig.add_subplot(gs[0, 1])
            plot_flow_directions_and_basin_boundaries(ax, s, sim_name_to_station_to_model_point,
                                                      sim_name_to_manager=sim_name_to_manager)
            pp.savefig(fig)
            plt.close(fig)

            #plot 2d correlation between wind speed and measured streamflow at the station
        finally:
            #close the pdf even if one of the plotting functions fails
            pp.close()
# Example #8
# 0
def load_stations_from_csv(
        index_file="mh/obs_data/streamflow_data_organized/station_index.txt",
        selected_ids=None):
    """
    Create Station objects from a whitespace-separated index file and the
    per-station "<station id>.csv" data files located next to it.

    The first line of the index file is skipped and blank lines are ignored.
    Columns, as read by the code: id, lon, lat, drainage area, data units and
    then the station name (the text before the first comma is used).

    The first column of each data table is converted to datetime when needed:
    strings are parsed with one of ``known_date_formats`` and integer-valued
    numbers are treated as years (mapped to June 15 of that year).

    :param index_file: path to the station index file
    :param selected_ids: optional container of station ids to load; None loads all
    :return: list of Station objects with the data attached
    :raises Exception: when the values of the date column cannot be interpreted
    """
    res = []

    data_dir = Path(index_file).parent

    with open(index_file) as f:

        # skip the first line
        f.readline()

        for line in f:
            if line.strip() == "":
                continue

            # raw string: "\s" in a plain literal is an invalid escape sequence;
            # the line is stripped first so that leading / trailing whitespace
            # does not produce empty tokens (empty id, trailing space in the name)
            toks = re.split(r"\s+", line.strip())

            st_id = toks[0].strip()

            if (selected_ids is not None) and (st_id not in selected_ids):
                continue

            lon, lat = [float(tok.strip()) for tok in toks[1:3]]

            # the drainage area is optional: missing or not a number => None
            try:
                st_da = float(toks[3].strip())
            except (IndexError, ValueError):
                st_da = None

            st_name = " ".join(toks[5:]).split(",")[0]

            s = Station(st_id=st_id, lon=lon, lat=lat, name=st_name)
            s.source_data_units = toks[4].strip()
            s.drainage_km2 = st_da

            print(s)

            ts = read_data_file_for_station(
                s, data_file=data_dir.joinpath("{}.csv".format(s.id)))

            ts.dropna(inplace=True)

            # the type of the first value decides how the date column is treated
            first_value = ts.iloc[0, 0]

            # if it is date do nothing
            if hasattr(first_value, "year"):
                pass
            # convert to dates if it is a string
            elif isinstance(first_value, str):
                date_format = None
                # try different known date formats, the last matching one is used
                for the_date_format in known_date_formats:
                    try:
                        datetime.strptime(first_value, the_date_format)
                    except ValueError:
                        continue
                    date_format = the_date_format

                if date_format is None:
                    raise Exception(
                        "Do not understand this date format: {}".format(
                            first_value))

                ts[0] = [
                    datetime.strptime(t, date_format) for t in ts.iloc[:, 0]
                ]

            elif float(first_value).is_integer():  # in case we have only year values
                ts[0] = [datetime(int(y), 6, 15) for y in ts.iloc[:, 0]]

            else:
                print(first_value)
                raise Exception("Could not convert {} to a date".format(
                    first_value))

            print(ts.head())

            set_data_from_pandas_timeseries(ts, s, date_col=0)

            res.append(s)

    return res
def validate_daily_climatology():
    """
    Compare the modelled daily streamflow climatology ("STFL") of the simulations
    in ``sim_name_list`` with HYDAT observations and save the figures to
    "comp_with_obs_daily_clim.pdf": one page per station with at least 6 complete
    years and at least one model point, followed by a map of those stations.

    All the paths and simulation names are hard-coded; the function returns nothing.
    """
    #years are inclusive
    start_year = 1979
    end_year = 1988

    sim_name_list = ["crcm5-r", "crcm5-hcd-r", "crcm5-hcd-rl"]
    rpn_folder_path_form = "/home/huziy/skynet3_rech1/from_guillimin/new_outputs/quebec_0.1_{0}_spinup"
    nc_db_folder = "/home/huziy/skynet3_rech1/crcm_data_ncdb"

    #select stations
    selected_ids = None
    start_date = datetime(start_year, 1, 1)
    end_date = datetime(end_year, 12, 31)
    print("start reading cehq obs data")
#    stations = cehq_station.read_station_data(selected_ids = selected_ids,
#            start_date=start_date, end_date=end_date
#    )
    stations = []

    print("start reading hydat obs data")
    stations.extend(cehq_station.read_hydat_station_data(folder_path="/home/huziy/skynet3_rech1/HYDAT",
                                                         start_date=start_date, end_date=end_date))

    print("finished reading station data")

    varname = "STFL"
    sim_name_to_manager = {}
    sim_name_to_station_to_model_point = {}
    dmManager = None

    for sim_name in sim_name_list:
        print(sim_name)
        rpn_folder = rpn_folder_path_form.format(sim_name)

        dmManager = Crcm5ModelDataManager(samples_folder_path=rpn_folder, file_name_prefix="dm",
                                          all_files_in_samples_folder=True, need_cell_manager=True)

        sim_name_to_manager[sim_name] = dmManager

        nc_sim_folder = os.path.join(nc_db_folder, sim_name)
        nc_path = os.path.join(nc_sim_folder, "{0}_all.nc".format(varname))

        print("get model points for the stations")

        st_to_mp = dmManager.get_model_points_for_stations(stations, nc_path=nc_path, npoints=1,
                                                           nc_sim_folder=nc_sim_folder
        )

        sim_name_to_station_to_model_point[sim_name] = st_to_mp

    #lake fractions of the last simulation are used in the titles for all of them
    common_lake_fractions = dmManager.lake_fraction

    #for tests
    #test(sim_name_to_station_to_model_point)

    print("finished reading data in memory")

    from matplotlib.backends.backend_pdf import PdfPages
    pp = PdfPages("comp_with_obs_daily_clim.pdf")

    try:
        stations_to_plot = [] #only stations that are compared with model are needed on a map
        day_stamps = Station.get_stamp_days(2001)
        for s in stations:
            assert isinstance(s, Station)

            years = s.get_list_of_complete_years()

            print(s)
            if len(years) < 6:
                continue

            #create the figure only for the stations which pass the check above
            fig = plt.figure()

            dates, obs_data = s.get_daily_climatology_for_complete_years_with_pandas(stamp_dates=day_stamps,
                                                                                     years=years)
            obs_ann_mean = np.mean(obs_data)

            plt.plot(dates, obs_data, label="Obs: ann.mean = {0:.1f}".format(obs_ann_mean))

            mp = None
            for sim_name in sim_name_list:
                manager = sim_name_to_manager[sim_name]
                if s not in sim_name_to_station_to_model_point[sim_name]:
                    continue

                for mp in sim_name_to_station_to_model_point[sim_name][s]:
                    assert isinstance(mp, ModelPoint)
                    dates, values = mp.get_daily_climatology_for_complete_years(stamp_dates=day_stamps,
                                                                                varname="STFL")
                    plt.plot(dates, values, label="{0}: {1:.2f} \n ann.mean = {2:.1f}, dist = {3:.1f} km".format(
                        sim_name, manager.lake_fraction[mp.flow_in_mask == 1].mean(), np.mean(values),
                        mp.distance_to_station / 1000.0))

                plt.legend(prop=FontProperties(size=8))

            #nothing to compare with, drop the figure
            if mp is None:
                plt.close(fig)
                continue

            ax = plt.gca()
            assert isinstance(ax, Axes)

            ax.xaxis.set_major_formatter(DateFormatter("%d/%b"))
            ax.xaxis.set_major_locator(MonthLocator(bymonth=list(range(1, 13, 3)), bymonthday=15))

            stations_to_plot.append(s)
            #mp is the last plotted model point
            plt.title("{0}: point lake fraction={1:.4f}".format(s.id, common_lake_fractions[mp.ix, mp.jy]))

            pp.savefig(fig)
            #release the figure, otherwise figures accumulate for each station
            plt.close(fig)

        #plot station positions
        fig = plt.figure()
        bm = dmManager.get_rotpole_basemap_using_lons_lats(lons2d=dmManager.lons2D, lats2d=dmManager.lats2D,
                                                           resolution="i")
        bm.drawcoastlines(linewidth=0.1)
        bm.drawrivers(linewidth=0.1)

        lons_list = [s.longitude for s in stations_to_plot]
        lats_list = [s.latitude for s in stations_to_plot]

        x_list, y_list = bm(lons_list, lats_list)
        bm.scatter(x_list, y_list, linewidths=0, s=0.5, zorder=3)
        ax = plt.gca()
        #the coordinates were computed for stations_to_plot, so the labels should
        #come from the same list (zipping with all the stations mislabels the points)
        for s, the_x, the_y in zip(stations_to_plot, x_list, y_list):
            ax.annotate(s.id, xy=(the_x, the_y), xytext=(3, 3), textcoords='offset points',
                        font_properties=FontProperties(size=4), bbox=dict(facecolor='w', alpha=1),
                        ha="left", va="bottom", zorder=2)
        pp.savefig(fig)
        plt.close(fig)
    finally:
        #close the pdf even if the plotting fails
        pp.close()