def do_timing(self):
    """Hack to do timing.

    Reads the good ENV fixture once, prints the implied read rate, then
    fails deliberately (1 != 2) so the test runner surfaces the print.
    """
    sts = datetime.datetime.now()
    _ = dep.read_env(get_path('good_env.txt'))
    ets = datetime.datetime.now()
    print("%.5f reads per second" % (1. / (ets - sts).total_seconds(), ))
    # assertEqual: assertEquals is a deprecated alias removed in Python 3.12
    self.assertEqual(1, 2)
def test_read():
    """Read a ENV file"""
    env = dep.read_env(get_path('good_env.txt'))
    # exactly one entry for 5 Jun 2010 with the known runoff value
    hits = env[env['date'] == datetime.datetime(2010, 6, 5)]
    assert len(hits.index) == 1
    assert hits.iloc[0]['runoff'] == 86.3
def main():
    """Go Main Go."""
    fig, ax = plt.subplots(1, 1)
    for huc12 in tqdm(MYHUCS):
        dfs = []
        # compare baseline scenario 0 against scenario 1002 per flowpath
        for scenario in [0, 1002]:
            for fn in glob.glob("/i/%s/env/%s/%s/*.env" % (
                    scenario, huc12[:8], huc12[8:])):
                # flowpath id is encoded in the filename <huc12>_<fpath>.env
                fpath = int(fn.split("/")[-1].split(".")[0].split("_")[1])
                df = dep_utils.read_env(fn)
                df["fpath"] = fpath
                # presumably a unit conversion factor -- TODO confirm 4.163
                df["av_det"] = df["av_det"] * 4.163
                df["scenario"] = scenario
                dfs.append(df)
        df = pd.concat(dfs)
        df["year"] = df["date"].dt.year
        # yearly detachment totals per scenario and flowpath
        gdf = (df[["year", "scenario", "fpath", "av_det"]].groupby(
            ["year", "scenario", "fpath"]).sum())
        gdf = gdf.reset_index()
        # 95th percentile and above, in 0.1 steps (100th included)
        ptiles = np.arange(95, 100.01, 0.1)
        pc1 = np.percentile(gdf[gdf["scenario"] == 0]["av_det"].values,
                            ptiles)
        pc2 = np.percentile(gdf[gdf["scenario"] == 1002]["av_det"].values,
                            ptiles)
        ax.plot(pc1, pc2)
        # mark the max value (last percentile) with a square dot
        ax.scatter(pc1[-1], pc2[-1], s=40, marker="s", color="b", zorder=3)
    ax.grid(True)
    ax.set_title(("95th Percentile and Higher Yearly Soil Delivery\n"
                  "Max value shown by dot"))
    ax.set_xlabel("Production Yearly Soil Delivery T/a")
    ax.set_ylabel("New Flowpaths Yearly Soil Delivery T/a")
    # draw the x=y reference line out to the larger of the two axis limits
    ymax = max([ax.get_xlim()[1], ax.get_ylim()[1]])
    ax.plot([0, ymax], [0, ymax], color="k", lw=2, label="x=y")
    fig.savefig("/tmp/test.png")
def test_read(self):
    """Read a ENV file and verify the 5 Jun 2010 runoff value."""
    df = dep.read_env(get_path('good_env.txt'))
    df2 = df[df['date'] == datetime.date(2010, 6, 5)]
    self.assertEqual(len(df2.index), 1)
    row = df2.iloc[0]
    # assertEquals is a deprecated alias removed in Python 3.12; use
    # assertAlmostEqual for the float comparison
    self.assertAlmostEqual(row['runoff'], 86.3, places=2)
def main():
    """Go Main Go"""
    # 272m slope length? database says 200m, prj has 266m and 6m
    ofe = read_ofe("/i/0/ofe/07040006/0203/070400060203_213.ofe")
    sedleave = ofe["sedleave"]
    print("OFETOT %s" % (sedleave.sum(), ))
    # per-OFE sediment leaving totals
    for num in (1, 2):
        print("%s %s" % (num, sedleave[ofe["ofe"] == num].sum(), ))
    env = read_env("/i/0/env/07040006/0203/070400060203_213.env")
    print(env["sed_del"].sum())
def main():
    """Go Main Go"""
    # 272m slope length? database says 200m, prj has 266m and 6m
    ofe = read_ofe('/i/0/ofe/07040006/0203/070400060203_213.ofe')
    total = ofe['sedleave'].sum()
    print("OFETOT %s" % (total, ))
    first = ofe[ofe['ofe'] == 1]['sedleave'].sum()
    print("1 %s" % (first, ))
    second = ofe[ofe['ofe'] == 2]['sedleave'].sum()
    print("2 %s" % (second, ))
    env = read_env('/i/0/env/07040006/0203/070400060203_213.env')
    print(env['sed_del'].sum())
def readfile(huc12, fn):
    """Read one ENV file and attach a per-length delivery column."""
    try:
        df = dep_utils.read_env(fn)
    except Exception as exp:
        print("\nABORT: Attempting to read: %s resulted in: %s\n" % (fn, exp))
        return None
    # flowpath number comes from the <huc12>_<fpath>.env filename
    fpathnum = int(fn.split("/")[-1].split(".")[0].split("_")[1])
    key = "%s_%s" % (huc12, fpathnum)
    df['delivery'] = df['sed_del'] / lengths[key]
    return df
def readfile(fn, lengths):
    """Our env reader."""
    try:
        df = dep_utils.read_env(fn)
    except Exception as exp:
        print("\nABORT: Attempting to read: %s resulted in: %s\n" % (fn, exp))
        return None
    # flowpath id comes from the <huc12>_<fpath>.env filename
    fname = fn.split("/")[-1]
    key = int(fname.split(".")[0].split("_")[1])
    df['delivery'] = df['sed_del'] / lengths[key]
    return df
def readfile(huc12, fn):
    """Read one ENV file, returning None when it cannot be parsed."""
    try:
        df = dep_utils.read_env(fn)
    except Exception as exp:
        # was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; narrow and report like the sibling readers do
        print("\nABORT: Attempting to read: %s resulted in: %s\n" % (fn, exp))
        return None
    # flowpath key is <huc12>_<fpath>, fpath parsed from the filename
    key = "%s_%s" % (huc12, int(fn.split("/")[-1].split(".")[0].split("_")[1]))
    df["delivery"] = df["sed_del"] / lengths[key]
    df["flowpath"] = key
    df["length"] = lengths[key]
    return df
def readfile(huc12, fn):
    """Read one ENV file, returning None when it cannot be parsed."""
    try:
        df = dep_utils.read_env(fn)
    except Exception as exp:
        # was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; narrow and report like the sibling readers do
        print("\nABORT: Attempting to read: %s resulted in: %s\n" % (fn, exp))
        return None
    # flowpath key is <huc12>_<fpath>, fpath parsed from the filename
    key = "%s_%s" % (huc12, int(fn.split("/")[-1].split(".")[0].split("_")[1]))
    df['delivery'] = df['sed_del'] / lengths[key]
    df['flowpath'] = key
    df['length'] = lengths[key]
    return df
def readfile(fn, lengths):
    """Our env reader."""
    try:
        df = dep_utils.read_env(fn)
    except Exception as exp:
        print("\nABORT: Attempting to read: %s resulted in: %s\n" % (fn, exp))
        return None
    # flowpath id parsed from the <huc12>_<fpath>.env filename
    fpath = int(fn.split("/")[-1].split(".")[0].split("_")[1])
    df["fpath"] = fpath
    df["delivery"] = df["sed_del"] / lengths[fpath]
    return df
def main(argv):
    """Go Main Go.

    argv: [prog, huc12, fpath, year].  Plots the daily change in sediment
    delivery for planting-date scenarios 60-69 vs the scenario-59 baseline.
    """
    huc12 = argv[1]
    fpath = argv[2]
    year = int(argv[3])
    # matplotlib's default color cycle, one color per scenario
    prop_cycle = plt.rcParams["axes.prop_cycle"]
    colors = prop_cycle.by_key()["color"]
    data = {}
    for scenario in range(59, 70):
        data[scenario] = read_env(
            "/i/%s/env/%s/%s/%s_%s.env"
            % (scenario, huc12[:8], huc12[8:], huc12, fpath)).set_index("date")
        print(data[scenario]["av_det"].sum())
    ax = plt.axes([0.2, 0.1, 0.75, 0.75])
    # scenario 59 is the baseline (Apr 10 planting per the title below)
    baseline = data[59][data[59].index.year == year]
    yticklabels = []
    for scenario in range(60, 70):
        color = colors[scenario - 60]
        # scenarios 60..69 map to planting dates at 5-day steps from Apr 15
        date = datetime.date(2000, 4, 15) + datetime.timedelta(
            days=(scenario - 60) * 5)
        scendata = data[scenario][data[scenario].index.year == year]
        delta = scendata["sed_del"] - baseline["sed_del"]
        # only draw days where delivery actually changed
        delta = delta[delta != 0]
        # percent change in total yearly delivery vs baseline
        total = ((scendata["sed_del"].sum() - baseline["sed_del"].sum())
                 / baseline["sed_del"].sum()) * 100.0
        yticklabels.append("%s %4.2f%%" % (date.strftime("%b %d"), total))
        x = delta.index.to_pydatetime()
        # res = ax.scatter(x, delta.values + (scenario - 60))
        for idx, val in enumerate(delta):
            # arrow rooted on the scenario's horizontal line, length = delta
            ax.arrow(
                x[idx],
                scenario - 60,
                0,
                val,
                head_width=0.5,
                head_length=0.1,
                fc=color,
                ec=color,
            )
        ax.axhline(scenario - 60, color=color)
    ax.set_xlim(datetime.date(year, 1, 1), datetime.date(year + 1, 1, 1))
    ax.set_ylim(-0.5, 10)
    ax.xaxis.set_major_locator(mdates.DayLocator([1]))
    ax.xaxis.set_major_formatter(mdates.DateFormatter("%b"))
    ax.set_title(
        "huc12: %s fpath: %s\n%s Daily Change in Delivery vs Apr 10 Planting"
        % (huc12, fpath, year))
    ax.grid(axis="x")
    ax.set_yticks(range(10))
    ax.set_yticklabels(yticklabels)
    plt.gcf().savefig("test.png")
def main():
    """Go Main Go.

    Compare 15 Jun 2014 sed_del between scenario 0 and scenario 1000 for
    HUC12 102300031504 and scatter-plot the pairs.
    """
    os.chdir("/i/0/env/10230003/1504")
    baseline = {}
    for fn in glob.glob("*.env"):
        # flowpath id from the <huc12>_<fpath>.env filename
        fpath = fn.split(".")[0].split("_")[1]
        df = read_env(fn)
        row = df[df["date"] == pd.Timestamp(year=2014, month=6, day=15)]
        # NOTE(review): iloc[0] raises if the date is absent from the file
        # -- assumes every env has a 15 Jun 2014 entry, TODO confirm
        baseline[fpath] = row.iloc[0]["sed_del"]
    os.chdir("/i/1000/env/10230003/1504")
    scenario = {}
    for fn in glob.glob("*.env"):
        fpath = fn.split(".")[0].split("_")[1]
        df = read_env(fn)
        row = df[df["date"] == pd.Timestamp(year=2014, month=6, day=15)]
        scenario[fpath] = row.iloc[0]["sed_del"]
    # align the two runs on flowpath id; missing flowpaths become NaN
    df = pd.DataFrame({
        "baseline": pd.Series(baseline),
        "scenario": pd.Series(scenario)
    })
    df["diff"] = df["scenario"] - df["baseline"]
    print(df.sort_values("diff", ascending=False).head(1))
    # flowpaths present in only one of the two runs
    df2 = df[pd.isnull(df["diff"])]
    print(df2)
    (fig, ax) = plt.subplots(1, 1)
    ax.plot([0, 10000], [0, 10000], lw=2, color="r")
    ax.set_xlabel("Baseline avg: %.2f" % (df["baseline"].mean(), ))
    ax.set_ylabel("Previous avg: %.2f" % (df["scenario"].mean(), ))
    # keep this from impacting above
    df = df.fillna(0)
    ax.scatter(df["baseline"].values, df["scenario"].values)
    ax.set_title("15 Jun 2014 :: HUC12: 102300031504, raw sed_del")
    fig.savefig("/tmp/test.png")
def get_results(huc12):
    """Do as I say.

    Builds a per-flowpath DataFrame with one G<order>_delivery_ta column
    per non-baseline scenario in SCEN2CODE.
    """
    df = pd.DataFrame()
    for gorder, scenario in enumerate(SCEN2CODE):
        # gorder 0 is the baseline; skip it
        if gorder == 0:
            continue
        lendf = read_sql("""
            SELECT fpath, ST_Length(geom) as length from flowpaths
            where scenario = %s and huc_12 = %s
        """, PGCONN, params=(scenario, huc12), index_col='fpath')
        for fpath, row in lendf.iterrows():
            res = read_env(("/i/%s/env/%s/%s/%s_%s.env"
                            ) % (scenario, huc12[:8], huc12[8:],
                                 huc12, fpath))
            # only consider events before 2017
            res = res[res['date'] < datetime.date(2017, 1, 1)]
            # presumably converts kg/m per unit length to T/a -- TODO
            # confirm the 4.463 factor and the /10. year normalization
            res['delivery'] = res['sed_del'] / row['length'] * 4.463
            df.at[fpath, 'G%s_delivery_ta' % (gorder, )] = (
                res['delivery'].sum() / 10.)
    return df
def summarize_hillslopes(huc12, scenario):
    """Print out top hillslopes.

    Reads every ENV file for the HUC12/scenario and prints the top-5
    sediment delivery flowpaths, events, and yearly totals.
    """
    envs = glob.glob("/i/%s/env/%s/%s/*.env" % (scenario, huc12[:8],
                                                huc12[8:]))
    dfs = []
    for env in envs:
        df = dep.read_env(env)
        # flowpath id from the <huc12>_<fpath>.env filename
        df['flowpath'] = int(env.split("/")[-1].split("_")[1][:-4])
        dfs.append(df)
    df = pd.concat(dfs)
    df2 = df[['sed_del', 'flowpath']].groupby(
        'flowpath').sum().sort_values('sed_del', ascending=False)
    print("==== TOP 5 HIGHEST SEDIMENT DELIVERY TOTALS")
    print(df2.head())
    # drill into the single worst flowpath
    flowpath = df2.index[0]
    df2 = df[df['flowpath'] == flowpath].sort_values('sed_del',
                                                     ascending=False)
    print("==== TOP 5 HIGHEST SEDIMENT DELIVERY FOR %s" % (flowpath, ))
    print(df2[['date', 'sed_del', 'precip', 'runoff', 'av_det']].head())
    # NOTE(review): assumes dep.read_env provides a 'year' column -- confirm
    df3 = df2.groupby('year').sum().sort_values('sed_del', ascending=False)
    print("==== TOP 5 HIGHEST SEDIMENT DELIVERY EVENTS FOR %s" % (flowpath, ))
    print(df3[['sed_del', 'precip', 'runoff', 'av_det']].head())
def summarize_hillslopes(huc12, scenario):
    """Print out top hillslopes"""
    pattern = "/i/%s/env/%s/%s/*.env" % (scenario, huc12[:8], huc12[8:])
    frames = []
    for env in glob.glob(pattern):
        frame = dep.read_env(env)
        # flowpath id from the <huc12>_<fpath>.env filename
        frame["flowpath"] = int(env.split("/")[-1].split("_")[1][:-4])
        frames.append(frame)
    df = pd.concat(frames)
    totals = df[["sed_del", "flowpath"]].groupby("flowpath").sum()
    totals = totals.sort_values("sed_del", ascending=False)
    print("==== TOP 5 HIGHEST SEDIMENT DELIVERY TOTALS")
    print(totals.head())
    # drill into the single worst flowpath
    flowpath = totals.index[0]
    worst = df[df["flowpath"] == flowpath].sort_values("sed_del",
                                                       ascending=False)
    print("==== TOP 5 HIGHEST SEDIMENT DELIVERY FOR %s" % (flowpath, ))
    print(worst[["date", "sed_del", "precip", "runoff", "av_det"]].head())
    yearly = worst.groupby("year").sum().sort_values("sed_del",
                                                     ascending=False)
    print("==== TOP 5 HIGHEST SEDIMENT DELIVERY EVENTS FOR %s" % (flowpath, ))
    print(yearly[["sed_del", "precip", "runoff", "av_det"]].head())
def do_scenario(scenario, plantdate, hucdf):
    """Process this scenario.

    Builds one row per (year, 5-day period) for 2008-2018 with precip,
    detachment, and soil-moisture summaries over all flowpaths in the
    HUC12s listed in hucdf.
    """
    # 73 five-day periods per year for 2008..2018
    index = pd.MultiIndex.from_product(
        [range(2008, 2019), range(1, 74)], names=["year", "period"])
    df = pd.DataFrame(index=index).reset_index()

    def f(row):
        """Make date."""
        # middle day of the 5-day period
        return datetime.date(row["year"], 1, 1) + datetime.timedelta(
            days=int(row["period"] - 1) * 5 + 2)

    df["5day_middle_date"] = df.apply(f, axis=1)
    df = df.set_index(["year", "period"])
    # collect water-balance files, counting flowpaths as we go
    smdfs = []
    flowpaths = 0
    for _, row in hucdf.iterrows():
        huc12 = row["HUC12"]
        for fn in glob.glob("/i/%s/wb/%s/%s/*" % (scenario, huc12[:8],
                                                  huc12[8:])):
            smdfs.append(read_wb(fn))
            flowpaths += 1
    smdf = pd.concat(smdfs)
    del smdfs
    # collect erosion ENV files for the same flowpaths
    envdfs = []
    for _, row in hucdf.iterrows():
        huc12 = row["HUC12"]
        for fn in glob.glob("/i/%s/env/%s/%s/*" % (scenario, huc12[:8],
                                                   huc12[8:])):
            envdfs.append(read_env(fn))
    envdf = pd.concat(envdfs)
    envdf["jday"] = pd.to_numeric(envdf["date"].dt.strftime("%j"),
                                  downcast="integer")
    del envdfs
    # only one ofe 1
    smdf = smdf[smdf["ofe"] == 1]
    # bucket julian day into 5-day periods
    smdf["period"] = (smdf["jday"] + 5) // 5
    envdf["period"] = (envdf["jday"] + 5) // 5
    # only consider 2008 thru 2018 data
    smdf = smdf[(smdf["year"] > 2007) & (smdf["year"] < 2019)]
    envdf = envdf[(envdf["year"] > 2007) & (envdf["year"] < 2019)]
    gdf = envdf.groupby(["year", "period"]).mean()
    df["5day_precip_mm"] = gdf["precip"]
    df["5day_detach_kgm2"] = gdf["av_det"]
    gdf = smdf.groupby(["year", "period"]).mean()
    df["5day_soilmoist"] = gdf["sw1"]
    # annual totals, averaged over the number of flowpaths
    gdf = envdf.groupby("year").sum() / flowpaths
    df = df.join(gdf[["precip", "av_det"]])
    df = df.rename(
        {
            "precip": "annual_precip_mm",
            "av_det": "annual_detach_kgm2"
        },
        axis=1)
    # soil moisture on the planting day itself, averaged per year
    gdf = (smdf[smdf["jday"] == int(plantdate.strftime("%j"))].groupby(
        "year").mean())
    df = df.join(gdf["sw1"])
    df = df.rename({"sw1": "plant_soilmoist"}, axis=1)
    df["plant_date"] = plantdate.strftime("%m %d")
    df["mlra_id"] = hucdf.iloc[0]["MLRA"]
    df = df.fillna(0)
    LOG.info("done with %s %s", plantdate, hucdf.iloc[0]["MLRA"])
    return df
def do_timing(self):
    """Print ENV read timing; fails on purpose so the print is surfaced."""
    sts = datetime.datetime.now()
    _ = dep.read_env(get_path('good_env.txt'))
    ets = datetime.datetime.now()
    print("%.5f reads per second" % (1. / (ets - sts).total_seconds(),))
    # assertEqual: assertEquals is a deprecated alias removed in Python 3.12
    self.assertEqual(1, 2)
def test_read(self):
    """Read a ENV file and verify the 5 Jun 2010 runoff value."""
    df = dep.read_env(get_path('good_env.txt'))
    df2 = df[df['date'] == datetime.date(2010, 6, 5)]
    self.assertEqual(len(df2.index), 1)
    row = df2.iloc[0]
    # assertEquals is a deprecated alias removed in Python 3.12; use
    # assertAlmostEqual for the float comparison
    self.assertAlmostEqual(row['runoff'], 86.3, places=2)
def test_empty(self):
    """Don't error out when reading an empty ENV file."""
    df = dep.read_env(get_path('empty_env.txt'))
    # assertEqual: assertEquals is a deprecated alias removed in Python 3.12
    self.assertEqual(len(df.index), 0)
def test_empty():
    """don't error out on an empty ENV"""
    env = dep.read_env(get_path('empty_env.txt'))
    # an empty file should parse to a frame with zero rows
    assert len(env.index) == 0
"""Sundry.

Compare scenario 0 vs scenario 7 ENV output for HUC12 102300031504 and
print the biggest per-day differences for flowpaths that diverge a lot.
"""
import os
import glob

import pyiem.dep as dep_utils

os.chdir("/i/0/env/10230003/1504")
for fn in glob.glob("*.env"):
    df0 = dep_utils.read_env(fn)
    df0.set_index("date", inplace=True)
    # the matching scenario-7 file shares the same filename
    df7 = dep_utils.read_env("/i/7/env/10230003/1504/" + fn)
    df7.set_index("date", inplace=True)
    # only investigate flowpaths whose total delivery grew by > 1000
    if (df7["sed_del"].sum() - df0["sed_del"].sum()) > 1000:
        print("--- Investigating: %s" % (fn, ))
        jdf = df0.join(df7, lsuffix="_s0", rsuffix="_s7")
        jdf["diff_sed_del"] = jdf["sed_del_s7"] - jdf["sed_del_s0"]
        # show the five days with the largest delivery increase
        jdf.sort_values(by="diff_sed_del", ascending=False, inplace=True)
        print(jdf[[
            "precip_s7",
            "sed_del_s7",
            "sed_del_s0",
            "av_det_s7",
            "av_det_s0",
            "runoff_s7",
            "runoff_s0",
        ]].head(5))
def test_empty(self):
    """don't error out on an empty ENV"""
    df = dep.read_env(get_path('empty_env.txt'))
    # assertEqual: assertEquals is a deprecated alias removed in Python 3.12
    self.assertEqual(len(df.index), 0)