def process_stock(stock_no, days):
    """Process a single stock."""
    csv_file = "%s/convert_%s.csv" % (conf.HISTORY_CONVERTED_PATH, stock_no)
    rows = common.load_csv(csv_file, fields)
    rows = list(rows)
    for i, row in enumerate(rows):
        print rolling(rows, i)
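# Minimal sketch of the `rolling` helper assumed above (hypothetical -- its
# real definition is not part of this excerpt): a trailing moving average of
# the `close` field over the window ending at index i. The `days` and `field`
# defaults are assumptions for illustration.
def rolling(rows, i, days=5, field='close'):
    window = rows[max(0, i - days + 1):i + 1]
    values = [float(r[field]) for r in window]
    return sum(values) / len(values)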
def get_freqstats_per_scenario_per_test(sc_list, freq_metrics):
    print "get_freqstats_per_scenario_per_test:Enter"
    all_freqdist_data = collections.OrderedDict()
    for ix, each_scenario in enumerate(sc_list):
        print each_scenario
        all_freqdist_data[each_scenario] = collections.OrderedDict()
        for each_mifint_freq_str in CUSTOM_CROPPING_PARAMS_ALL[each_scenario].keys():
            all_freqdist_data[each_scenario][each_mifint_freq_str] = {}
            DEVFREQ_MIFINT_PAIRS = [
                int(s) if (("default" not in s) and ("test" not in s)) else s
                for s in each_mifint_freq_str.split("-")
            ]
            MIF_FREQ = DEVFREQ_MIFINT_PAIRS[0]
            INT_FREQ = DEVFREQ_MIFINT_PAIRS[1]
            DATA_DIR = BASE_DATA_DIR + each_scenario + "/"
            cpugpu_csv_fname = DATA_DIR + "data_cpugpu-{0}-{1}.csv".format(MIF_FREQ, INT_FREQ)
            mem_csv_fname = DATA_DIR + "data_mem-{0}-{1}.csv".format(MIF_FREQ, INT_FREQ)
            (c, perfdata) = load_csv(mem_csv_fname, cpugpu_csv_fname)
            # summary statistics of the frequency samples per metric
            for each_fmetric in freq_metrics:
                freq_data_dict = {
                    'sum': np.sum(perfdata[each_fmetric]),
                    'mode': _counter_mode(perfdata[each_fmetric]),
                    'mean': np.mean(perfdata[each_fmetric]),
                    'min': np.min(perfdata[each_fmetric]),
                    'max': np.max(perfdata[each_fmetric]),
                    'counter': collections.Counter(perfdata[each_fmetric]),
                    'transitions': np.count_nonzero(np.diff(perfdata[each_fmetric])),
                }
                all_freqdist_data[each_scenario][each_mifint_freq_str][each_fmetric] = freq_data_dict
    return all_freqdist_data
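# Hypothetical sketch of the `_counter_mode` helper used above (its
# definition is not part of this excerpt): the mode as the most common
# value in the sample list, via collections.Counter.
def _counter_mode(samples):
    return collections.Counter(samples).most_common(1)[0][0]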
def compute_corrmatrix_all_scenarios(sc_list, met_list):
    MIF_FREQ = "default"
    INT_FREQ = "default"
    all_sc_corr_mat = collections.OrderedDict()
    for each_scenario in sc_list:
        DATA_DIR = BASE_DATA_DIR + each_scenario + "/"
        cpugpu_csv_fname = DATA_DIR + "data_cpugpu-{0}-{1}.csv".format(MIF_FREQ, INT_FREQ)
        mem_csv_fname = DATA_DIR + "data_mem-{0}-{1}.csv".format(MIF_FREQ, INT_FREQ)
        (count, perfdata) = load_csv(mem_csv_fname, cpugpu_csv_fname)
        # label the current scenario (not the module-level SCENARIO_ID)
        lbl = "{0}:mif-{1}:int-{2}".format(each_scenario, MIF_FREQ, INT_FREQ)
        corr = compute_correlation_matrix(perfdata, met_list)[2]
        all_sc_corr_mat[each_scenario] = corr
    return all_sc_corr_mat
def plot_cpugpumem_dist_all_scenarios(sc_list, mif_freq, int_freq):
    fig, axs = plt.subplots(5, 3, figsize=(8 * 1.2, 8 * 1.2), sharex=True)
    axs = axs.ravel()
    for ix, each_scenario in enumerate(sc_list):
        DATA_DIR = BASE_DATA_DIR + each_scenario + "/"
        cpugpu_csv_fname = DATA_DIR + "data_cpugpu-{0}-{1}.csv".format(mif_freq, int_freq)
        mem_csv_fname = DATA_DIR + "data_mem-{0}-{1}.csv".format(mif_freq, int_freq)
        (count, perfdata) = load_csv(mem_csv_fname, cpugpu_csv_fname)
        cpu_util = perfdata['cpu_cost']
        gpu_util = perfdata['gpu_cost']
        mem_util = perfdata['mem_cost']
        bus_util = perfdata['sat_cost']
        y_data = [cpu_util, gpu_util, bus_util]
        pos = np.arange(1, len(y_data) + 1)
        xticklbls = [
            'cpu_cost', 'gpu_cost',
            #'mem_cost',
            'bus_cost'
        ]
        axs[ix].boxplot(y_data, positions=pos)
        axs[ix].set_xticks(pos)
        # clip the y-axis at 80 unless the data exceeds it
        ymax = max(80.0, np.max([np.max(m) for m in y_data]))
        axs[ix].set_ylim([-0.5, ymax])
        axs[ix].set_title(each_scenario)
    # only the bottom row of the 5x3 grid gets tick labels
    axs[-1].set_xticklabels(xticklbls, rotation=35)
    axs[-2].set_xticklabels(xticklbls, rotation=35)
    axs[-3].set_xticklabels(xticklbls, rotation=35)
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('csv_file', nargs='+')
    parser.add_argument('--yearly-mean', action='store_true')
    parser.add_argument('--png', help='file name for saving png figure')
    o = parser.parse_args()

    fig2 = plt.figure()
    ax2 = plt.subplot(1, 1, 1)

    # only keep metadata fields that differ across files in the legend label
    records = [get_metadata(csv_file) for csv_file in o.csv_file]
    keep_label = {
        field: len(set(r[field] for r in records)) > 1
        for field in records[0]
    }

    for csv_file in o.csv_file:
        ts = load_csv(csv_file)
        ts.index = convert_time_units_series(ts.index, years=True)
        record = get_metadata(csv_file)
        label = ', '.join(
            str(value) for key, value in record.items() if keep_label.get(key))
        path = Path(csv_file)
        name = path.name
        cname = ts.name
        color = None
        zorder = None
        # parse units out of a series name like "temperature (K)"
        try:
            units, = re.match(r'.* \((.*)\)', cname).groups()
        except Exception:
            logging.warning(f'failed to parse units: {cname}')
            units = ''
        l, = ax2.plot(ts.index, ts.values, alpha=0.5, label=label,
                      linewidth=1 if o.yearly_mean else 2,
                      color=color, zorder=zorder)

        # add yearly mean as well
        if o.yearly_mean:
            yearly_mean = ts.rolling(12).mean()
            l2, = ax2.plot(ts.index[::12], yearly_mean[::12], alpha=1,
                           linewidth=2, color=l.get_color(), zorder=zorder)

    ax2.legend(fontsize='xx-small')
    ax2.set_ylabel(units)
    ax2.set_xlabel(ts.index.name)
    ax2.set_title(name)
    mi, ma = ax2.get_xlim()
    if mi < 0:
        ax2.set_xlim(xmin=0)  # start at start_year (i.e. ERA5 start)

    if o.png:
        figname = o.png
        fig2.savefig(figname, dpi=300)
    else:
        plt.show()
def plot_freq_time_in_state(sc_list, metric,
                            TMP_MIF_FREQ="default", TMP_INT_FREQ="default"):
    f, axarr = plt.subplots(2, 8, sharex=True, sharey=True, figsize=(16, 10))
    f.canvas.set_window_title('plot_time_in_state -' + metric)
    axarr = axarr.ravel()
    # get colors and freq list
    (freq_list, colsd) = get_allfreq_list(metric)
    for ix, each_scenario in enumerate(sc_list):
        print each_scenario
        DATA_DIR = BASE_DATA_DIR + each_scenario + "/"
        cpugpu_csv_fname = DATA_DIR + "data_cpugpu-{0}-{1}.csv".format(TMP_MIF_FREQ, TMP_INT_FREQ)
        mem_csv_fname = DATA_DIR + "data_mem-{0}-{1}.csv".format(TMP_MIF_FREQ, TMP_INT_FREQ)
        try:
            axarr[ix].set_title(each_scenario, fontsize=12)
            (count, perfdata) = load_csv(mem_csv_fname, cpugpu_csv_fname)
            freq_data = perfdata[metric]
            # count time spent in each frequency state
            freq_data_dict = collections.OrderedDict()
            for each_freq in freq_list:
                fcount = freq_data.count(each_freq)
                if fcount > 0:
                    freq_data_dict[each_freq] = fcount
            print(json.dumps(freq_data_dict, indent=4))
            cols = [colsd[f] for f in freq_data_dict.keys()]
            pdata = freq_data_dict.values()
            lbls = [str(k) for k in freq_data_dict.keys()]
            wedges, plt_labels, junk = axarr[ix].pie(pdata, autopct=my_autopct,
                                                     colors=cols)
            for t in plt_labels:
                t.set_fontsize(10)
        except:
            continue
        axarr[ix].set_title(each_scenario, fontsize=12)

    # shared figure legend built from proxy rectangles, one per frequency
    rects_list = []
    rect_lbl_list = []
    for ix, each_f in enumerate(freq_list):
        rec = patches.Rectangle((0.72, 0.1), 0.2, 0.6, facecolor=colsd[each_f])
        rects_list.append(rec)
        rect_lbl_list.append(str(each_f))
    nlines = len(freq_list)
    ncol = int(np.ceil(nlines / 2.))
    leg = plt.figlegend(rects_list, rect_lbl_list, loc='upper center',
                        ncol=ncol, labelspacing=0., fontsize=14)
    leg.draggable()
    plt.subplots_adjust(hspace=0.09, wspace=0.00, top=0.87,
                        bottom=0.0, left=0.0, right=1.00)
def get_metric_slope_analysis(sc_list):
    print "get_metric_slope_analysis:Enter"
    all_freqdist_data = {}
    for ix, each_scenario in enumerate(sc_list):
        print each_scenario
        all_freqdist_data[each_scenario] = collections.OrderedDict()
        DEVFREQ_MIFINT_PAIRS = ["default", "default"]
        MIF_FREQ = DEVFREQ_MIFINT_PAIRS[0]
        INT_FREQ = DEVFREQ_MIFINT_PAIRS[1]
        DATA_DIR = BASE_DATA_DIR + each_scenario + "/"
        cpugpu_csv_fname = DATA_DIR + "data_cpugpu-{0}-{1}.csv".format(MIF_FREQ, INT_FREQ)
        mem_csv_fname = DATA_DIR + "data_mem-{0}-{1}.csv".format(MIF_FREQ, INT_FREQ)
        (c, perfdata) = load_csv(mem_csv_fname, cpugpu_csv_fname)
        interested_metrics = [
            'cpu_util', 'sat_total', 'bus_mif_freq', 'bus_int_freq', 'cpu_freq'
        ]
        # quantise the slope of each interested metric to -1/0/+1 per tick
        tmp_dict = {}
        for each_metric in interested_metrics:
            tmp_dict[each_metric] = []
            diff = np.diff(perfdata[each_metric])
            # decrease precision: ignore small utilisation fluctuations
            if (each_metric == "cpu_util") or (each_metric == "sat_total"):
                diff = [x if abs(x) > 5.0 else 0.0 for x in diff]
            for x in diff:
                if x == 0:  # no change
                    tmp_dict[each_metric].append(0)
                elif x > 0:  # positive
                    tmp_dict[each_metric].append(1)
                else:  # negative
                    tmp_dict[each_metric].append(-1)

        # classify each tick into one of four transition types (0 = none)
        samples = len(tmp_dict[interested_metrics[0]])
        result = []
        for i in xrange(samples):
            if (tmp_dict['cpu_util'][i] == 0) and (tmp_dict['sat_total'][i] == -1) and \
               (tmp_dict['bus_mif_freq'][i] == -1) and (tmp_dict['cpu_freq'][i] == 1):
                result.append(1)
            elif (tmp_dict['cpu_util'][i] == -1) and (tmp_dict['sat_total'][i] == -1) and \
                 (tmp_dict['bus_mif_freq'][i] == -1) and (tmp_dict['cpu_freq'][i] == 0):
                result.append(2)
            elif (tmp_dict['cpu_util'][i] in [0, -1]) and \
                 (tmp_dict['bus_mif_freq'][i] == -1) and (tmp_dict['cpu_freq'][i] == 0):
                result.append(3)
            elif (tmp_dict['cpu_util'][i] in [0, -1]) and \
                 (tmp_dict['bus_mif_freq'][i] == -1) and (tmp_dict['cpu_freq'][i] == 1):
                result.append(4)
            else:
                result.append(0)

        all_freqdist_data[each_scenario] = {
            'trans_type1_perc': float(result.count(1)) / float(len(result)),
            'trans_type2_perc': float(result.count(2)) / float(len(result)),
            'trans_type3_perc': float(result.count(3)) / float(len(result)),
            'trans_type4_perc': float(result.count(4)) / float(len(result)),
        }
    pprint.pprint(all_freqdist_data)
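# Equivalent sketch of the per-tick slope quantisation above, written with
# numpy (an assumption for illustration, not the original code): `values` is
# a 1-D numeric sequence and `deadband` zeroes small fluctuations, as done
# for 'cpu_util' and 'sat_total' with deadband=5.0.
def _quantise_slope(values, deadband=0.0):
    d = np.diff(np.asarray(values, dtype=float))
    d[np.abs(d) <= deadband] = 0.0
    return np.sign(d).astype(int)  # -1, 0 or +1 per tick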
import numpy as np
import pandas as pd
from sklearn import svm
import sklearn.preprocessing as pp

from common import load_csv, write_results_table

# Configuration
group_columns = ['LinkRef']
categorial_columns = []
meta_columns = ['JourneyLinkRef', 'JourneyRef', 'DateTime',
                'LineDirectionLinkOrder', 'LinkName']

results = pd.DataFrame()

# Load and pre-process data
data = load_csv('data/4A_201701_Consistent.csv',
                group_columns=group_columns,
                categorial_columns=categorial_columns,
                meta_columns=meta_columns,
                n_lags=20,
                n_headways=0)

for group, X, Y, meta in data:
    # Split data into train and test (80/20, preserving order)
    X_train, X_test = np.split(X, [int(.8 * len(X))])
    Y_train, Y_test = np.split(Y, [int(.8 * len(Y))])
    meta_train, meta_test = np.split(meta, [int(.8 * len(meta))])

    # Train
    print('Train data set (size, features):', X_train.shape)

    # Normalizing X and y:
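    # The script breaks off here; a minimal sketch of how the normalisation
    # and SVR training could continue, given the imports above. The scaler
    # choice, the SVR hyperparameters, and Y being 1-D are assumptions, not
    # the original code.
    X_scaler = pp.StandardScaler().fit(X_train)
    Y_scaler = pp.StandardScaler().fit(Y_train.reshape(-1, 1))
    X_train_s = X_scaler.transform(X_train)
    X_test_s = X_scaler.transform(X_test)
    Y_train_s = Y_scaler.transform(Y_train.reshape(-1, 1)).ravel()

    model = svm.SVR(kernel='rbf', C=1.0, epsilon=0.1)
    model.fit(X_train_s, Y_train_s)
    # predictions back on the original scale
    Y_pred = Y_scaler.inverse_transform(
        model.predict(X_test_s).reshape(-1, 1)).ravel()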
def main():
    import argparse

    locations = yaml.safe_load(open('locations.yml'))
    variables_def = yaml.safe_load(open('indicators.yml'))
    assets = yaml.safe_load(open('assets.yml'))
    cmip6_yml = yaml.safe_load(open('cmip6.yml'))

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--max-workers", type=int, default=4,
        help="Number of parallel threads for data download. "
             "Hint: use `--max-workers 1` for serial download.")

    g = parser.add_mutually_exclusive_group(required=True)
    g.add_argument('--indicators', nargs='*', default=[],
                   choices=[vdef['name'] for vdef in variables_def],
                   help='list of custom indicators to download')
    g.add_argument(
        '--asset', choices=list(assets.keys()),
        help='pre-defined list of variables, defined in assets.yml (experimental)')
    parser.add_argument(
        '--dataset', choices=['era5', 'cmip6'],
        help='dataset to use with `--indicators` and `--asset`')
    parser.add_argument('-o', '--output', default='indicators',
                        help='output directory, default: %(default)s')
    parser.add_argument('--overwrite', action='store_true', help=argparse.SUPPRESS)

    g = parser.add_argument_group('location')
    g.add_argument('--location', choices=[loc['name'] for loc in locations],
                   help='location name defined in locations.yml')
    g.add_argument('--lon', type=float)
    g.add_argument('--lat', type=float)

    g = parser.add_argument_group('area size controls')
    g.add_argument(
        '--area', nargs=4, type=float,
        help='area as four numbers: top, left, bottom, right (CDS convention)')
    g.add_argument(
        '--width-km', type=float, default=1000,
        help="Width (km) around the selected location, when not provided by "
             "`area`. %(default)s km by default.")
    g.add_argument(
        '--view', nargs=4, type=float,
        help='area for plot as four numbers: top, left, bottom, right (CDS convention)')

    g = parser.add_argument_group('ERA5 control')
    g.add_argument('--year', nargs='+', default=list(range(1979, 2019 + 1)),
                   help=argparse.SUPPRESS)

    g = parser.add_argument_group('CMIP6 control')
    g.add_argument('--model', nargs='*', default=None, choices=get_all_models())
    g.add_argument('--experiment', nargs='*', choices=cmip6_yml["experiments"],
                   default=['ssp5_8_5'])
    # all CMIP6 models and future experiments share the same parameters...
    # --historical extends the CMIP6 timeseries back to 1979 (on by default)
    g.add_argument('--historical', action='store_true', default=True,
                   help=argparse.SUPPRESS)
    g.add_argument('--no-historical', action='store_false', dest='historical',
                   help=argparse.SUPPRESS)
    # --bias-correction aligns CMIP6 variables with matching ERA5 (on by default)
    g.add_argument('--bias-correction', action='store_true', default=True,
                   help=argparse.SUPPRESS)
    g.add_argument('--no-bias-correction', action='store_false',
                   dest='bias_correction',
                   help='suppress bias-correction for CMIP6 data')
    g.add_argument(
        '--reference-period', default=[1979, 2019], nargs=2, type=int,
        help='reference period for bias correction (default: %(default)s)')
    g.add_argument('--yearly-bias', action='store_true',
                   help='yearly instead of monthly bias correction')
    g.add_argument(
        '--ensemble', action='store_true',
        help='If `--model` is not specified, default to all available models. '
             'Also write a csv file with all models as columns, as well as '
             'median, lower and upper (5th and 95th percentiles) fields.')

    g = parser.add_argument_group('visualization')
    g.add_argument('--view-region', action='store_true')
    g.add_argument('--view-timeseries', action='store_true')
    g.add_argument('--png-region', action='store_true')
    g.add_argument('--png-timeseries', action='store_true')
    g.add_argument('--dpi', default=100, type=int,
                   help='dots per inch (default: %(default)s)')
    g.add_argument('--yearly-mean', action='store_true')

    o = parser.parse_args()

    if not (o.location or (o.lon and o.lat)):
        parser.error(
            'please provide a location, for instance `--location Welkenraedt`, '
            'or use custom lon and lat, e.g. `--lon 5.94 --lat 50.67`')
    elif o.location:
        loc = {loc['name']: loc for loc in locations}[o.location]
        o.lon, o.lat = loc['lon'], loc['lat']
        if 'area' in loc and not o.area:
            o.area = loc['area']
    if not o.area:
        o.area = make_area(o.lon, o.lat, o.width_km)
    print('lon', o.lon)
    print('lat', o.lat)

    if not o.asset and not o.indicators:
        parser.error(
            'please provide indicators, for example: `--indicators 2m_temperature` '
            'or asset, e.g. `--asset energy`')

    # assets only contain indicators
    if o.asset:
        for vname in assets[o.asset]:
            if vname not in [v['name'] for v in variables_def]:
                parser.error(
                    f'unknown indicator in assets.yml: {vname}. '
                    'See indicators.yml for indicator definition')
            o.indicators.append(vname)

    # folder structure for CSV results
    loc_folder = o.location.lower() if o.location else f'{o.lat}N-{o.lon}E'
    asset_folder = o.asset if o.asset else 'all'

    if o.model is None:
        if o.ensemble:
            o.model = get_all_models()
        else:
            o.model = 'mpi_esm1_2_lr'

    # loop over indicators
    vdef_by_name = {v['name']: v for v in variables_def}
    for name in o.indicators:
        variables = []  # each variable for the simulation set
        vdef = vdef_by_name[name]
        indicator_def = dict(name=name,
                             units=vdef.get('units'),
                             description=vdef.get('description'),
                             scale=vdef.get('scale', 1),
                             offset=vdef.get('offset', 0))

        vdef2 = vdef.get('era5', {})
        era5_kwargs = dict(area=o.area, year=o.year)
        era5 = parse_indicator(ERA5, defs=vdef2, cls_kwargs=era5_kwargs,
                               **indicator_def)
        era5.simulation_set = 'ERA5'
        era5.set_folder = 'era5'
        era5.alias = name
        if not o.dataset or o.dataset == 'era5' or o.bias_correction:
            variables.append(era5)

        vdef2 = vdef.get('cmip6', {})
        transform = Transform(vdef2.get('scale', 1), vdef2.get('offset', 0))
        if not o.dataset or o.dataset == 'cmip6':
            for model in o.model:
                labels = {
                    x: "{}-{}.{}".format(*x.split("_"))
                    for x in cmip6_yml["experiments"]
                }
                for experiment in o.experiment:
                    cmip6_kwargs = dict(model=model, experiment=experiment,
                                        historical=o.historical, area=o.area)
                    cmip6 = parse_indicator(CMIP6, defs=vdef2,
                                            cls_kwargs=cmip6_kwargs,
                                            **indicator_def)
                    cmip6.reference = era5
                    cmip6.simulation_set = (
                        f'CMIP6 - {labels.get(experiment, experiment)} - {model}')
                    cmip6.set_folder = f'cmip6-{model}-{experiment}'
                    cmip6.alias = name
                    variables.append(cmip6)

        if not variables:
            logging.warning(f'no variable for {name}')
            continue

        if o.max_workers < 2:
            variables2 = download_all_variables_serial(variables)
        else:
            variables2 = download_all_variables(variables)

        # Diagnose which variables have been excluded
        names = list(set([v.name for v in variables]))
        names2 = list(set([v.name for v in variables2]))
        models = list(set([
            v.datasets[0].model for v in variables
            if isinstance(v.datasets[0], CMIP6)
        ]))
        models2 = list(set([
            v.datasets[0].model for v in variables2
            if isinstance(v.datasets[0], CMIP6)
        ]))
        print(f"Downloaded {len(variables2)} out of {len(variables)}")
        print(f"... {len(names2)} out of {len(names)} variable types")
        print(f"... {len(models2)} out of {len(models)} models")
        print("CMIP6 models excluded:",
              " ".join([m for m in models if m not in models2]))
        print("CMIP6 models included:", " ".join(models2))
        variables = variables2

        # download and convert to csv
        for v in variables:
            folder = os.path.join(o.output, loc_folder, asset_folder, v.set_folder)
            v.csv_file = os.path.join(folder, (v.alias or v.variable) + '.csv')
            if os.path.exists(v.csv_file):
                print("Already exists:", v.csv_file)
                continue
            series = v.load_timeseries(o.lon, o.lat, overwrite=o.overwrite)
            bias_correction_method = vdef.get('bias-correction')
            if (o.bias_correction and isinstance(v.datasets[0], CMIP6)
                    and bias_correction_method is not None):
                era5 = v.reference.load_timeseries(o.lon, o.lat)
                if o.yearly_bias:
                    series = correct_yearly_bias(series, era5,
                                                 o.reference_period,
                                                 bias_correction_method)
                else:
                    series = correct_monthly_bias(series, era5,
                                                  o.reference_period,
                                                  bias_correction_method)
            os.makedirs(folder, exist_ok=True)
            print("Save to", v.csv_file)
            save_csv(series, v.csv_file)

        if o.ensemble:
            ensemble_files = {}
            import cftime
            for experiment in o.experiment:
                ensemble_variables = [
                    v for v in variables
                    if isinstance(v.datasets[0], CMIP6)
                    and v.datasets[0].experiment == experiment
                ]
                dates = np.array([
                    cftime.DatetimeGregorian(y, m, 15)
                    for y in range(1979, 2100 + 1)
                    for m in range(1, 12 + 1)
                ])
                index = pd.Index(cftime.date2num(dates, time_units), name=time_units)
                df = {}
                for v in ensemble_variables:
                    series = load_csv(v.csv_file)
                    series.index = index[:len(series)]
                    df[v.datasets[0].model] = series
                df = pd.DataFrame(df)
                # ensemble median and 5-95% spread across models
                median = df.median(axis=1)
                lower = df.quantile(.05, axis=1)
                upper = df.quantile(.95, axis=1)
                df["median"] = median
                df["lower"] = lower
                df["upper"] = upper
                first = ensemble_variables[0]
                folder = os.path.join(
                    o.output, loc_folder, asset_folder,
                    first.set_folder.replace(first.datasets[0].model, "ensemble"))
                csv_file = os.path.join(folder, first.alias or first.name) + '.csv'
                ensemble_files[experiment] = csv_file
                os.makedirs(folder, exist_ok=True)
                print("Save to", csv_file)
                save_csv(df, csv_file)

        if o.view_region or o.view_timeseries or o.png_region or o.png_timeseries:
            import matplotlib.pyplot as plt
            cb = None
            try:
                import cartopy
                import cartopy.crs as ccrs
                kwargs = dict(projection=ccrs.PlateCarree())
            except ImportError:
                logging.warning('install cartopy to benefit from coastlines')
                cartopy = None
                kwargs = {}
            if o.view is None:
                o.view = o.area

            def plot_timeseries(v):
                figname = v.csv_file.replace('.csv', '.png')
                if os.path.exists(figname):
                    return
                fig2 = plt.figure(num=2)
                plt.clf()
                ax2 = fig2.add_subplot(1, 1, 1)
                ts = load_csv(v.csv_file)
                # convert units for easier reading of graphs
                ts.index = convert_time_units_series(ts.index, years=True)
                l, = ax2.plot(ts.index, ts.values, label=v.simulation_set)
                ax2.legend()
                ax2.set_xlabel(ts.index.name)
                ax2.set_ylabel(v.units)
                ax2.set_title(name)
                # add yearly mean as well
                if o.yearly_mean:
                    yearly_mean = ts.rolling(12).mean()
                    l2, = ax2.plot(ts.index[::12], yearly_mean[::12], alpha=1,
                                   linewidth=2, color=l.get_color())
                if o.png_timeseries:
                    fig2.savefig(figname, dpi=o.dpi)

            def plot_region(v):
                v0 = v.datasets[0]
                figname = v.csv_file.replace('.csv', '-region.png')
                if os.path.exists(figname):
                    return
                fig1 = plt.figure(num=1)
                plt.clf()
                ax1 = fig1.add_subplot(1, 1, 1, **kwargs)
                if isinstance(v.datasets[0], ERA5):
                    y1, y2 = o.reference_period
                    roll = False
                    title = f'ERA5: {y1}-{y2}'
                else:
                    y1, y2 = 2071, 2100
                    roll = True if o.view[1] < 0 else False
                    title = (f'{labels.get(v0.experiment, v0.experiment)} '
                             f'({v0.model}): {y1}-{y2}')
                refslice = slice(str(y1), str(y2))
                map = v.load_cube(time=refslice, area=o.view, roll=roll).mean(dim='time')
                h = ax1.imshow(map.values[::-1], extent=cube_area(map, extent=True))
                cb = plt.colorbar(h, ax=ax1, label=f'{name} ({v.units})')
                ax1.set_title(title)
                ax1.plot(o.lon, o.lat, 'ko')
                if cartopy:
                    ax1.coastlines(resolution='10m')
                if o.png_region:
                    fig1.savefig(figname, dpi=o.dpi)

            for v in variables:
                if o.view_timeseries or o.png_timeseries:
                    plot_timeseries(v)
                if o.view_region or o.png_region:
                    try:
                        plot_region(v)
                    except:
                        logging.warning(f'failed to make map for {v.name}')

            # all simulation sets on one figure
            def plot_all_simulations():
                figname = os.path.join(o.output, loc_folder, asset_folder,
                                       'all_' + name + '.png')
                if os.path.exists(figname):
                    return
                fig3 = plt.figure(num=3)
                plt.clf()
                ax3 = fig3.add_subplot(1, 1, 1)
                for v in variables:
                    ts = load_csv(v.csv_file)
                    ts.index = convert_time_units_series(ts.index, years=True)
                    if isinstance(v.datasets[0], ERA5):
                        color = 'k'
                        zorder = 5
                    else:
                        color = None
                        zorder = None
                    # plot yearly mean instead of monthly mean if requested
                    if o.yearly_mean:
                        yearly_mean = ts.rolling(12).mean()
                        x = ts.index[::12]
                        y = yearly_mean[::12]
                    else:
                        x = ts.index
                        y = ts.values
                    l, = ax3.plot(x, y,
                                  alpha=0.5 if o.ensemble else 1,
                                  label=v.simulation_set,
                                  linewidth=1 if o.ensemble else 2,
                                  color=color, zorder=zorder)

                # Add ensemble median and 5-95% range
                if o.ensemble:
                    for experiment in ensemble_files:
                        df = load_csv(ensemble_files[experiment])
                        df.index = convert_time_units_series(df.index, years=True)
                        if o.yearly_mean:
                            yearly_mean = df.rolling(12).mean()
                            x = df.index[::12]
                            y = yearly_mean.iloc[::12]
                        else:
                            x = df.index
                            y = df
                        l, = ax3.plot(x, y["median"], alpha=1,
                                      label=f"{experiment} (median)",
                                      linewidth=2, zorder=4)
                        ax3.plot(x, y["lower"], linewidth=1, zorder=4,
                                 linestyle="--", color=l.get_color())
                        ax3.plot(x, y["upper"], linewidth=1, zorder=4,
                                 linestyle="--", color=l.get_color())
                        ax3.fill_between(x, y["lower"], y["upper"], alpha=0.2,
                                         zorder=-1, color=l.get_color())

                ax3.legend(fontsize='xx-small')
                ax3.set_ylabel(v.units)
                ax3.set_xlabel(ts.index.name)
                ax3.set_title(name)
                mi, ma = ax3.get_xlim()
                if mi < 0:
                    ax3.set_xlim(xmin=0)  # start at start_year (i.e. ERA5 start)
                if o.png_timeseries:
                    fig3.savefig(figname, dpi=max(o.dpi, 300))

            if o.view_timeseries or o.png_timeseries:
                plot_all_simulations()

            if o.view_timeseries or o.view_region:
                plt.show()
def plot_cpugpumem_dist_all_scenarios(sc_list, mif_freq, int_freq, nrows=2):
    cmap = plt.get_cmap('rainbow')
    colsd = [cmap(i) for i in np.linspace(0, 1, 6)]
    colsd = [
        '#08519c', '#6baed6',  # blues
        #'#a50f15', '#fb6a4a',  # reds
        '#006d2c', '#74c476',  # greens
    ]
    fig, axs = plt.subplots(nrows, int(np.ceil(len(sc_list) / float(nrows))),
                            figsize=(12 * 1.2, 5 * 1.2),
                            sharex=True, sharey=True)
    axs = axs.ravel()
    xticklbls = [
        'cpu_util', 'cpu_cost',
        #'gpu_util', 'gpu_cost',
        'mem_util', 'mem_cost'
    ]
    for ix, each_scenario in enumerate(sc_list):
        print each_scenario
        DATA_DIR = BASE_DATA_DIR + each_scenario + "/"
        cpugpu_csv_fname = DATA_DIR + "data_cpugpu-{0}-{1}.csv".format(mif_freq, int_freq)
        mem_csv_fname = DATA_DIR + "data_mem-{0}-{1}.csv".format(mif_freq, int_freq)
        (count, perfdata) = load_csv(mem_csv_fname, cpugpu_csv_fname)
        cpu_util = perfdata['cpu_util']
        gpu_util = perfdata['gpu_util']
        mem_util = perfdata['sat_total']
        cpu_cost = perfdata['cpu_cost']
        gpu_cost = perfdata['gpu_cost']
        mem_cost = perfdata['sat_cost']
        y_data = [
            cpu_util, cpu_cost,
            #gpu_util, gpu_cost,
            mem_util, mem_cost
        ]
        pos = np.arange(1, len(y_data) + 1)
        bp = axs[ix].boxplot(y_data, positions=pos, patch_artist=True, widths=0.7)
        # change fill and outline colors of the boxes
        for box, c in zip(bp['boxes'], colsd):
            box.set(facecolor=c)
            box.set(color='#000000', linewidth=1)
        # change color and linewidth of the whiskers
        for whisker in bp['whiskers']:
            whisker.set(color='#000000', linewidth=1, linestyle='-')
        # change color and linewidth of the caps
        for cap in bp['caps']:
            cap.set(color='#000000', linewidth=1)
        # change color and linewidth of the medians
        for median in bp['medians']:
            median.set(color='#000000', linewidth=1)
        # change the style of fliers and their fill
        for flier, c in zip(bp['fliers'], colsd):
            flier.set(marker='x', color=c)
        axs[ix].set_xticks(pos)
        # clip the y-axis at 100 unless the data exceeds it
        ymax = max(100.0, np.max([np.max(m) for m in y_data]))
        axs[ix].set_ylim([-0.5, ymax])
        axs[ix].set_title(each_scenario, fontsize=14)
        axs[ix].tick_params(axis='y', labelsize=14)
        axs[ix].xaxis.grid(False)
        axs[ix].yaxis.grid(True)

    for ix in np.arange(1, int(len(sc_list) / float(nrows)) + 1):
        #axs[-1 * ix].set_xticklabels(xticklbls, rotation=35, fontsize=12)
        axs[-1 * ix].set_xticklabels([])

    # legend: proxy rectangles, one per metric
    rect_lbl_list = xticklbls
    cols = colsd
    rects_list = []
    for ix, each_rect in enumerate(rect_lbl_list):
        rec = patches.Rectangle((0.72, 0.1), 0.2, 0.6, facecolor=cols[ix])
        rects_list.append(rec)
    leg = plt.figlegend(rects_list, rect_lbl_list, loc='upper center',
                        ncol=len(rects_list) / 2, labelspacing=0.,
                        fontsize=14, frameon=False)
    leg.get_frame().set_facecolor('#FFFFFF')
    leg.get_frame().set_linewidth(0.0)
    leg.draggable()
    plt.subplots_adjust(top=0.88, left=0.025, right=0.995, bottom=0.02, wspace=0.25)
#################
#  MAIN code
#################
SCENARIO_ID = "idle1"
DATA_DIR = BASE_DATA_DIR + SCENARIO_ID + "/"
MIF_FREQ = "default"
INT_FREQ = "default"
#MIF_FREQ = 800000
#INT_FREQ = 800000

cpugpu_csv_fname = DATA_DIR + "data_cpugpu-{0}-{1}.csv".format(MIF_FREQ, INT_FREQ)
mem_csv_fname = DATA_DIR + "data_mem-{0}-{1}.csv".format(MIF_FREQ, INT_FREQ)

(count, perfdata) = load_csv(mem_csv_fname, cpugpu_csv_fname)

# plot_cross_correlation(perfdata['bus_mif_freq'],
#                        perfdata['cpu_util_freq'])

# temp crop data
#perfdata = _crop_pefdata(perfdata, 586, -1)

lbl = "{0}:mif-{1}:int-{2}".format(SCENARIO_ID, MIF_FREQ, INT_FREQ)

#corr_matrix = compute_correlation_matrix(perfdata, reduced_target_metrics_order_nogpu)[1]
#plot_corr_matrix(corr_matrix, reduced_target_metrics_order_nogpu, SCENARIO_ID)

# plot_overlapped(perfdata, ['cpu_freq',
#                            'bus_mif_freq', 'bus_int_freq',
#                            'cpu_util', 'cpu_cost',
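# Note: `load_csv` here is the project-local loader (not pandas). From its
# usage throughout this file it is assumed to merge the memory and cpu/gpu
# CSV logs and return a (sample_count, perfdata) pair, where perfdata maps
# metric names such as 'cpu_util' or 'bus_mif_freq' to per-tick value lists.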
import sys

import matplotlib.pyplot as plt

from common import load_csv

c = ['r', 'g', 'b', 'c', 'm', 'y', 'k']
KLASS = len(c)

data = load_csv(sys.argv[1])
# one colour per class, where the class is the row id modulo KLASS
for i in xrange(0, KLASS):
    x = [float(dat[4]) for dat in data if int(dat[0]) % KLASS == i]
    y = [float(dat[3]) for dat in data if int(dat[0]) % KLASS == i]
    plt.plot(x, y, c[i])
plt.show()
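# Usage sketch (the script name is an assumption; the CSV layout follows
# from the indexing above: column 0 holds a class id, column 4 the x value
# and column 3 the y value):
#   python plot_klass.py data/points.csv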
def plot_scatter_allsc_allfreq(scenario_list, scatter_plot=True, line_plot=True):
    mean_mem_cost = []
    mean_cpu_cost = []
    mean_gpu_cost = []
    mean_all_metrics = {}
    CPU_METRIC = 'cpu_cost'
    MEM_METRIC = 'sat_cost'
    GPU_METRIC = 'gpu_cost'
    for each_sc in scenario_list:
        print each_sc
        possible_freqs = CUSTOM_CROPPING_PARAMS_ALL[each_sc].keys()
        tmp_mem = []
        tmp_cpu = []
        tmp_gpu = []
        tmp_fix = []
        for fix, each_f in enumerate(all_mifint_freqs_macroworkload):
            if each_f in possible_freqs:
                miffreq, intfreq = mif_int_freqstr_to_tuple(each_f)
                # get scenario data
                DATA_DIR = BASE_DATA_DIR + each_sc + "/"
                cpugpu_csv_fname = DATA_DIR + "data_cpugpu-{0}-{1}.csv".format(miffreq, intfreq)
                mem_csv_fname = DATA_DIR + "data_mem-{0}-{1}.csv".format(miffreq, intfreq)
                (count, perfdata) = load_csv(mem_csv_fname, cpugpu_csv_fname)
                m = markers_and_cols_per_scenario[each_sc][0]
                c = markers_and_cols_per_scenario[each_sc][1]
                mean_cpu_cost.append([fix, np.mean(perfdata[CPU_METRIC]), m, c])
                mean_gpu_cost.append([fix, np.mean(perfdata[GPU_METRIC]), m, c])
                mean_mem_cost.append([fix, np.mean(perfdata[MEM_METRIC]), m, c])
                tmp_cpu.append(np.mean(perfdata[CPU_METRIC]))
                tmp_gpu.append(np.mean(perfdata[GPU_METRIC]))
                tmp_mem.append(np.mean(perfdata[MEM_METRIC]))
                tmp_fix.append(fix)
        # populate dicts
        mean_all_metrics[each_sc] = {
            'cpu': tmp_cpu,
            'gpu': tmp_gpu,
            'mem': tmp_mem,
            'fix': tmp_fix,
            'marker': markers_and_cols_per_scenario[each_sc][0],
            'col': markers_and_cols_per_scenario[each_sc][1],
        }

    ##### only scatters ####
    if scatter_plot == True:
        fig, axs = plt.subplots(1, 3)
        fig.canvas.set_window_title("plot_scatter_allsc_allfreq")
        axs = axs.ravel()
        axi = 0
        titles = [MEM_METRIC.replace('sat', 'mem'), CPU_METRIC, GPU_METRIC]
        for each_metric_data in [mean_mem_cost, mean_cpu_cost, mean_gpu_cost]:
            x, y, m, c = zip(*each_metric_data)
            for i, each_x in enumerate(x):
                axs[axi].scatter([x[i]], [y[i]], marker=m[i], color=c[i],
                                 s=90, linewidth=3.5)
                axs[axi].hold(True)
            axs[axi].set_title(titles[axi])
            axs[axi].xaxis.grid(False)
            axs[axi].yaxis.grid(False)
            axi += 1

        # legend
        rect_lbl_list = scenario_list
        cols = [markers_and_cols_per_scenario[s][1] for s in scenario_list]
        markers = [markers_and_cols_per_scenario[s][0] for s in scenario_list]
        artist_list = []
        for ix, each_rect in enumerate(rect_lbl_list):
            a = plt.Line2D((0, 1), (0, 0), color=cols[ix], marker=markers[ix],
                           linestyle='', mew=1.5, ms=13, mec=cols[ix])
            artist_list.append(a)
        leg = plt.figlegend(artist_list, rect_lbl_list, loc='upper center',
                            ncol=len(artist_list) / 4, labelspacing=0.,
                            fontsize=13, frameon=False, numpoints=1)
        leg.get_frame().set_facecolor('#FFFFFF')
        leg.get_frame().set_linewidth(0.0)
        leg.draggable()

    ##### line and scatters ####
    if line_plot == True:
        fsize = 14
        titles = [CPU_METRIC, GPU_METRIC]
        metrics = ['cpu', 'gpu']
        fig, axs = plt.subplots(1, len(metrics), figsize=(12.1, 5.5))
        fig.canvas.set_window_title("plot_line_allsc_allfreq")
        axs = axs.ravel()
        for axi, each_met in enumerate(metrics):
            ax_ymax = 0
            for each_sc_k, each_sc_data in mean_all_metrics.iteritems():
                m = each_sc_data['marker']
                c = each_sc_data['col']
                x = each_sc_data['fix']
                y = each_sc_data[each_met]
                if np.max(y) > ax_ymax:
                    ax_ymax = np.max(y)
                print x
                print y
                print "---"
                # markers on top, with a faint connecting line underneath
                axs[axi].plot(x, y, marker=m, color=c, linewidth=2,
                              linestyle='', mew=2, ms=10, markeredgecolor=c)
                axs[axi].hold(True)
                axs[axi].plot(x, y, color=c, linewidth=1, linestyle='-', alpha=0.2)
                axs[axi].hold(True)
            axs[axi].set_xlim([-1, len(all_mifint_freqs_macroworkload)])
            axs[axi].set_ylim([-2, ax_ymax * 1.05])
            axs[axi].set_title(titles[axi], fontsize=fsize)
            axs[axi].xaxis.grid(False)
            axs[axi].yaxis.grid(False)
            axs[axi].set_xticks(np.arange(0, len(all_mifint_freqs_macroworkload)))
            axs[axi].set_xticklabels(
                [f.replace('000', '').replace('default-', '')
                 for f in all_mifint_freqs_macroworkload],
                rotation=45, fontsize=fsize)
            axs[axi].tick_params(axis='both', labelsize=fsize)

        # legend
        rect_lbl_list = scenario_list
        cols = [markers_and_cols_per_scenario[s][1] for s in scenario_list]
        markers = [markers_and_cols_per_scenario[s][0] for s in scenario_list]
        artist_list = []
        for ix, each_rect in enumerate(rect_lbl_list):
            a = plt.Line2D((0, 1), (0, 0), color=cols[ix], marker=markers[ix],
                           linestyle='', mew=1.5, ms=13, mec=cols[ix])
            artist_list.append(a)
        leg = plt.figlegend(artist_list, rect_lbl_list, loc='upper center',
                            ncol=len(artist_list) / 4, labelspacing=0.,
                            fontsize=fsize, frameon=False, numpoints=1,
                            columnspacing=1.0)
        leg.get_frame().set_facecolor('#FFFFFF')
        leg.get_frame().set_linewidth(0.0)
        leg.draggable()
        plt.subplots_adjust(top=0.83, left=0.03, right=0.99, bottom=0.15,
                            wspace=0.14, hspace=0.18)