def show_errors(time, Y_true, Y_predict, with_graphs=False):
    mae = utils.mean_absolute_error(Y_true, Y_predict)
    mape = utils.mean_absolute_percentage_error(Y_true, Y_predict, epsilon=1.0)
    mse = utils.mean_squared_error(Y_true, Y_predict)
    print('MSE %f' % mse.mean())
    print('MAE %f' % mae.mean())
    print('MAPE %7.3f%%' % mape.mean())

    if with_graphs:
        # Wide translucent lines: predictions; thin solid lines: ground truth.
        pyplot.plot(time, Y_predict[:, 0], color='blue', lw=7, alpha=0.2)
        pyplot.plot(time, Y_predict[:, 1], color='green', lw=7, alpha=0.2)
        pyplot.plot(time, Y_predict[:, 2], color='red', lw=7, alpha=0.2)
        pyplot.plot(time, Y_true[:, 0], color='blue', lw=2)
        pyplot.plot(time, Y_true[:, 1], color='green', lw=2)
        pyplot.plot(time, Y_true[:, 2], color='red', lw=2)
        pyplot.grid()
        pyplot.show()

        pyplot.plot(time, mape, color='red', lw=1)
        pyplot.grid()
        pyplot.show()
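# Hedged usage sketch (not from the original source): exercises show_errors
# with synthetic three-channel data. Assumes a `utils` module exposing
# mean_absolute_error, mean_absolute_percentage_error and mean_squared_error
# over (n_samples, n_outputs) arrays, and `from matplotlib import pyplot`.
import numpy as np

t = np.linspace(0.0, 10.0, 200)
truth = np.stack([np.sin(t), np.cos(t), np.sin(2 * t)], axis=1)
noisy = truth + np.random.normal(scale=0.1, size=truth.shape)
show_errors(t, truth, noisy, with_graphs=False)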
def generate_plot_from_csv(name, dataset, ds_type, cropsize=224,
                           dirname="predictions/debug-448/"):
    """Generate plots from CSV.

    Parameters
    ----------
    name: Model name (vgg16baseline or vgg16decoder)
    dataset: Dataset name (SHHA or SHHB)
    ds_type: Set type (train or test)
    cropsize: Input image crop size
    """
    fname = f"{dirname}/{name}_{dataset}_{ds_type}_predictions_{cropsize}.csv"
    df = pd.read_csv(fname)
    df['diff'] = df.true_labels - df.predicted_labels
    scatter = alt.Chart(df).mark_circle().encode(
        alt.X("true_labels"),
        alt.Y("predicted_labels"),
        alt.Tooltip(["true_labels", "predicted_labels"]))
    # Identity line (y = x): perfect predictions fall exactly on it.
    line = alt.Chart(df).mark_line().encode(
        alt.X('true_labels', title="True"),
        alt.Y('true_labels', title="Predicted"),
        color=alt.value('rgb(0,0,0)'))
    mse = mean_squared_error(df.true_labels.values, df.predicted_labels.values)
    mae = mean_absolute_error(df.true_labels.values, df.predicted_labels.values)
    chart = (scatter + line).properties(
        title=f"INPUT {cropsize}, {dataset}:{ds_type.upper()}, MSE: {mse} | MAE: {mae}")
    return chart
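# Hedged usage sketch (not in the original source): renders and saves the
# scatter-plus-identity-line chart. The model/dataset names are illustrative,
# and the imports assume pandas, altair, and sklearn.metrics for the two
# error functions (the source may use its own implementations instead).
import altair as alt
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error

chart = generate_plot_from_csv("vgg16baseline", "SHHA", "test", cropsize=224)
chart.save("vgg16baseline_SHHA_test.html")  # altair supports .html/.json out of the box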
    print('Warning: NaNs detected in test data')

if o > 0:
    # Updating predictors
    Tr_Xtr = Xtr[:, 4:]
    Tr_Xte = Xte[:, 4:]

    coeffs_tr = fit_pixel(Tr_Xtr, ytr)
    y_test_pred_tr = predict_pixel(Tr_Xte, coeffs_tr)
    y_train_pred_tr = predict_pixel(Tr_Xtr, coeffs_tr)

    mse_test_tr = mean_squared_error(y_test_pred_tr, yte)
    mse_train_tr = mean_squared_error(y_train_pred_tr, ytr)
    mae_test_tr = mean_absolute_error(y_test_pred_tr, yte)
    mae_train_tr = mean_absolute_error(y_train_pred_tr, ytr)

# Fitting
coeffs = fit_pixel(Xtr, ytr)
y_test_pred = predict_pixel(Xte, coeffs)
y_train_pred = predict_pixel(Xtr, coeffs)

# Evaluation
mse_test = mean_squared_error(y_test_pred, yte)
mse_train = mean_squared_error(y_train_pred, ytr)
mae_test = mean_absolute_error(y_test_pred, yte)
mae_train = mean_absolute_error(y_train_pred, ytr)
# -*- coding: utf-8 -*-
# author: wenzhu
import pandas as pd

from utils import (mean_absolute_error, mean_squared_error,
                   root_mean_squared_error, r2_score)

# Load data
pred = pd.read_csv("saved_results/model_conv2d/predict.csv")
real = pd.read_csv("saved_results/model_conv2d/real_data.csv")
pred = pred.values
real = real.values

# Compute metrics
mae = mean_absolute_error(real, pred)
mse = mean_squared_error(real, pred)
rmse = root_mean_squared_error(real, pred)
r2 = r2_score(real, pred)

print('mae:', mae)
print('mse:', mse)
print('rmse:', rmse)
print('r2_score:', r2)
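# The `utils` metrics module is not shown in the source. A minimal NumPy
# sketch of what these four functions conventionally compute (an assumption,
# not necessarily the project's actual implementation):
import numpy as np

def mean_absolute_error(y_true, y_pred):
    return np.mean(np.abs(y_true - y_pred))

def mean_squared_error(y_true, y_pred):
    return np.mean((y_true - y_pred) ** 2)

def root_mean_squared_error(y_true, y_pred):
    return np.sqrt(mean_squared_error(y_true, y_pred))

def r2_score(y_true, y_pred):
    # Coefficient of determination: 1 - SS_res / SS_tot.
    ss_res = np.sum((y_true - y_pred) ** 2)
    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
    return 1.0 - ss_res / ss_tot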
def fitness(self, **kwargs):
    original_kwargs = kwargs.copy()
    window_size = kwargs.pop('window_size')
    num_points_to_predict = kwargs.pop('num_points_to_predict')
    num_derivatives = kwargs.pop('num_derivatives')
    epochs = kwargs.pop('epochs')
    batch_size = kwargs.pop('batch_size')
    scaler_class = kwargs.pop('scaler_class', utils.StandardScaler)

    overall_y_true = []
    overall_y_pred = []

    # Training
    print('\n\nCreating model: \n\t{0}'.format(original_kwargs))
    model = create_model((window_size, num_derivatives + 1),
                         num_points_to_predict, **kwargs)
    model.save_weights('initial_weights.h5')

    for train_t, test_t in utils.roll_cv(self.t_raw, folds=4,
                                         backtrack_padding=window_size - 1):
        train_x, train_y = utils.as_sequences(train_t, window_size,
                                              num_derivatives,
                                              num_points_to_predict)
        test_x, test_y = utils.as_sequences(test_t, window_size,
                                            num_derivatives,
                                            num_points_to_predict)

        scaler_x = utils.SequenceScaler(scaler_class)
        scaler_y = scaler_class()
        train_x_scaled = scaler_x.fit_transform(train_x)
        train_y_scaled = scaler_y.fit_transform(train_y)
        test_x_scaled = scaler_x.transform(test_x)
        test_y_scaled = scaler_y.transform(test_y)

        print('Fitting')
        model.fit(train_x_scaled, train_y_scaled, epochs=epochs,
                  batch_size=batch_size, verbose=0)

        pred_y_scaled = model.predict(test_x_scaled)
        pred_y = scaler_y.inverse_transform(pred_y_scaled)
        overall_y_true.append(test_y)
        overall_y_pred.append(pred_y)

        print('Reset weights')
        model.load_weights('initial_weights.h5')

    all_y_true = np.concatenate(overall_y_true)
    all_y_pred = np.concatenate(overall_y_pred)
    mse = utils.mean_squared_error(all_y_true, all_y_pred)
    mae = utils.mean_absolute_error(all_y_true, all_y_pred)
    print('MSE', mse)
    print('MAE', mae)
    fitness = -mse

    import matplotlib.pyplot as plt
    plt.plot(all_y_pred[:, 0], label='Predicted')
    plt.plot(all_y_true[:, 0], label='True')
    plt.grid()
    plt.legend()
    plt.show()

    log_kwargs = original_kwargs.copy()
    log_kwargs.update({'mse': mse, 'mae': mae})
    print('Trained: {0}'.format(log_kwargs))
    print('Fitness: {0:.3f}'.format(fitness))

    if not self.log_writer:
        self.log_file = open(self.log_path, 'a')
        self.log_writer = csv.DictWriter(self.log_file,
                                         fieldnames=sorted(log_kwargs.keys()))
        self.log_writer.writeheader()
    if self.log_writer:
        self.log_writer.writerow(log_kwargs)
        self.log_file.flush()

    return fitness
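# Hedged usage sketch (not in the original source): a single fitness
# evaluation. The hyperparameter names match the kwargs popped above; the
# enclosing class, its constructor, and the extra `units` kwarg forwarded to
# create_model are assumptions for illustration only.
hyperparams = {
    'window_size': 32,
    'num_points_to_predict': 4,
    'num_derivatives': 1,
    'epochs': 20,
    'batch_size': 64,
    'units': 64,  # hypothetical model kwarg, passed through **kwargs
}
score = evaluator.fitness(**hyperparams)  # `evaluator` is a hypothetical instance
print('fitness (negative MSE):', score)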
def generate(data):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    idss_predict = []
    context_predict = []
    rating_predict = []
    with torch.no_grad():
        while True:
            user, item, rating, seq, feature = data.next_batch()
            user = user.to(device)  # (batch_size,)
            item = item.to(device)
            bos = seq[:, 0].unsqueeze(0).to(device)  # (1, batch_size)
            feature = feature.t().to(device)  # (1, batch_size)
            if args.use_feature:
                text = torch.cat([feature, bos], 0)  # (src_len - 1, batch_size)
            else:
                text = bos  # (src_len - 1, batch_size)
            start_idx = text.size(0)
            for idx in range(args.words):
                # produce a word at each step
                if idx == 0:
                    log_word_prob, log_context_dis, rating_p, _ = model(
                        user, item, text, False
                    )  # (batch_size, ntoken) vs. (batch_size, ntoken) vs. (batch_size,)
                    rating_predict.extend(rating_p.tolist())
                    context = predict(log_context_dis, topk=args.words)  # (batch_size, words)
                    context_predict.extend(context.tolist())
                else:
                    log_word_prob, _, _, _ = model(
                        user, item, text, False, False, False)  # (batch_size, ntoken)
                word_prob = log_word_prob.exp()  # (batch_size, ntoken)
                word_idx = torch.argmax(
                    word_prob, dim=1
                )  # (batch_size,), pick the one with the largest probability
                text = torch.cat([text, word_idx.unsqueeze(0)], 0)  # (len++, batch_size)
            ids = text[start_idx:].t().tolist()  # (batch_size, seq_len)
            idss_predict.extend(ids)
            if data.step == data.total_step:
                break

    # rating
    predicted_rating = [
        (r, p) for (r, p) in zip(data.rating.tolist(), rating_predict)
    ]
    RMSE = root_mean_square_error(predicted_rating, corpus.max_rating,
                                  corpus.min_rating)
    print(now_time() + 'RMSE {:7.4f}'.format(RMSE))
    MAE = mean_absolute_error(predicted_rating, corpus.max_rating,
                              corpus.min_rating)
    print(now_time() + 'MAE {:7.4f}'.format(MAE))

    # text
    tokens_test = [
        ids2tokens(ids[1:], word2idx, idx2word) for ids in data.seq.tolist()
    ]
    tokens_predict = [
        ids2tokens(ids, word2idx, idx2word) for ids in idss_predict
    ]
    BLEU1 = bleu_score(tokens_test, tokens_predict, n_gram=1, smooth=False)
    print(now_time() + 'BLEU-1 {:7.4f}'.format(BLEU1))
    BLEU4 = bleu_score(tokens_test, tokens_predict, n_gram=4, smooth=False)
    print(now_time() + 'BLEU-4 {:7.4f}'.format(BLEU4))
    USR, USN = unique_sentence_percent(tokens_predict)
    print(now_time() + 'USR {:7.4f} | USN {:7}'.format(USR, USN))
    feature_batch = feature_detect(tokens_predict, feature_set)
    DIV = feature_diversity(feature_batch)  # time-consuming
    print(now_time() + 'DIV {:7.4f}'.format(DIV))
    FCR = feature_coverage_ratio(feature_batch, feature_set)
    print(now_time() + 'FCR {:7.4f}'.format(FCR))
    feature_test = [idx2word[i] for i in data.feature.squeeze(1).tolist()]  # ids to words
    FMR = feature_matching_ratio(feature_batch, feature_test)
    print(now_time() + 'FMR {:7.4f}'.format(FMR))
    text_test = [' '.join(tokens) for tokens in tokens_test]
    text_predict = [' '.join(tokens) for tokens in tokens_predict]
    tokens_context = [
        ' '.join([idx2word[i] for i in ids]) for ids in context_predict
    ]
    ROUGE = rouge_score(text_test, text_predict)  # a dictionary
    for (k, v) in ROUGE.items():
        print(now_time() + '{} {:7.4f}'.format(k, v))
    text_out = ''
    for (real, ctx, fake) in zip(text_test, tokens_context, text_predict):
        text_out += '{}\n{}\n{}\n\n'.format(real, ctx, fake)
    return text_out
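# The `predict` helper used above is not shown here. A minimal sketch,
# assuming it picks the top-k most probable context words from the log
# distribution (an assumption, not necessarily the original helper):
import torch

def predict(log_context_dis, topk):
    word_prob = log_context_dis.exp()  # (batch_size, ntoken)
    if topk == 1:
        return torch.argmax(word_prob, dim=1, keepdim=True)  # (batch_size, 1)
    return torch.topk(word_prob, topk, dim=1).indices  # (batch_size, topk)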
predict_pixel_class = torch.max(
    nn.functional.softmax(predict_map_valid, dim=1), dim=1).indices
predict_pixel_class = predict_pixel_class.cpu().numpy()  # batch_size*height*width
mask_valid = mask_valid.numpy()  # batch_size*height*width
dice = dice_coeff_multiclass(predict_pixel_class, mask_valid, num_class)
dice_sum += dice

# -------- calculate mean absolute error of direct area estimation
predict_area_valid = predict_area_valid.cpu().detach().numpy()  # batch_size*num_class
gt_area_valid = area_vector(mask_valid, num_class)  # batch_size*num_class
estimate_mae = mean_absolute_error(predict_area_valid, gt_area_valid)
estimate_mae_sum += estimate_mae

# -------- calculate mean absolute error of area by segmentation
segment_area_valid = area_vector(predict_pixel_class, num_class)
segment_mae = mean_absolute_error(segment_area_valid, gt_area_valid)
segment_mae_sum += segment_mae

print("average validate dice ", dice_sum / (step + 1),
      "average validate mae ", estimate_mae_sum / (step + 1),
      segment_mae_sum / (step + 1))
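# `area_vector` is not shown in the source. A minimal sketch, assuming it
# counts the pixels belonging to each class per image (an assumption about
# the helper, not the project's actual implementation):
import numpy as np

def area_vector(class_map, num_class):
    # class_map: (batch_size, height, width) integer class labels
    batch_size = class_map.shape[0]
    areas = np.zeros((batch_size, num_class), dtype=np.int64)
    for c in range(num_class):
        areas[:, c] = (class_map == c).reshape(batch_size, -1).sum(axis=1)
    return areas  # (batch_size, num_class)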
def train_ar_model(transform=False, bias=False, sig=False, order=0,
                   overwrite_results=True):
    if transform and bias:
        raise OSError('Not a valid model config')

    lagr_path = '/uio/lagringshotell/geofag/students/metos/hannasv/'
    path_transform = '{}results/stats/2014-01-01_2018-12-31'.format(lagr_path)
    path = '{}ar_data/'.format(lagr_path)
    path_ar_results = '{}/results/ar/'.format(lagr_path)

    SPATIAL_RESOLUTION = 0.25
    latitudes = np.arange(30.0, 50.0 + SPATIAL_RESOLUTION,
                          step=SPATIAL_RESOLUTION)
    longitudes = np.arange(-15, 25 + SPATIAL_RESOLUTION,
                           step=SPATIAL_RESOLUTION)

    base = '{}/results/stats/2014-01-01_2018-12-31/'.format(lagr_path)
    if transform:
        ds_tcc = xr.open_dataset(base + 'stats_pixel_tcc_all.nc')
        ds_r = xr.open_dataset(base + 'stats_pixel_r_all.nc')
        ds_q = xr.open_dataset(base + 'stats_pixel_q_all.nc')
        ds_t2m = xr.open_dataset(base + 'stats_pixel_t2m_all.nc')
        ds_sp = xr.open_dataset(base + 'stats_pixel_sp_all.nc')
        stats_data = {
            'q': ds_q, 't2m': ds_t2m, 'r': ds_r, 'sp': ds_sp, 'tcc': ds_tcc
        }

    explanation = ['q', 't2m', 'r', 'sp']
    tr_e = []
    tr_index = 4

    if bias:
        explanation.append('bias')
        tr_e.append('bias')

    full_name = generate_model_name('AR', bias, transform, sig, order)
    config = get_config_from_model_name(full_name)
    full_name_tr = generate_model_name('TR', bias, transform, sig, order)
    tr_config = get_config_from_model_name(full_name_tr)

    for latitude in latitudes:
        for longitude in longitudes:
            explain = explanation.copy()
            tr_explain = tr_e.copy()

            for o in range(0, order + 1):
                name = full_name + '-L{}'.format(o)
                tr_name = full_name_tr + '-L{}'.format(o)

                w_filename = '{}weights_{}_{}_{}.nc'.format(
                    path_ar_results, name, longitude, latitude)
                p_filename = '{}performance_{}_{}_{}.nc'.format(
                    path_ar_results, name, longitude, latitude)

                if not (os.path.exists(w_filename)
                        and os.path.exists(p_filename)) or overwrite_results:
                    fil = 'all_vars_lat_lon_{}_{}.nc'.format(latitude, longitude)
                    data = xr.open_dataset(path + fil)

                    if o > 0:
                        explain.append('O{}'.format(o))
                        tr_explain.append('O{}'.format(o))

                    start_time = timeit()
                    X_train, y_train = dataset_to_numpy_order(
                        dataset=data.sel(time=slice('2004', '2013')),
                        order=order, bias=bias)
                    X_test, y_test = dataset_to_numpy_order(
                        dataset=data.sel(time=slice('2014', '2018')),
                        order=order, bias=bias)

                    if transform:
                        X_train = transform_X(X_train, lat=latitude,
                                              lon=longitude, data=stats_data,
                                              order=o)
                        X_test = transform_X(X_test, lat=latitude,
                                             lon=longitude, data=stats_data,
                                             order=o)

                    if sig:
                        y_train = inverse_sigmoid(y_train)
                        y_test = inverse_sigmoid(y_test)

                    name = full_name + '-o{}'.format(o)
                    tr_name = full_name_tr + '-o{}'.format(o)

                    eval_dict = {}
                    eval_tr_dict = {}
                    weights_dict = {}
                    weights_tr_dict = {}

                    Xtr, ytr = drop_nans(X_train[:, :int(tr_index + o)], y_train)
                    Xte, yte = drop_nans(X_test[:, :int(tr_index + o)], y_test)

                    if sig:
                        yte = sigmoid(yte)
                        ytr = sigmoid(ytr)

                    if np.isnan(ytr).any():
                        print('Warning: NaNs detected in training data')
                    if np.isnan(yte).any():
                        print('Warning: NaNs detected in test data')

                    if o > 0:
                        # Updating predictors: the traditional (TR) model uses
                        # only the lagged terms.
                        Tr_Xtr = Xtr[:, tr_index:]
                        Tr_Xte = Xte[:, tr_index:]

                        coeffs_tr = fit_pixel(Tr_Xtr, ytr)
                        y_test_pred_tr = predict_pixel(Tr_Xte, coeffs_tr)
                        y_train_pred_tr = predict_pixel(Tr_Xtr, coeffs_tr)

                        mse_test_tr = mean_squared_error(y_test_pred_tr, yte)
                        mse_train_tr = mean_squared_error(y_train_pred_tr, ytr)
                        mae_test_tr = mean_absolute_error(y_test_pred_tr, yte)
                        mae_train_tr = mean_absolute_error(y_train_pred_tr, ytr)

                    # Fitting
                    coeffs = fit_pixel(Xtr, ytr)
                    y_test_pred = predict_pixel(Xte, coeffs)
                    y_train_pred = predict_pixel(Xtr, coeffs)

                    # Evaluation
                    mse_test = mean_squared_error(y_test_pred, yte)
                    mse_train = mean_squared_error(y_train_pred, ytr)
                    mae_test = mean_absolute_error(y_test_pred, yte)
                    mae_train = mean_absolute_error(y_train_pred, ytr)

                    # Adding the autoregressive model
                    weights_dict['coeffs'] = (['weights'], coeffs.flatten())
                    eval_dict['mse_test'] = mse_test[0]
                    eval_dict['mse_train'] = mse_train[0]
                    eval_dict['mae_test'] = mae_test[0]
                    eval_dict['mae_train'] = mae_train[0]

                    num_test_samples = len(yte)
                    num_train_samples = len(ytr)
                    eval_dict['num_test_samples'] = num_test_samples
                    eval_dict['num_train_samples'] = num_train_samples
                    eval_dict.update(config)
                    weights_dict.update(config)

                    # Adding the traditional model
                    if o > 0:
                        weights_tr_dict['coeffs'] = (['weights'],
                                                     coeffs_tr.flatten())
                        eval_tr_dict['mse_test'] = mse_test_tr[0]
                        eval_tr_dict['mse_train'] = mse_train_tr[0]
                        eval_tr_dict['mae_test'] = mae_test_tr[0]
                        eval_tr_dict['mae_train'] = mae_train_tr[0]
                        eval_tr_dict['num_test_samples'] = num_test_samples
                        eval_tr_dict['num_train_samples'] = num_train_samples
                        eval_tr_dict.update(tr_config)
                        weights_tr_dict.update(tr_config)

                        w_tr_filename = '{}/weights_{}_{}_{}.nc'.format(
                            path_ar_results, tr_name, longitude, latitude)
                        p_tr_filename = '{}/performance_{}_{}_{}.nc'.format(
                            path_ar_results, tr_name, longitude, latitude)

                        ds = xr.Dataset(
                            weights_tr_dict,
                            coords={
                                'latitude': (['latitude'], [latitude]),
                                'longitude': (['longitude'], [longitude]),
                                'weights': (['weights'], tr_explain)
                            })
                        ds.to_netcdf(w_tr_filename)

                        ds = xr.Dataset(
                            eval_tr_dict,
                            coords={
                                'latitude': (['latitude'], [latitude]),
                                'longitude': (['longitude'], [longitude])
                            })
                        ds.to_netcdf(p_tr_filename)

                    stop_time = timeit()
                    eval_dict['time_elapsed_seconds'] = stop_time - start_time

                    w_filename = '{}weights_{}_{}_{}.nc'.format(
                        path_ar_results, name, longitude, latitude)
                    p_filename = '{}performance_{}_{}_{}.nc'.format(
                        path_ar_results, name, longitude, latitude)

                    ds = xr.Dataset(
                        weights_dict,
                        coords={
                            'latitude': (['latitude'], [latitude]),
                            'longitude': (['longitude'], [longitude]),
                            'weights': (['weights'], explain)
                        })
                    ds.to_netcdf(w_filename)

                    ds = xr.Dataset(
                        eval_dict,
                        coords={
                            'latitude': (['latitude'], [latitude]),
                            'longitude': (['longitude'], [longitude])
                        })
                    ds.to_netcdf(p_filename)

                    print('finished calibrating bias {}, sigmoid {}, '
                          'transform {}, order/lag {} - ({}, {})'.format(
                              bias, sig, transform, o, longitude, latitude))
                else:
                    print('Model config already calibrated bias {}, sigmoid {}, '
                          'transform {}, order/lag {} - ({}, {})'.format(
                              bias, sig, transform, o, longitude, latitude))
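# `fit_pixel` and `predict_pixel` are not shown in the source. A minimal
# least-squares sketch consistent with how they are called above (an
# assumption, not the project's actual implementation):
import numpy as np

def fit_pixel(X, y):
    # Ordinary least squares; coefficients of shape (n_features, 1).
    coeffs, _, _, _ = np.linalg.lstsq(X, y.reshape(-1, 1), rcond=None)
    return coeffs

def predict_pixel(X, coeffs):
    return X @ coeffs  # (n_samples, 1)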
def run_model(model_type, appliances, interval, test_dataset, experiment_name,
              train_denorm=True, plot_results=False, return_time=False,
              export_predictions=False, verbose=False):
    if appliances:
        appliance_list = appliances
    else:
        appliance_list = APPLIANCES

    train_appliances = {}
    for app in appliance_list:
        train_appliances[app] = load_df(app, interval, col=app,
                                        dataset="train", denorm=train_denorm)
    train_mains = load_df("fridge", interval, col="mains", dataset="train",
                          denorm=train_denorm)

    if model_type == "CO":
        model = CO({})
    elif model_type == "AFHMM":
        model = AFHMM({})
    else:
        raise ValueError(
            f"Model type {model_type} not understood. "
            f"Available currently are only 'CO' and 'AFHMM'.")

    train_start_time = time.time()
    model.partial_fit(train_main=[train_mains],
                      train_appliances=train_appliances)
    train_time = time.time() - train_start_time

    # Average test time across appliances
    test_time = 0
    test_appliances = {}
    for app in appliance_list:
        try:
            test_appliances[app] = load_df(app, interval, col=app,
                                           dataset=test_dataset,
                                           denorm=train_denorm)
        except Exception:
            pass

    if model_type == "AFHMM" and test_dataset == "ECO":
        raise ValueError(
            "Do not use AFHMM with ECO. It is not currently implemented "
            "due to long testing times.")

    test_time_agg = 0
    eval_counter = 0

    if model_type == "AFHMM":
        num_workers = cpu_count()
        chunk_length = 720  # hardcoded fix for now
        test_mains = load_df(appliance_list[0], interval, col="mains",
                             dataset=test_dataset, denorm=train_denorm)
        test_mains = test_mains.values.flatten().reshape((-1, 1))
        n = len(test_mains)
        n_chunks = int(math.ceil(len(test_mains) / chunk_length))
        n_iter = math.ceil(n_chunks / num_workers)
        results = []
        test_start_time = time.time()
        print(f"Starting disaggregation for {n_iter} chunks.")
        for i in tqdm(range(n_iter)):
            mains = test_mains[i * num_workers * chunk_length:
                               i * num_workers * chunk_length +
                               chunk_length * num_workers]
            results.append(model.disaggregate_chunk(mains)[0])
            pd.concat(results, axis=0).to_csv(
                f"quicksaves/checkpoint{i}_{interval}.csv", sep=";")
        test_time = time.time() - test_start_time
        results = pd.concat(results, axis=0)[:n]

    for app in appliance_list:
        try:
            if model_type == "CO":
                test_mains = load_df(app, interval, col="mains",
                                     dataset=test_dataset, denorm=train_denorm)
                n = len(test_mains)
                test_start_time = time.time()
                results = model.disaggregate_chunk(
                    mains=pd.Series([test_mains[:n]]))[0]
                test_time = time.time() - test_start_time

            if train_denorm:
                true_apps = np.array(test_appliances[app][:n])
                pred_apps = np.array(results[app])
            else:
                true_apps = utils.denormalize(test_appliances[app][:n], app)
                pred_apps = utils.denormalize(results[app], app)

            mse = utils.mean_squared_error(true_apps, pred_apps)
            mae = utils.mean_absolute_error(true_apps, pred_apps)
            sae = utils.normalised_signal_aggregate_error(true_apps, pred_apps)
            mr = utils.match_rate(true_apps, pred_apps)

            log_file_dir = f"Nilmtk/logs/{experiment_name}/{model_type}_{app}.log"
            # In Python 3.8 we could pass force=True to basicConfig, but the
            # project targets 3.7, so clear and reset the handlers manually
            # (there is probably a better way).
            for handler in logging.root.handlers[:]:
                logging.root.removeHandler(handler)
            logging.basicConfig(filename=log_file_dir, format='%(message)s',
                                level=logging.INFO)

            test_log = f"Test dataset: {test_dataset}"
            logging.info(test_log)
            metric_string = f"MSE: {mse}" \
                            f" MAE: {mae}" \
                            f" SAE: {sae}" \
                            f" Match Rate: {mr}\n"
            logging.info(metric_string)

            if export_predictions:
                utils.check_dir(f"Nilmtk/model_predictions/{experiment_name}/")
                results_path = (f"Nilmtk/model_predictions/{experiment_name}/"
                                f"{model_type}_{app}_{test_dataset}.csv")
                pd.DataFrame(pred_apps).to_csv(results_path, sep=";")

            test_time_agg += test_time
            eval_counter += 1
        except Exception as e:
            if verbose:
                print(app, e)

    if eval_counter:
        test_time_agg /= eval_counter

    if return_time:
        return train_time, test_time_agg
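# Hedged usage sketch (not in the original source): a CO run with
# return_time enabled. The appliance names, interval string, and dataset
# name are illustrative values, not taken from the project's configuration.
train_time, test_time = run_model(
    model_type="CO",
    appliances=["fridge", "kettle"],
    interval="6s",
    test_dataset="ECO",
    experiment_name="co_baseline",
    return_time=True,
)
print(f"train: {train_time:.1f}s, mean test per appliance: {test_time:.1f}s")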