def corr_func(x, y, **kwargs):
    """Annotate the current matplotlib axes with the Pearson correlation of x and y.

    The coefficient is taken from the off-diagonal entry of the 2x2 matrix
    returned by ``np.corrcoef`` and written as ``r = <value>`` with two
    decimals.

    :param x: First sequence of observations.
    :param y: Second sequence of observations, same length as ``x``.
    :param kwargs: Accepted and ignored, so the function can be used where
        extra keyword arguments are forwarded to the callback.
    """
    # np.corrcoef (single "f") is the NumPy function; entry [0, 1] is corr(x, y).
    r = np.corrcoef(x, y)[0, 1]
    ax = plt.gca()
    # xycoords=ax.transAxes places the label in axes-relative coordinates,
    # i.e. (0.2, 0.8) is 20% from the left and 80% from the bottom of the axes.
    ax.annotate(
        "r = {:.2f}".format(r),
        xy=(0.2, 0.8),
        xycoords=ax.transAxes,
        size=20,
    )
def corr(data_samples: np.ndarray, data_truth: np.ndarray, agg=None, **kwargs):
    """Computes the empirical correlation between actuals and predictions.

    The samples are first collapsed into a single forecast by calling
    ``agg(data_samples, axis=0)``. The forecast and the ground truth are then
    passed to ``np.corrcoef`` with ``rowvar=False``, so for 2-D inputs each
    column is treated as a variable and each row as an observation.

    :param data_samples: Sampled predictions. Axis 0 is reduced by ``agg``,
        leaving the predicted time series values (documented originally as
        (n_timesteps, n_timeseries)).
    :param data_truth: Ground truth time series values
    :param agg: Aggregator function that creates forecast out of samples;
        it must accept an ``axis`` keyword. ``np.median`` is used when no
        aggregator (or a falsy value) is given.
    :param kwargs: Accepted for interface compatibility and ignored.
    :return: The correlation coefficient matrix produced by ``np.corrcoef``,
        rounded to 3 decimals.
    """
    aggregator = agg or np.median
    data = aggregator(data_samples, axis=0)
    # np.corrcoef (single "f") is the NumPy function; "corrcoeff" does not exist.
    return np.round(np.corrcoef(data, data_truth, rowvar=False), 3)
def corr(data_samples: np.ndarray, data_truth: np.ndarray, agg=None, **kwargs):
    """Returns the Pearson correlation coefficient between observed values and
    aggregated predictions.

    The predictions are aggregated with ``agg(data_samples, axis=0)`` and the
    result is correlated against ``data_truth`` using ``np.corrcoef`` with
    ``rowvar=False`` (for 2-D inputs, columns are variables and rows are
    observations).

    :param data_samples: Sampled predictions. Axis 0 is reduced by ``agg``,
        leaving the predicted time series values (documented originally as
        (n_timesteps, n_timeseries)).
    :param data_truth: Actual values observed.
    :param agg: Property of the forecast distribution to use for evaluation,
        given as a callable accepting an ``axis`` keyword. ``np.median`` is
        used when no aggregator (or a falsy value) is given.
    :param kwargs: Accepted for interface compatibility and ignored.
    :return: The correlation coefficient matrix produced by ``np.corrcoef``,
        rounded to 3 decimals.
    """
    reducer = agg if agg else np.median
    forecast = reducer(data_samples, axis=0)
    # np.corrcoef (single "f") is the NumPy function; "corrcoeff" does not exist.
    coefficients = np.corrcoef(forecast, data_truth, rowvar=False)
    return np.round(coefficients, 3)
def distances(chi1, chi2, type_comp='auc', taus=(1, 10, 25, 50),
              normalize=True, plot=False, savefig=False,
              filefig='plots/change_graph.png'):
    '''
    Compare two graphs based on their diffusion properties:
    assumes that the nodes are identified (row i of both inputs refers to the
    same node).

    INPUT:
    ======================================================
    chi1, chi2            : diffusion signatures of the two graphs. They are
                            indexed as 2-D arrays of shape (n_nodes, dim_embed);
                            the columns are split into len(taus) consecutive
                            bands of dim_embed // len(taus) columns, one band
                            per scale (leftover columns are ignored).
                            NOTE(review): the original docstring described
                            these as nx / pygsp graphs, which does not match
                            how the body indexes them -- confirm with callers.
    type_comp             : the distance between distributions that should be
                            used: "corr", "auc" (default) or "emd"
    taus                  : the scales used for heat diffusion propagation
                            (only its length is used here)
    normalize             : currently unused; kept for interface compatibility
    plot, savefig, filefig: additional parameters (for plotting and
                            saving plots)

    OUTPUT:
    ======================================================
    distances             : (len(taus), n_nodes) array of distances between
                            diffusion distributions at the different scales
    agg_score             : sum of all entries of ``distances``

    If ``type_comp`` is not one of the supported values, a message is printed
    and ``np.nan`` is returned instead of the tuple.
    '''
    # NOTE(review): the original body read ``heat_print1`` / ``heat_print2``,
    # which are not parameters of this function; they are assumed to be the
    # two signature arguments (the "corr" branch already indexed chi1/chi2
    # with the shape taken from heat_print1). Confirm against callers.
    heat_print1, heat_print2 = chi1, chi2

    n_nodes, dim_embed = heat_print1.shape
    n_filters = len(taus)
    # Integer division: the band width is used to build index ranges.
    level_size = dim_embed // n_filters
    dist = np.zeros((n_filters, n_nodes))

    for m in range(n_filters):
        index_scale = slice(m * level_size, (m + 1) * level_size)
        for i in range(n_nodes):
            if type_comp == "corr":
                # np.corrcoef returns a 2x2 matrix; [0, 1] is the coefficient.
                dist[m, i] = 1 - np.corrcoef(chi1[i, index_scale],
                                             chi2[i, index_scale])[0, 1]
            elif type_comp == "auc":
                # NOTE(review): ``mode_diff`` is not defined in this function;
                # it must exist at module level for this branch to run.
                dist[m, i] = abs(
                    compute_evolution_heat_diff(i, m, heat_print1,
                                                heat_print2,
                                                mode_diff=mode_diff))
            elif type_comp == "emd":
                # NOTE(review): the histograms are built from row ``m`` of the
                # signatures and do not depend on the node index ``i``; this
                # mirrors the original code -- confirm it is intended.
                dist[m, i] = _emd_between_histograms(heat_print1[m],
                                                     heat_print2[m])
            else:
                print('comparison type not implemented')
                return np.nan

    if plot:
        plt.figure()
        sb.heatmap(dist, cmap="hot")
        # NOTE(review): saving is assumed to apply only when a figure was
        # drawn; the original indentation was lost.
        if savefig:
            plt.savefig(filefig)

    agg_score = np.sum(dist)
    return dist, agg_score


def _emd_between_histograms(values1, values2, n_bins=30):
    '''Earth mover's distance between density-normalised histograms of two samples.

    Both samples are binned on the ``n_bins`` bin edges derived from
    ``values1``. The ground distance between two bins is the absolute
    difference of their upper edges. ``emd`` is the solver already used by
    this module (presumably pyemd -- confirm).
    '''
    hist1, bins_arr = np.histogram(values1, n_bins)
    hist2, _ = np.histogram(values2, bins_arr)

    # Normalise so that sum(bin_width * height) == 1 for each histogram.
    widths = np.diff(bins_arr)
    hist1 = hist1 / np.dot(widths, hist1)
    hist2 = hist2 / np.dot(widths, hist2)

    upper_edges = bins_arr[1:]
    ground = np.abs(upper_edges[:, None] - upper_edges[None, :])
    return emd(hist1, hist2, ground)
# Unfinished sketch of a hand-rolled Pearson correlation, kept for reference.
# COV(X, Y) = E[(X - E[X])(Y - E[Y])]
# r = COV(X, Y) / (std(X) * std(Y))
# r = sum()
# def mean(x): return sum(x) / len(x)
# def coff(x, y):
#     mx = mean(x)
#     my = mean(y)
#     vx = mean( )
#     xx = map(lambda x: x - mx)
#     yy = map(lambda x: x - my)
#     numerator = sum(map(lambda xy: xy[0] * xy[1], zip(xx, yy))

if __name__ == "__main__":
    # NOTE(review): main() is not defined in the visible part of the file;
    # it is expected to come from elsewhere in this module.
    main()
    x1 = 15, 12, 8, 8, 7, 7, 7, 6, 5, 3
    x2 = 10, 25, 17, 11, 13, 17, 20, 13, 9, 15
    # np.corrcoef (single "f") is the NumPy function; "corrcoeff" does not exist.
    print(np.corrcoef(x1, x2))