import numpy as np

from skill_metrics import rmsd, utils  # assumed imports; the original snippet omits them


def skill_score_murphy(predicted, reference):
    '''
    Calculate non-dimensional skill score (SS) between two variables using
    definition of Murphy (1988)

    Calculates the non-dimensional skill score (SS) difference between two
    variables PREDICTED and REFERENCE. The skill score is calculated using
    the formula:

    SS = 1 - RMSE^2/SDEV^2

    where RMSE is the root-mean-square error between the predicted and
    reference values

    (RMSE)^2 = sum_(n=1)^N (p_n - r_n)^2/N

    and SDEV is the standard deviation of the reference values

    SDEV^2 = sum_(n=1)^N [r_n - mean(r)]^2/(N-1)

    where p is the predicted values, r is the reference values, and N is the
    total number of values in p & r. Note that p & r must have the same
    number of values.

    Input:
    PREDICTED : predicted field
    REFERENCE : reference field

    Output:
    SS : skill score

    Reference:
    Allan H. Murphy, 1988: Skill Scores Based on the Mean Square Error and
    Their Relationships to the Correlation Coefficient. Mon. Wea. Rev., 116,
    2417-2424.
    doi: http://dx.doi.org/10.1175/1520-0493(1988)116<2417:SSBOTM>2.0.CO;2

    Author: Peter A. Rochford
    Symplectic, LLC
    www.thesymplectic.com
    [email protected]

    Created on Dec 7, 2016
    '''
    utils.check_arrays(predicted, reference)

    # Calculate RMSE
    rmse2 = rmsd(predicted, reference)**2

    # Calculate standard deviation
    sdev2 = np.std(reference, ddof=1)**2

    # Calculate skill score
    ss = 1 - rmse2/sdev2

    return ss
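# --- Usage sketch (not part of the original module) ---
# A minimal illustration of calling the function defined above; the input
# values below are synthetic and chosen only to show the expected shapes.
if __name__ == '__main__':
    reference = np.array([1.0, 2.0, 3.0, 4.0, 5.0])   # reference values r_n
    predicted = np.array([1.1, 1.9, 3.2, 3.8, 5.3])   # predicted values p_n

    # SS = 1 - RMSE^2/SDEV^2; close to 1 for a prediction this close to the reference
    print(skill_score_murphy(predicted, reference))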
def taylorPlot():
    # Build a Taylor diagram comparing each simulation column of the global
    # DataFrame `df` against the observation column `df.columns[iObs]`
    # ('Fluxnet'), then save the figure to argv['outputPath'].
    obsCol = df.iloc[:, iObs]

    stds = []
    rmses = []
    coefs = []
    labels = []

    # The first entry of each list describes the observations themselves
    stds.append(obsCol.std())
    rmses.append(0)
    coefs.append(1)
    labels.append('Fluxnet')

    for i in range(colN):
        if i != iObs:
            simLabel = df.columns[i]
            thisDF = df.dropna(subset=[simLabel, df.columns[iObs]])
            simCol = thisDF[simLabel]
            obsCol = thisDF[df.columns[iObs]]
            if simCol.any():
                std = simCol.std()
                rmse = sm.rmsd(simCol, obsCol)
                coef = np.corrcoef(simCol, obsCol)[0, 1]
                stds.append(std)
                rmses.append(rmse)
                coefs.append(coef)
                labels.append(simLabel)

    sm.taylor_diagram(np.array(stds),
                      np.array(rmses),
                      np.array(coefs),
                      markerLabel=labels,
                      markerLabelColor='r',
                      markerSize=6,
                      markerLegend='on',
                      colOBS='g',
                      styleOBS='-',
                      markerobs='o',
                      showlabelsRMS='on',
                      titleRMS='on',
                      titleOBS='Fluxnet',
                      rmslabelformat=':.1f')

    plt.title(metricName,
              y=1.06,
              fontsize='large',
              loc='center',
              horizontalalignment='center')
    plt.savefig(argv['outputPath'])
    plt.close('all')
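# --- Hypothetical driver sketch (not from the original script) ---
# taylorPlot() reads the module-level names df, iObs, colN, metricName and argv,
# so a caller has to bind them before the call. The file name, title and column
# layout below are invented purely for illustration:
#
#     import pandas as pd
#
#     df = pd.read_csv('fluxnet_vs_models.csv')    # obs column plus one column per model
#     iObs = 0                                     # index of the 'Fluxnet' observation column
#     colN = df.shape[1]                           # total number of columns
#     metricName = 'Latent heat flux'              # plot title
#     argv = {'outputPath': 'taylor_diagram.png'}  # where the figure is saved
#     taylorPlot()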
def getStatisticalIndex():
    # Compute summary statistics (mean, std, RMSE, correlation, NSE, R^2) for
    # every simulation column of the global DataFrame `df` against the
    # observation column `df.columns[iObs]`, and print them as a JSON string.
    result = {
        'means': [],
        'stds': [],
        'rmses': [],
        'coefs': [],
        'nses': [],
        'r2s': [],
        'labels': []
    }
    for i in range(colN):
        if i != iObs:
            simLabel = df.columns[i]
            thisDF = df.dropna(subset=[simLabel, df.columns[iObs]])
            simCol = thisDF[simLabel]
            obsCol = thisDF[df.columns[iObs]]
            std = simCol.std()
            mean = simCol.mean()
            if simCol.any():
                rmse = sm.rmsd(simCol, obsCol)
                coef = np.corrcoef(simCol, obsCol)[0, 1]
                # Nash-Sutcliffe efficiency: 1 minus the sum of squared errors
                # divided by the spread of the observations about their mean
                nse = 1 - sum((simCol - obsCol)**2) / sum(
                    (obsCol - obsCol.mean())**2)
                r2 = coef**2
            else:
                rmse = np.nan
                coef = np.nan
                nse = np.nan
                r2 = np.nan
            result['means'].append(mean)
            result['labels'].append(simLabel)
            result['stds'].append(std)
            result['rmses'].append(rmse)
            result['coefs'].append(coef)
            result['nses'].append(nse)
            result['r2s'].append(r2)
        else:
            # Statistics of the observations against themselves
            obsCol = df.iloc[:, iObs]
            result['means'].append(obsCol.mean())
            result['labels'].append('Fluxnet')
            result['stds'].append(obsCol.std())
            result['rmses'].append(0)
            result['coefs'].append(1)
            result['nses'].append(1)
            result['r2s'].append(1)

    # NaN is not valid JSON, so replace it with null before printing
    jsonStr = json.dumps(result).replace('NaN', 'null')
    print(jsonStr)
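# --- Downstream consumption sketch (illustrative, not from the original script) ---
# Because the NaN -> null replacement above makes the printed string valid JSON,
# a caller capturing this function's stdout can parse it directly; null entries
# come back as Python None. `capturedStdout` is a hypothetical variable name:
#
#     import json
#     result = json.loads(capturedStdout)
#     print(result['labels'], result['nses'])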
# Calculate various skill metrics, writing results to screen
# and Excel file. Use an ordered dictionary so skill metrics are
# saved in the Excel file in the same order as written to screen.
stats = OrderedDict()

# Read data from pickle file
data = load_obj('target_data')
pred = data.pred1['data']
ref = data.ref['data']

# Get bias
stats['bias'] = sm.bias(pred, ref)
print('Bias = ' + str(stats['bias']))

# Get Root-Mean-Square-Deviation (RMSD)
stats['rmsd'] = sm.rmsd(pred, ref)
print('RMSD = ' + str(stats['rmsd']))

# Get Centered Root-Mean-Square-Deviation (CRMSD)
stats['crmsd'] = sm.centered_rms_dev(pred, ref)
print('CRMSD = ' + str(stats['crmsd']))

# Get Standard Deviation (SDEV)
stats['sdev'] = np.std(pred)
print('SDEV = ' + str(stats['sdev']))

# Get correlation coefficient (r)
ccoef = np.corrcoef(pred, ref)
stats['ccoef'] = ccoef[0, 1]
print('r = ' + str(stats['ccoef']))
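# --- Excel output sketch ---
# The Excel-writing step mentioned in the comment above is not shown in this
# excerpt. One way to do it, using pandas and a made-up filename (which may
# differ from whatever the original script used), would be:
import pandas as pd

# A one-row DataFrame preserves the OrderedDict column order (bias, rmsd, crmsd, sdev, ccoef)
pd.DataFrame([stats]).to_excel('target_stats.xlsx', index=False)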
# Flatten the observations and collect Taylor-diagram statistics, with the
# observations themselves as the first entry of each list
observationData = observationData.reshape(observationData.size)

stds = []
rmses = []
coefs = []
stds.append(observationData.std())
rmses.append(0)
coefs.append(1)

for i, nc in enumerate(cmip.ncPaths):
    if i == 0:
        continue
    data = cmip.getData(i, allTime=False, timeIndex=0)
    data = data.reshape(data.size)
    std = data.std()
    rmse = sm.rmsd(data, observationData)
    coef = np.ma.corrcoef(data, observationData)[0, 1]
    stds.append(std)
    rmses.append(rmse)
    coefs.append(coef)

# intervalsCOR = np.concatenate((np.arange(0,1.0,0.2), [0.9, 0.95, 0.99, 1]))
sm.taylor_diagram(
    np.array(stds),
    np.array(rmses),
    np.array(coefs),
    markerLabel=cmip.markerLabels,
    # tickRMS = np.arange(0,25,10),
    # tickSTD = np.arange(9,20,5),
    # tickCOR = intervalsCOR,
    rmslabelformat=':.1f')
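# --- Optional axis-control sketch (not active in the original call) ---
# The commented-out keyword arguments above can be passed to sm.taylor_diagram to
# set the RMS, STD and correlation ticks explicitly; re-enabled, the call would
# look roughly like this (tick values taken from the commented lines):
#
#     intervalsCOR = np.concatenate((np.arange(0, 1.0, 0.2), [0.9, 0.95, 0.99, 1]))
#     sm.taylor_diagram(np.array(stds), np.array(rmses), np.array(coefs),
#                       markerLabel=cmip.markerLabels,
#                       tickRMS=np.arange(0, 25, 10),
#                       tickSTD=np.arange(9, 20, 5),
#                       tickCOR=intervalsCOR,
#                       rmslabelformat=':.1f')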
####### Plot Nov
plt.scatter(fechan, realn, color='black', label='Data')    # observed data
plt.plot(fechan, testn, color='red', label='Modelo RBF')   # RBF kernel
plt.title('Tunal (Nov 22 y 23)')
#plt.plot(fechas, svr_lin.predict(fechas), color='green', label='Modelo Lineal')      # linear kernel
#plt.plot(fechas, svr_poly.predict(fechas), color='blue', label='Modelo Polinomial')  # polynomial kernel
plt.xlabel('Horas')
plt.ylabel('Concentracion de PM_25')
plt.legend()
plt.savefig('Tun_Nov_22-23.png')
plt.show()
plt.close()

# Correlation, RMSD and bias for each period
print('------ Febrero 14 y 15 de 2019 ------')
print(np.corrcoef(real, test))
print(sm.rmsd(test, np.array(real)))
print(sm.bias(test, np.array(real)))

print('------ Noviembre 22 y 23 de 2018 ------')
print(np.corrcoef(realn, testn))
print(sm.rmsd(testn, np.array(realn)))
print(sm.bias(testn, np.array(realn)))

##### Create files
## Feb
soda = {'SVR_tun': test.tolist()}
df = pd.DataFrame(soda, columns=['SVR_tun'])
df.to_csv('tun_feb_svr.csv')

## Nov
soda = {'SVR_tun': testn.tolist()}
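# --- Optional follow-up sketch (not in the original script) ---
# Assuming SkillMetrics exposes the Murphy (1988) skill score defined earlier in
# this collection as sm.skill_score_murphy, it could be reported next to the
# RMSD and bias printouts above for the same two periods:
#
#     print('SS Feb =', sm.skill_score_murphy(test, np.array(real)))
#     print('SS Nov =', sm.skill_score_murphy(testn, np.array(realn)))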