# Test error when training on different years.
# For each training date range in trLst, the statistics are computed only on
# the time steps of the full period that fall OUTSIDE that training range.
trLst = [[20150402, 20160401], [20160401, 20170401], [20170401, 20180401]]
statPLst = list()
statFLst = list()
# The full-period time array does not depend on the training range, so it is
# built once instead of on every loop iteration.
taAll = utils.time.tRange2Array([20150402, 20180401])
for k, trTrain in enumerate(trLst):
    taTrain = utils.time.tRange2Array(trTrain)
    # NOTE(review): assumes intersect() returns, first, the positions of the
    # common elements within taAll -- confirm against utils.time.intersect.
    indTrain, ind2 = utils.time.intersect(taAll, taTrain)
    # Test columns = every column of the full period not used for training.
    indTest = np.delete(np.arange(len(taAll)), indTrain)
    tempYp = ypLst[k][:, indTest]
    tempYf = yfLst[k][:, indTest]
    tempMask = maskF[:, indTest]
    tempObs = obs[:, indTest]
    # Both series are passed through fillNan with the same mask before being
    # compared against the equally-masked observations.
    tempStatP = stat.statError(utils.fillNan(tempYp, tempMask),
                               utils.fillNan(tempObs, tempMask))
    tempStatF = stat.statError(utils.fillNan(tempYf, tempMask),
                               utils.fillNan(tempObs, tempMask))
    statPLst.append(tempStatP)
    statFLst.append(tempStatF)

# plot map and time series
import importlib
importlib.reload(plot)
# One RMSE difference (statPLst entry minus statFLst entry) per training range.
dataGrid = [
    statP_k['RMSE'] - statF_k['RMSE']
    for statP_k, statF_k in zip(statPLst, statFLst)
]
prcp = df.getDataTs('APCP_FORA').squeeze()
temp = 1 else: if temp != 0: temp = temp + 1 ind = np.random.randint(0, ngrid) print(np.array([maskObs[ind, :], maskDay[ind, :]])) maskObsDay = maskObs * maskDay unique, counts = np.unique(maskDay, return_counts=True) print(np.asarray((unique, counts)).T) print(counts / ngrid / nt) fLst = [1, 2, 3] statLstF = list() statLstP = list() maskF = (maskDay >= 1) & (maskDay <= 3) statP = stat.statError(utils.fillNan(yp, maskF), utils.fillNan(obs, maskF)) statF = stat.statError(utils.fillNan(yf, maskF), utils.fillNan(obs, maskF)) for nf in fLst: xp = np.full([ngrid, nt], np.nan) xf = np.full([ngrid, nt], np.nan) y = np.full([ngrid, nt], np.nan) xf[maskObsDay == nf] = yf[maskObsDay == nf] xp[maskObsDay == nf] = yp[maskObsDay == nf] y[maskObsDay == nf] = obs[maskObsDay == nf] statLstF.append(stat.statError(xf, y)) statLstP.append(stat.statError(xp, y)) # plot box - forecast matplotlib.rcParams.update({'font.size': 11}) matplotlib.rcParams.update({'lines.linewidth': 2}) matplotlib.rcParams.update({'lines.markersize': 12})
# Build maskDay, an integer array with the same shape as maskObs.
# maskDay[j, i] is the value of a per-row running counter BEFORE column i is
# examined: it is 0 until the first column where maskObs == 1 has been passed,
# becomes 1 on the column right after such a column, and grows by one for
# every following column without maskObs == 1.
maskDay = np.zeros(maskObs.shape).astype(int)
ngrid, nt = maskObs.shape
for j in range(ngrid):
    temp = 0
    for i in range(nt):
        maskDay[j, i] = temp
        if maskObs[j, i] == 1:
            # restart the counter after every flagged column
            temp = 1
        elif temp != 0:
            # only count once at least one flagged column has been seen
            temp += 1

# Random row used only for the diagnostic print below.
ind = np.random.randint(0, ngrid)
# Counter value kept only where maskObs is non-zero.  This was computed twice
# in the original with no change to its inputs in between; once is enough.
maskObsDay = maskObs * maskDay
unique, counts = np.unique(maskObsDay, return_counts=True)

# Select entries whose counter value is 1, 2 or 3.
maskF = (maskDay >= 1) & (maskDay <= 3)
statP = stat.statError(utils.fillNan(yp, maskF), utils.fillNan(obs, maskF))
statF = stat.statError(utils.fillNan(yf, maskF), utils.fillNan(obs, maskF))

# Diagnostics: one random row of both masks, the value/count table of
# maskObsDay, and each count as a fraction of all ngrid * nt entries.
print(np.array([maskObs[ind, :], maskDay[ind, :]]))
print(np.asarray((unique, counts)).T)
print(counts / ngrid / nt)

# see result for different seasons
tRangeLst = [[20160401, 20160701], [20160701, 20161001],
             [20161001, 20170101], [20170101, 20170401],
             [20170401, 20170701], [20170701, 20171001],
             [20171001, 20180101], [20180101, 20180401]]
tAllA = utils.time.tRange2Array(tAllR)
statPLst = list()
statFLst = list()
for i in range(nt): maskDay[j, i] = temp if maskObs[j, i] == 1: temp = 1 else: if temp != 0: temp = temp + 1 maskObsDay = maskObs * maskDay fLst = [1, 2, 3] statLst = list() for nf in fLst: maskF = maskDay == nf temp = list() for yf in yfLst: statErr = stat.statError(utils.fillNan(yf, maskF), utils.fillNan(obs, maskF)) temp.append(statErr) statLst.append(temp) # load result from RK dirRK = r'D:\\data\\Koster17\\' fileNameLst = ['rmse_lead_{}.dat'.format(x) for x in [1, 2, 3]] tempLst = list() for k in range(3): # lon lat are identical. Tested temp = np.loadtxt(os.path.join(dirRK, fileNameLst[k])) tempLst.append(temp[:, 2]) RKlon = temp[:, 0] RKlat = temp[:, 1] lat, lon = df.getGeo()
maskDay[j, i] = temp if maskObs[j, i] == 1: temp = 1 else: if temp != 0: temp = temp + 1 ind = np.random.randint(0, ngrid) maskObsDay = maskObs * maskDay maskF = (maskDay >= 1) & (maskDay <= 3) # figure out train and test time index tR0 = [20150402, 20180401] tA0 = utils.time.tRange2Array(tR0) nt = len(tA0) tTrainLst = list() tTestLst = list() for k in range(len(yrLst)): tR = tRangeLst[k] tA = utils.time.tRange2Array(tR) ind0 = np.array(range(nt)) ind1, ind2 = utils.time.intersect(tA0, tA) tTestLst.append(np.delete(ind0, ind1)) tTrainLst.append(ind1) # calculate stat for k in range(len(yrLst)): yfTemp = utils.fillNan(yfLst[k], maskF) yfTemp = yfTemp[:, tTestLst[k]] statP = stat.statError(yfTemp, utils.fillNan(obs, maskF)) statF = stat.statError(yfTemp, utils.fillNan(obs, maskF))
# Statistics grid: outer index j selects the entry of ypLst / yfLst, inner
# index i selects the date range of trLst on which the statistics are
# evaluated.  statPLst[j][i] and statFLst[j][i] hold the results.
statPLst = list()
statFLst = list()
# The full-period time array is the same for every (j, i) pair, so build it
# once instead of inside the nested loop.
taAll = utils.time.tRange2Array([20150402, 20180401])
for j in range(3):
    tempPLst = list()
    tempFLst = list()
    for i in range(3):
        trTest = trLst[i]
        taTest = utils.time.tRange2Array(trTest)
        # NOTE(review): assumes the first value returned by intersect() is
        # the positions of the common elements within taAll -- confirm.
        ind, ind2 = utils.time.intersect(taAll, taTest)
        tempYp = ypLst[j][:, ind]
        tempYf = yfLst[j][:, ind]
        tempMask = maskF[:, ind]
        tempObs = obs[:, ind]
        tempStatP = stat.statError(
            utils.fillNan(tempYp, tempMask), utils.fillNan(tempObs, tempMask))
        tempStatF = stat.statError(
            utils.fillNan(tempYf, tempMask), utils.fillNan(tempObs, tempMask))
        tempPLst.append(tempStatP)
        tempFLst.append(tempStatF)
    statPLst.append(tempPLst)
    statFLst.append(tempFLst)

# # plot forecast error train on different year
# keyLst = ['RMSE', 'Corr']
# yrStrLst = ['2015', '2016', '2017']
# [lat, lon] = df.getGeo()
# fig, axes = plt.subplots(2, 2, figsize=[8, 4])
# key = 'RMSE'
# for j in range(2):
#     jLst = [0, 2]
# Fill maskDay row by row: each cell receives the running counter value held
# before that column is examined.  The counter restarts at 1 after a column
# where maskObs == 1 and otherwise increases once it is non-zero.
ngrid, nt = maskObs.shape
for j in range(ngrid):
    temp = 0
    for i in range(nt):
        maskDay[j, i] = temp
        if maskObs[j, i] == 1:
            temp = 1
        elif temp != 0:
            temp += 1

ind = np.random.randint(0, ngrid)
maskObsDay = maskObs * maskDay
# Entries whose counter value lies in 1..3.
maskF = (maskDay >= 1) & (maskDay <= 3)

# statP compares yp with obs unmasked; every element of yfLst is compared
# with obs after both are passed through fillNan with maskF.
statP = stat.statError(yp, obs)
statLst = [
    stat.statError(utils.fillNan(x, maskF), utils.fillNan(obs, maskF))
    for x in yfLst
]

# if 'post' in doLst:
caseLst = ['Predict'] + [str(nd) + 'd latency' for nd in dLst]
keyLst = list(statLst[0])
dataBox = list()
# For every statistic key, collect the statP values first, followed by the
# values of each entry of statLst in order.
for iS, key in enumerate(keyLst):
    temp = [statP[key]]
    print(key, np.nanmedian(statP[key]))
    for k in range(len(statLst)):
        data = statLst[k][key]
        temp.append(data)