def prune(freqSet, H, S, rules, minConf):
    """Keep the consequents in H that meet the minimum confidence and collect
    the corresponding rules (antecedent, consequent, confidence, lift, support)."""
    prunedH = []
    for i in range(np.shape(H)[0]):
        conseq = H[i, :]                            # get a consequent
        ante = np.setdiff1d(freqSet, conseq)        # antecedent is the rest of the items in the itemset
        freqSetSup = S[str(freqSet)]                # support for freqSet
        anteSup = S[str(ante)]                      # support for antecedent
        conseqSup = S[str(conseq)]                  # support for consequent
        conf = freqSetSup / anteSup                 # confidence
        lift = freqSetSup / (anteSup * conseqSup)   # and lift
        if conf >= minConf:
            prunedH.append(conseq)
            rule = {'Antecedent': ante,
                    'Consequent': conseq,
                    'Conf': conf,
                    'Lift': lift,
                    'Sup': freqSetSup}
            rules.append(rule)
    prunedH = np.array(prunedH)                     # 2-D array of surviving consequents
    return prunedH, rules
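# A minimal usage sketch for prune(), assuming numpy is imported as np and that the
# support dictionary is keyed by str() of the itemset arrays, as in the calls above.
# The item indices and support values are illustrative only.
def _demo_prune():
    freqSet = np.array([0, 1])
    H = np.array([[0], [1]])                    # candidate 1-item consequents
    S = {str(np.array([0, 1])): 0.4,            # support({0, 1})
         str(np.array([0])): 0.6,               # support({0})
         str(np.array([1])): 0.5}               # support({1})
    prunedH, rules = prune(freqSet, H, S, [], 0.6)
    # conf({1} => {0}) = 0.4 / 0.5 = 0.8 and conf({0} => {1}) = 0.4 / 0.6 ~ 0.67,
    # so both consequents pass minConf = 0.6 and two rules are returned.
    return prunedH, rules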
def delete(self):
    # delete class destructor
    #
    # Syntax:
    #   delete(self)
    #
    # Description:
    #   Loops through the object's properties and, if a property is not itself an
    #   object, empties it. Otherwise, calls the sub-object's destructor.
    #
    # Input:
    #   obj - model object
    #
    # Output:
    #   (none)
    #
    # Author:
    #   Dr. Tim Peterson, The Department of Infrastructure Engineering,
    #   The University of Melbourne.
    #
    # Date:
    #   24 Aug 2016
    ##
    propNames = properties(self)
    for i in range(len(propNames)):
        prop = getattr(self, propNames[i])
        if np.size(prop) == 0:
            continue
        if isobject(prop):
            delete(prop)                        # call the sub-object's destructor
        else:
            setattr(self, propNames[i], [])     # empty the property
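# delete() above relies on properties() and isobject(), mirroring the MATLAB built-ins;
# they are assumed to be defined elsewhere in this package. Purely as an illustration of
# the behaviour the destructor expects (not the package's actual API), such helpers
# could look like:
def properties(obj):
    # names of the object's public, non-callable attributes
    return [name for name, value in vars(obj).items()
            if not name.startswith('_') and not callable(value)]

def isobject(value):
    # treat any value that exposes its own delete() destructor as a sub-object
    return hasattr(value, 'delete')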
def generate(freqSet, H, S, rules, minConf):
    # only proceed if the frequent itemset is longer than the consequents by more than 1
    (m, n) = np.shape(H)
    if len(freqSet) > (m + 1):
        if m == 1:
            _, rules = prune(freqSet, H, S, rules, minConf)   # prune 1-item consequents
        Hm1 = aprioriGen(H, m + 1)                            # use aprioriGen to generate longer consequents
        Hm1, rules = prune(freqSet, Hm1, S, rules, minConf)   # prune consequents
        if len(Hm1) > 0:                                      # recurse while consequents remain
            rules = generate(freqSet, Hm1, S, rules, minConf)
    return rules
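# aprioriGen() is called above and in generateFreqItemsets() but is not defined in this
# section. The following is only a minimal sketch of the usual Apriori candidate-generation
# step (merge two (k-1)-itemsets when their first k-2 items agree), assuming itemsets are
# rows of sorted integer item indices; the helper actually used may differ.
def aprioriGen(Lkm1, k):
    Lkm1 = np.asarray(Lkm1)
    if Lkm1.ndim == 1:
        Lkm1 = Lkm1.reshape(-1, 1)      # frequent 1-itemsets arrive as a flat index array
    candidates = []
    for i in range(np.shape(Lkm1)[0]):
        for j in range(i + 1, np.shape(Lkm1)[0]):
            if np.array_equal(Lkm1[i, :k - 2], Lkm1[j, :k - 2]):
                candidates.append(np.union1d(Lkm1[i, :], Lkm1[j, :]))
    if not candidates:
        return np.empty((0, k), dtype=int)
    return np.array(candidates, dtype=int)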
def generateFreqItemsets(transactions, minSup):
    transactions = pd.Series(transactions, index=np.arange(0, len(transactions)))
    items_generate = [
        item for transaction in transactions.values for item in transaction
    ]
    # index holds, for every original item, its position in uniqItems
    uniqItems, index, oneItemsets = np.unique(
        items_generate, return_inverse=True, return_counts=True)
    items = list(uniqItems)
    N = len(transactions)
    C1 = uniqItems
    supk = oneItemsets / N                      # support for all 1-item candidates
    S = {}
    for j in range(0, len(C1)):
        S[str(j)] = supk[j]
    # frequent 1-itemsets: support must be >= minimal support
    Lk = {
        'items': uniqItems[(supk >= minSup).ravel().nonzero()[0]],
        'index': (supk >= minSup).ravel().nonzero()[0]
    }
    L = [Lk]

    # get all frequent k-itemsets where k >= 2
    k = 2
    while True:
        # Ck: candidate itemsets built from the frequent (k-1)-itemsets
        p = L[k - 2]['index']
        Ck = aprioriGen(p, k)
        support = np.zeros(np.shape(Ck)[0])
        for i in range(N):
            # walk through all transactions; t holds the item indices of the i-th transaction
            t = sorted([items.index(item) for item in transactions.values[i]])
            # if all items of a candidate are members of t, add 1/N to its support
            contained = myall(ismember(Ck, t))
            support[contained] = support[contained] + (1 / N)
        Lk = {
            'items': uniqItems[Ck[support >= minSup, :]],
            'index': Ck[support >= minSup, :]
        }
        if np.size(support) > 0:
            mapS = {}
            for i in range(len(support)):
                mapS[str(Ck[i, :])] = support[i]
            S.update(mapS)
        else:
            break
        if np.size(Lk['index']) > 0:
            L.append(Lk)
            k = k + 1
        else:
            break
    return L, S, items
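# The support-counting loop above relies on MATLAB-style helpers ismember() and myall(),
# which are assumed to be provided elsewhere in this module. A minimal sketch of the
# behaviour the loop expects (elementwise membership, then a row-wise "all" used as a
# boolean index over the candidate matrix Ck):
def ismember(Ck, t):
    # True where an entry of the candidate matrix appears in the transaction's item indices
    return np.isin(Ck, t)

def myall(mask):
    # rows whose items are all contained in the transaction
    return np.all(mask, axis=1)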
def main():
    # Definitions of the event fetched from config.cfg
    config = ConfigParser.RawConfigParser()
    config.read("config.cfg")
    startdate = config.get("input", "startdate")
    enddate = config.get("input", "enddate")
    n = config.getint("input", "n")
    diacorr = config.getfloat("input", "diacorr")

    # Convert dates to datetime
    try:
        startdt = datetime.strptime(startdate, "%Y%m%d%H%M")
        enddt = datetime.strptime(enddate, "%Y%m%d%H%M")
    except Exception:
        traceback.print_exc()

    foldername = os.path.join(config.get("input", "dataloc"),
                              datetime.strftime(startdt, '%Y%m%d'))
    if not os.path.exists(foldername):
        os.mkdir(foldername)
    paraname = os.path.join(
        foldername,
        datetime.strftime(startdt, '%H%M') + '_' + datetime.strftime(enddt, '%H%M'))
    print(paraname)
    parafile = shelve.open(paraname)

    # Flags for optional data
    pluvio200 = config.getboolean("flags", "pluvio200")
    pluvio400 = config.getboolean("flags", "pluvio400")
    wind_GILL = config.getboolean("flags", "wind_GILL")
    FMI_met = config.getboolean("flags", "FMI_met")
    PIP_psd = config.getboolean("flags", "PIP_psd")
    PIP_vel = config.getboolean("flags", "PIP_vel")
    PIP_par = config.getboolean("flags", "PIP_par")
    PIP_parrel = config.getboolean("flags", "PIP_parrel")
    PIP_mass = config.getboolean("flags", "PIP_mass")
    PIP_minute = config.getboolean("flags", "PIP_minute")
    imag = config.getboolean("flags", "imag")
    version = config.getboolean("flags", "version")

    # Read precipitation data
    if pluvio200 or pluvio400:
        print("Reading precipitation data (PLUVIO)")
        tv_PL200, PL200_acc, PL200_rr, tv_PL400, PL400_acc, PL400_rr = \
            PluvioIntensity_GPM(n, startdt, enddt)
    else:
        tv_PL200 = []
        PL200_acc = []
        PL200_rr = []
        tv_PL400 = []
        PL400_acc = []
        PL400_rr = []

    # Convert dates to datetime
    try:
        startdt = datetime.strptime(startdate, "%Y%m%d%H%M")
        enddt = datetime.strptime(enddate, "%Y%m%d%H%M")
    except Exception:
        traceback.print_exc()

    # Save parameters to file for later examination
    ##parafile.write(tv_PL200, ' ', PL200_acc, ' ', PL200_rr, ' ', tv_PL400, ' ', PL400_acc, ' ', PL400_rr)
    #shelving(parafile)

    # Read data from FMI-met
    if FMI_met:
        print("Reading FMI MET data")
        tv_FMI, temp_FMI, rh_FMI, sn_FMI, rr_FMI, press_FMI = \
            Read_FMIstation_GPM(startdt, enddt, n)
    else:
        tv_FMI = []
        temp_FMI = []
        rh_FMI = []
        sn_FMI = []
        rr_FMI = []
        press_FMI = []
    #parafile.write(tv_FMI, ' ', temp_FMI, ' ', rh_FMI, ' ', sn_FMI, ' ', rr_FMI, ' ', press_FMI)
    #shelving(parafile)

    # Read and plot GILL wind data
    ##TODO: Sanity check
    if wind_GILL:
        print("Reading wind data (GILL)")
        time_vector_GILL, mean_vel_GILL, mode_dir_GILL = \
            WindComparison_GPM(n, startdt, enddt)
    else:
        time_vector_GILL = []
        mean_vel_GILL = []
        mode_dir_GILL = []
    #parafile.write(time_vector_GILL, ' ', mean_vel_GILL, ' ', mode_dir_GILL)
    #shelving(parafile)

    # Read the PSD tables (size distribution)
    if PIP_psd:
        print("Reading PSD tables")
        D_PIP, PIP_PSD, PIPtime_psd, N_mean_PIP, Dm_PIP, N0_PIP, lambda_PIP, \
            mu_PIP, Nw_PIP, D02_exp_PIP, lambda2_exp_PIP, PIPtime_N = \
            ReadPIP_PSD_GPM(startdt, enddt, n, foldername)
    else:
        D_PIP = []
        PIP_PSD = []
        PIPtime_psd = []
        N_mean_PIP = []
        Dm_PIP = []
        N0_PIP = []
        lambda_PIP = []
        mu_PIP = []
        Nw_PIP = []
        D02_exp_PIP = []
        lambda2_exp_PIP = []
        PIPtime_N = []
    #parafile.write(D_PIP, ' ', PIP_PSD, ' ', PIPtime_psd, ' ', N_mean_PIP, ' ', Dm_PIP, ' ', N0_PIP, ' ',
    #               lambda_PIP, ' ', mu_PIP, ' ', Nw_PIP, ' ', D02_exp_PIP, ' ', lambda2_exp_PIP, ' ', PIPtime_N)
    #shelving(parafile)

    # Read the velocity tables and perform a fit as a function of D_PIP
    if PIP_vel:
        print("Reading the velocity tables (PIP_vel)")
        D_PIP, PIPtime_vel, PIPD_vel, PIPV_vel = ReadPIP_vel_GPM(startdt, enddt)
        avel_DPIP_vel, bvel_DPIP_vel, PIPtime_vel_n = PIPVelRel_GPM(
            startdt, enddt, D_PIP, PIPtime_vel, PIPD_vel, PIPV_vel, n, foldername)
    else:
        PIPtime_vel = []
        PIPD_vel = []
        PIPV_vel = []
        avel_DPIP = []
        bvel_DPIP = []
        DPIP_V_vel_n = []
        PIPtime_vel_n = []
    #parafile.write(D_PIP, ' ', PIPtime_vel, ' ', PIPD_vel, ' ', PIPV_vel, ' ', avel_DPIP_vel, ' ', bvel_DPIP_vel, ' ', PIPtime_vel_n)
    #shelving(parafile)

    # Read the particle tables
    if PIP_par:
        D_PIP, PIPD_par, PIPV_par, PIPtime_par, PIPEmaj, PIPEmajmax, PIPEmin, \
            PIPAR, PIPOR, PIPLen, PIPHig = ReadPIP_par_GPM(startdt, enddt)
        temp_PIPD = PIPD_par
        temp_Dmax = 2 * PIPEmajmax
        # Drop particle entries where either the diameter or Dmax is NaN/Inf
        # (equivalent to the successive element deletions of the original code)
        finite = np.isfinite(temp_PIPD) & np.isfinite(temp_Dmax)
        temp_PIPD = temp_PIPD[finite]
        temp_Dmax = temp_Dmax[finite]
        # Area equivalent of whole snow event
        ##TODO
        #C1 =
        #kD = C1[1]
    else:
        D_PIP = []
        PIPD_par = []
        PIPV_par = []
        PIPtime_par = []
        PIPEmaj = []
        PIPEmajmax = []
        PIPEmin = []
        PIPAR = []
        PIPlonX = []
        PIPDia = []
        PIPOR = []
        PIPLen = []
        PIPHig = []
        kD = []
    #parafile.write(D_PIP, ' ', PIPtime_vel, ' ', PIPD_vel, ' ', PIPV_vel, ' ', avel_DPIP_vel, ' ', bvel_DPIP_vel, ' ', PIPtime_vel_n)
    #shelving(parafile)

    if PIP_mass:
        # Drop time steps where temperature, pressure or relative humidity is NaN
        # (equivalent to the successive element deletions of the original code)
        valid = ~np.isnan(rh_FMI) & ~np.isnan(temp_FMI) & ~np.isnan(press_FMI)
        temp_FMI = temp_FMI[valid]
        press_FMI = press_FMI[valid]
        tv_FMI = tv_FMI[valid]
        rh_FMI = rh_FMI[valid]
        # envr: struct-like container for the met data passed to MassEstimate
        # (assumes `from types import SimpleNamespace` at module level)
        envr = SimpleNamespace()
        envr.temp = temp_FMI
        envr.press = press_FMI
        envr.time = tv_FMI
        envr.rh = rh_FMI / 100
        time_mass, amass_PIP, bmass_PIP = MassEstimate(
            n, D_PIP, PIPtime_par, envr, PIPD_par, PIPV_par, PIPEmajmax, PIPAR,
            kD, diacorr, foldername)
    else:
        time_mass = []
        amass_PIP = []
        bmass_PIP = []

    """
    Part 2
    """
    # Calculate the accumulation and reflectivity from the mass estimate
    master_time_vector = np.arange(startdt, enddt, timedelta(minutes=n))

    # PSD properties
    N0_mtv = []
    lambda_mtv = []
    mu_mtv = []
    N02_exp_mtv = []
    lambda2_exp_mtv = []
    D02_exp_mtv = []
    N_mean_PIP_mtv = []
    Dmax_mtv = []
    # Selected a, b (mass factors) and av, bv (velocity factors)
    amass_mtv = []
    bmass_mtv = []
    avel_mtv_max = []
    bvel_mtv_max = []

    """
    #Choose used diameter correction
    if diacorr == 0.9:
        time = time_mass.MH05_maxmaxD_corrconst08
        amass = amass_PIP.MH05_maxmaxD_corrconst08
        bmass = bmass_PIP.MH05_maxmaxD_corrconst08
    elif diacorr == 0.82:
        time = time_mass.MH05_maxmaxD_corrconst06
        amass = amass_PIP.MH05_maxmaxD_corrconst06
        bmass = bmass_PIP.MH05_maxmaxD_corrconst06
    elif diacorr == 0.7:
        time = time_mass.MH05_maxmaxD_corrconst04
        amass = amass_PIP.MH05_maxmaxD_corrconst04
        bmass = bmass_PIP.MH05_maxmaxD_corrconst04
    else:
        time_mtv = time_mass.MH05_maxmaxD
        amass = amass_PIP.MH05_maxmaxD
        bmass = bmass_PIP.MH05_maxmaxD
    """

    accum_PIP_fac_MH05_maxmaxD = []
    accum_PIP_fac_MH05_maxmaxD_corrconst08 = []
    accum_PIP_fac_MH05_maxmaxD_corrconst06 = []
    accum_PIP_fac_MH05_maxmaxD_corrconst04 = []
    Ze_PIP_fac_MH05_maxmaxD = []
    Ze_PIP_fac_MH05_maxmaxD_corrconst08 = []
    Ze_PIP_fac_MH05_maxmaxD_corrconst06 = []
    Ze_PIP_fac_MH05_maxmaxD_corrconst04 = []

    # Bin widths of the PIP size classes
    diff_PIP = np.zeros(np.shape(D_PIP))
    diff_PIP[0] = D_PIP[0]
    diff_PIP[1:] = np.diff(D_PIP)

    diacorr_no = kD / 1
    diacorr08 = kD / 0.9
    diacorr06 = kD / 0.82
    diacorr04 = kD / 0.70

    for dd in np.arange(1, len(master_time_vector)):

        # --- No diameter correction ---
        d_ind = np.where(
            (time_mass.MH05_maxmaxD > (master_time_vector[dd] - 30 * (1 / (24 * 3600)))) &
            (time_mass.MH05_maxmaxD < (master_time_vector[dd] + 30 * (1 / (24 * 3600)))))
        dn_ind = np.where(
            (PIPtime_n > (master_time_vector[dd] - 30 * (1 / (24 * 3600)))) &
            (PIPtime_n < (master_time_vector[dd] + 30 * (1 / (24 * 3600)))))
        dv_ind = np.where(
            (time_vector_parrel_max > (master_time_vector[dd] - 30 * (1 / (24 * 3600)))) &
            (time_vector_parrel_max < (master_time_vector[dd] + 30 * (1 / (24 * 3600)))))
        if np.size(d_ind) > 0 and np.size(dn_ind) > 0 and np.size(dv_ind) > 0:
            # PSD parameters
            N0_mtv = np.column_stack((N0_mtv, N0_PIP[dn_ind]))
            lambda_mtv = np.column_stack((lambda_mtv, lambda_PIP[dn_ind]))
            mu_mtv = np.column_stack((mu_mtv, mu_PIP[dn_ind]))
            N02_exp_mtv = np.column_stack((N02_exp_mtv, N02_exp_PIP[dn_ind]))
            lambda2_exp_mtv = np.column_stack((lambda2_exp_mtv, lambda2_exp_PIP[dn_ind]))
            D02_exp_mtv = np.column_stack((D02_exp_mtv, D02_exp_PIP[dn_ind]))
            N_mean_PIP_mtv = np.row_stack((N_mean_PIP_mtv, N_mean_PIP[dn_ind, :]))
            Dmax_mtv = np.column_stack((Dmax_mtv, Dmax_PIP[dn_ind]))
            # particle relations
            avel_mtv_max = np.column_stack((avel_mtv_max, avel_PIP_par_max[dv_ind]))
            bvel_mtv_max = np.column_stack((bvel_mtv_max, bvel_PIP_par_max[dv_ind]))

            accum_PIP_fac_MH05_maxmaxD = np.column_stack(
                (accum_PIP_fac_MH05_maxmaxD,
                 n * 60 * 10**(-3) *
                 np.nansum(amass_PIP.MH05_maxmaxD[d_ind] *
                           (diacorr_no * 0.1 * D_PIP)**(bmass_PIP.MH05_maxmaxD[d_ind]) *
                           avel_PIP_par_max[dv_ind] *
                           (diacorr_no * D_PIP)**(bvel_PIP_par[dv_ind]) *
                           N_mean_PIP[dn_ind, :] * diff_PIP)))
            if (bmass_PIP.MH05_maxmaxD[d_ind] >= 1 and
                    bmass_PIP.MH05_maxmaxD[d_ind] < 3.5) and (bvel_PIP_par_max[dv_ind] >= 0):
                Ze_PIP_fac_MH05_maxmaxD = np.column_stack(
                    (Ze_PIP_fac_MH05_maxmaxD,
                     10**6 * 1.2076 * 0.2 / 0.93 * (6 / np.pi)**2 *
                     np.nansum((amass_PIP.MH05_maxmaxD[d_ind] *
                                (diacorr_no * 0.1 * D_PIP)**(bmass_PIP.MH05_maxmaxD[d_ind]))**2 *
                               N_mean_PIP[dn_ind, :] * diff_PIP)))
                amass_mtv = np.column_stack((amass_mtv, amass[d_ind]))
                bmass_mtv = np.column_stack((bmass_mtv, bmass[d_ind]))
            else:
                Ze_PIP_fac_MH05_maxmaxD = np.column_stack((Ze_PIP_fac_MH05_maxmaxD, 0))
                amass_mtv = np.column_stack((amass_mtv, 0))
                bmass_mtv = np.column_stack((bmass_mtv, 0))
        else:
            accum_PIP_fac_MH05_maxmaxD = np.column_stack((accum_PIP_fac_MH05_maxmaxD, 0))
            Ze_PIP_fac_MH05_maxmaxD = np.column_stack((Ze_PIP_fac_MH05_maxmaxD, 0))
            amass_mtv = np.column_stack((amass_mtv, 0))
            bmass_mtv = np.column_stack((bmass_mtv, 0))
            N0_mtv = np.column_stack((N0_mtv, 0))
            lambda_mtv = np.column_stack((lambda_mtv, 0))
            mu_mtv = np.column_stack((mu_mtv, 0))
            N02_exp_mtv = np.column_stack((N02_exp_mtv, 0))
            D02_exp_mtv = np.column_stack((D02_exp_mtv, 0))
            lambda2_exp_mtv = np.column_stack((lambda2_exp_mtv, 0))
            N_mean_PIP_mtv = np.row_stack((N_mean_PIP_mtv, np.zeros(np.shape(D_PIP))))
            Dmax_mtv = np.column_stack((Dmax_mtv, 0))
            avel_mtv_max = np.column_stack((avel_mtv_max, 0))
            bvel_mtv_max = np.column_stack((bvel_mtv_max, 0))

        # --- Diameter correction 0.9 ---
        d_ind = np.where(
            (time_mass.MH05_maxmaxD_corrconst08 > (master_time_vector[dd] - 30 * (1 / (24 * 3600)))) &
            (time_mass.MH05_maxmaxD_corrconst08 < (master_time_vector[dd] + 30 * (1 / (24 * 3600)))))
        dn_ind = np.where(
            (PIPtime_n > (master_time_vector[dd] - 30 * (1 / (24 * 3600)))) &
            (PIPtime_n < (master_time_vector[dd] + 30 * (1 / (24 * 3600)))))
        dv_ind = np.where(
            (time_vector_parrel_max > (master_time_vector[dd] - 30 * (1 / (24 * 3600)))) &
            (time_vector_parrel_max < (master_time_vector[dd] + 30 * (1 / (24 * 3600)))))
        if np.size(d_ind) > 0 and np.size(dn_ind) > 0 and np.size(dv_ind) > 0:
            accum_PIP_fac_MH05_maxmaxD_corrconst08 = np.column_stack(
                (accum_PIP_fac_MH05_maxmaxD_corrconst08,
                 n * 60 * 10**(-3) *
                 np.nansum(amass_PIP.MH05_maxmaxD_corrconst08[d_ind] *
                           (diacorr08 * 0.1 * D_PIP)**(bmass_PIP.MH05_maxmaxD_corrconst08[d_ind]) *
                           avel_PIP_par_max[dv_ind] *
                           (D_PIP * diacorr08)**(bvel_PIP_par_max[dv_ind]) *
                           N_mean_PIP[dn_ind, :] * diff_PIP)))
            if (bmass_PIP.MH05_maxmaxD_corrconst08[d_ind] >= 1 and
                    bmass_PIP.MH05_maxmaxD_corrconst08[d_ind] < 3.5) and (bvel_PIP_par_max[dv_ind] >= 0):
                Ze_PIP_fac_MH05_maxmaxD_corrconst08 = np.column_stack(
                    (Ze_PIP_fac_MH05_maxmaxD_corrconst08,
                     10**6 * 1.2076 * 0.2 / 0.93 * (6 / np.pi)**2 *
                     np.nansum((amass_PIP.MH05_maxmaxD_corrconst08[d_ind] *
                                (diacorr08 * 0.1 * D_PIP)**(bmass_PIP.MH05_maxmaxD_corrconst08[d_ind]))**2 *
                               N_mean_PIP[dn_ind, :] * diff_PIP)))
            else:
                Ze_PIP_fac_MH05_maxmaxD_corrconst08 = np.column_stack(
                    (Ze_PIP_fac_MH05_maxmaxD_corrconst08, 0))
        else:
            accum_PIP_fac_MH05_maxmaxD_corrconst08 = np.column_stack(
                (accum_PIP_fac_MH05_maxmaxD_corrconst08, 0))
            Ze_PIP_fac_MH05_maxmaxD_corrconst08 = np.column_stack(
                (Ze_PIP_fac_MH05_maxmaxD_corrconst08, 0))

        # --- Diameter correction 0.82 ---
        d_ind = np.where(
            (time_mass.MH05_maxmaxD_corrconst06 > (master_time_vector[dd] - 30 * (1 / (24 * 3600)))) &
            (time_mass.MH05_maxmaxD_corrconst06 < (master_time_vector[dd] + 30 * (1 / (24 * 3600)))))
        dn_ind = np.where(
            (PIPtime_n > (master_time_vector[dd] - 30 * (1 / (24 * 3600)))) &
            (PIPtime_n < (master_time_vector[dd] + 30 * (1 / (24 * 3600)))))
        dv_ind = np.where(
            (time_vector_parrel_max > (master_time_vector[dd] - 30 * (1 / (24 * 3600)))) &
            (time_vector_parrel_max < (master_time_vector[dd] + 30 * (1 / (24 * 3600)))))
        if np.size(d_ind) > 0 and np.size(dn_ind) > 0 and np.size(dv_ind) > 0:
            accum_PIP_fac_MH05_maxmaxD_corrconst06 = np.column_stack(
                (accum_PIP_fac_MH05_maxmaxD_corrconst06,
                 n * 60 * 10**(-3) *
                 np.nansum(amass_PIP.MH05_maxmaxD_corrconst06[d_ind] *
                           (diacorr06 * 0.1 * D_PIP)**(bmass_PIP.MH05_maxmaxD_corrconst06[d_ind]) *
                           avel_PIP_par_max[dv_ind] *
                           (diacorr06 * D_PIP)**(bvel_PIP_par_max[dv_ind]) *
                           N_mean_PIP[dn_ind, :] * diff_PIP)))
            if (bmass_PIP.MH05_maxmaxD_corrconst06[d_ind] >= 1 and
                    bmass_PIP.MH05_maxmaxD_corrconst06[d_ind] < 3.5) and (bvel_PIP_par_max[dv_ind] >= 0):
                Ze_PIP_fac_MH05_maxmaxD_corrconst06 = np.column_stack(
                    (Ze_PIP_fac_MH05_maxmaxD_corrconst06,
                     10**6 * 1.2076 * 0.2 / 0.93 * (6 / np.pi)**2 *
                     np.nansum((amass_PIP.MH05_maxmaxD_corrconst06[d_ind] *
                                (diacorr06 * 0.1 * D_PIP)**(bmass_PIP.MH05_maxmaxD_corrconst06[d_ind]))**2 *
                               N_mean_PIP[dn_ind, :] * diff_PIP)))
            else:
                Ze_PIP_fac_MH05_maxmaxD_corrconst06 = np.column_stack(
                    (Ze_PIP_fac_MH05_maxmaxD_corrconst06, 0))
        else:
            accum_PIP_fac_MH05_maxmaxD_corrconst06 = np.column_stack(
                (accum_PIP_fac_MH05_maxmaxD_corrconst06, 0))
            Ze_PIP_fac_MH05_maxmaxD_corrconst06 = np.column_stack(
                (Ze_PIP_fac_MH05_maxmaxD_corrconst06, 0))

        # --- Diameter correction 0.7 ---
        d_ind = np.where(
            (time_mass.MH05_maxmaxD_corrconst04 > (master_time_vector[dd] - 30 * (1 / (24 * 3600)))) &
            (time_mass.MH05_maxmaxD_corrconst04 < (master_time_vector[dd] + 30 * (1 / (24 * 3600)))))
        dn_ind = np.where(
            (PIPtime_n > (master_time_vector[dd] - 30 * (1 / (24 * 3600)))) &
            (PIPtime_n < (master_time_vector[dd] + 30 * (1 / (24 * 3600)))))
        dv_ind = np.where(
            (time_vector_parrel_max > (master_time_vector[dd] - 30 * (1 / (24 * 3600)))) &
            (time_vector_parrel_max < (master_time_vector[dd] + 30 * (1 / (24 * 3600)))))
        if np.size(d_ind) > 0 and np.size(dn_ind) > 0 and np.size(dv_ind) > 0:
            accum_PIP_fac_MH05_maxmaxD_corrconst04 = np.column_stack(
                (accum_PIP_fac_MH05_maxmaxD_corrconst04,
                 n * 60 * 10**(-3) *
                 np.nansum(amass_PIP.MH05_maxmaxD_corrconst04[d_ind] *
                           (diacorr04 * 0.1 * D_PIP)**(bmass_PIP.MH05_maxmaxD_corrconst04[d_ind]) *
                           avel_PIP_par_max[dv_ind] *
                           (diacorr04 * D_PIP)**(bvel_PIP_par[dv_ind]) *
                           N_mean_PIP[dn_ind, :] * diff_PIP)))
            if (bmass_PIP.MH05_maxmaxD_corrconst04[d_ind] >= 1 and
                    bmass_PIP.MH05_maxmaxD_corrconst04[d_ind] < 3.5) and (bvel_PIP_par_max[dv_ind] >= 0):
                Ze_PIP_fac_MH05_maxmaxD_corrconst04 = np.column_stack(
                    (Ze_PIP_fac_MH05_maxmaxD_corrconst04,
                     10**6 * 1.2076 * 0.2 / 0.93 * (6 / np.pi)**2 *
                     np.nansum((amass_PIP.MH05_maxmaxD_corrconst04[d_ind] *
                                (diacorr04 * 0.1 * D_PIP)**(bmass_PIP.MH05_maxmaxD_corrconst04[d_ind]))**2 *
                               N_mean_PIP[dn_ind, :] * diff_PIP)))
            else:
                Ze_PIP_fac_MH05_maxmaxD_corrconst04 = np.column_stack(
                    (Ze_PIP_fac_MH05_maxmaxD_corrconst04, 0))
        else:
            accum_PIP_fac_MH05_maxmaxD_corrconst04 = np.column_stack(
                (accum_PIP_fac_MH05_maxmaxD_corrconst04, 0))
            Ze_PIP_fac_MH05_maxmaxD_corrconst04 = np.column_stack(
                (Ze_PIP_fac_MH05_maxmaxD_corrconst04, 0))

        #shelving(parafile)

    # Plot the summary of the event (IMPORTANT)
    Plot_EventSummary_GPM()

    # Define Z(S) with Rayleigh approximation
    # Choose the relation; output file names are built from the event start/end times
    if diacorr == 0.9:
        accum = accum_PIP_fac_MH05_maxmaxD_corrconst08
        Ze = Ze_PIP_fac_MH05_maxmaxD_corrconst08
        fname1 = os.path.join(foldername, 'ZeS_corrconst08_' +
                              datetime.strftime(startdt, '%Y%m%d%H%M') + '_' +
                              datetime.strftime(enddt, '%Y%m%d%H%M'))
        fname2 = os.path.join(foldername, 'ZeS_timeseries_corrconst08_' +
                              datetime.strftime(startdt, '%Y%m%d%H%M') + '_' +
                              datetime.strftime(enddt, '%Y%m%d%H%M'))
    elif diacorr == 0.82:
        accum = accum_PIP_fac_MH05_maxmaxD_corrconst06
        Ze = Ze_PIP_fac_MH05_maxmaxD_corrconst06
        fname1 = os.path.join(foldername, 'ZeS_corrconst06_' +
                              datetime.strftime(startdt, '%Y%m%d%H%M') + '_' +
                              datetime.strftime(enddt, '%Y%m%d%H%M'))
        fname2 = os.path.join(foldername, 'ZeS_timeseries_corrconst06_' +
                              datetime.strftime(startdt, '%Y%m%d%H%M') + '_' +
                              datetime.strftime(enddt, '%Y%m%d%H%M'))
    elif diacorr == 0.7:
        accum = accum_PIP_fac_MH05_maxmaxD_corrconst04
        Ze = Ze_PIP_fac_MH05_maxmaxD_corrconst04
        fname1 = os.path.join(foldername, 'ZeS_corrconst04_' +
                              datetime.strftime(startdt, '%Y%m%d%H%M') + '_' +
                              datetime.strftime(enddt, '%Y%m%d%H%M'))
        fname2 = os.path.join(foldername, 'ZeS_timeseries_corrconst04_' +
                              datetime.strftime(startdt, '%Y%m%d%H%M') + '_' +
                              datetime.strftime(enddt, '%Y%m%d%H%M'))
    else:
        accum = accum_PIP_fac_MH05_maxmaxD
        Ze = Ze_PIP_fac_MH05_maxmaxD
        fname1 = os.path.join(foldername, 'ZeS_nocorr_' +
                              datetime.strftime(startdt, '%Y%m%d%H%M') + '_' +
                              datetime.strftime(enddt, '%Y%m%d%H%M'))
        fname2 = os.path.join(foldername, 'ZeS_timeseries_nocorr_' +
                              datetime.strftime(startdt, '%Y%m%d%H%M') + '_' +
                              datetime.strftime(enddt, '%Y%m%d%H%M'))

    parafile.close()
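# main() reads its run definition from config.cfg in the working directory. A minimal
# illustrative file (section and key names are taken from the reads above; the values
# are placeholders, not real event data):
#
#   [input]
#   startdate = 201402152100
#   enddate   = 201402160300
#   n         = 5
#   diacorr   = 0.9
#   dataloc   = /path/to/output
#
#   [flags]
#   pluvio200  = true
#   pluvio400  = true
#   wind_GILL  = false
#   FMI_met    = true
#   PIP_psd    = true
#   PIP_vel    = true
#   PIP_par    = true
#   PIP_parrel = false
#   PIP_mass   = true
#   PIP_minute = false
#   imag       = false
#   version    = false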
def outlierDetection(headData, isOutlier, nSigma_threshold):
    # Initialise outputs
    noise_sigma = np.inf
    x_opt = []
    model_calib = []

    # Initialise 'isOutlier' if it's not supplied by the user
    if isOutlier is None or np.size(isOutlier) == 0:
        isOutlier = np.zeros(np.shape(headData)[0], dtype=bool)
    isNewOutlier = np.zeros(np.shape(headData)[0], dtype=bool)
    isOutlier_input = isOutlier.copy()

    # Build inputs for exponential smoothing model
    # (headData columns: 0 = time as a date number, 1 = observed head)
    t = headData[:, 0]
    h_obs = headData[:, 1]
    dummyBoreID = 'BoreID_123'
    coordinates = [[dummyBoreID, -999, -999], ['Precip', -999, -999]]
    forcingData = np.arange(t[0] - 10, t[-1] + 10 + 1)
    # daily forcing table (replaces the MATLAB table() call)
    forcingData = pd.DataFrame({'Year': year(forcingData),
                                'Month': month(forcingData),
                                'Day': day(forcingData),
                                'Precip': np.zeros(len(forcingData))},
                               columns=['Year', 'Month', 'Day', 'Precip'])
    h_obs_model = np.column_stack([year(t), month(t), day(t), hour(t),
                                   minute(t), second(t), h_obs])

    # Calibrate exponential smoothing model
    summaryStr = []
    noise_sigma = 0
    i = 1
    doFinalCalibration = False
    el = 0
    while i == 1 or np.sum(isNewOutlier) > 0 or doFinalCalibration:

        # Build model
        model_calib = HydroSightModel('Outlier detection', dummyBoreID, 'ExpSmooth',
                                      h_obs_model[~isOutlier, :], -999, forcingData,
                                      coordinates, False)

        # Calibrate model
        calibrateModel(model_calib, [], 0, np.inf, 'SPUCI', 2)

        # Get the standard deviation of the noise.
        noise_sigma = model_calib.model.variables.sigma_n

        # Exit if this was the final calibration
        if doFinalCalibration:
            break

        # Store calibrated parameters
        alpha = model_calib.model.parameters.alpha
        beta = model_calib.model.parameters.beta
        gamma = model_calib.model.parameters.gamma
        meanHead_calib = model_calib.model.variables.meanHead_calib
        initialHead = model_calib.model.variables.initialHead
        initialTrend = model_calib.model.variables.initialTrend

        # Loop through each non-outlier observation and omit it from the
        # simulation. This is done to exclude a possible outlier point from
        # the smoothed estimate and the resulting calculation of the noise.
        # If the difference between the current obs point and the forecast is
        # greater than this noise estimate, then it is denoted as an outlier.
        # Importantly, when calculating the noise the min and max points are
        # also excluded.
        isNewOutlier = np.zeros(np.shape(isOutlier), dtype=bool)
        filt = isOutlier.copy()
        ind = np.where(~isOutlier)[0]
        for k, j in enumerate(ind[1:]):
            # Get a vector of obs points excluding the current obs point, ind[j].
            filt[j] = True
            time_points_trim = headData[~filt, 0]
            delta_t = headData[j, 0] - headData[j - 1, 0]
            h_obs_trim = headData[~filt, 1]
            h_obs_trim = np.column_stack([year(time_points_trim), month(time_points_trim),
                                          day(time_points_trim), hour(time_points_trim),
                                          minute(time_points_trim), second(time_points_trim),
                                          h_obs_trim])

            # Rebuild the model without the current time point, assign the
            # calibrated parameters and solve the model.
            model = HydroSightModel('Outlier detection', dummyBoreID, 'ExpSmooth',
                                    h_obs_trim, -999, forcingData, coordinates, False)
            model.model.parameters.alpha = alpha
            model.model.parameters.beta = beta
            model.model.parameters.gamma = gamma
            model.model.variables.meanHead_calib = meanHead_calib
            model.model.variables.calibraion_time_points = time_points_trim
            model.model.variables.initialHead = initialHead
            model.model.variables.initialTrend = initialTrend

            # Add the current point back into the simulation. Note, when the
            # simulation is undertaken for a point that does not exist in the
            # model, then it is forecast.
            filt[j] = False
            time_points_trimExtended = headData[~filt, 0]
            h_mod_trim = solveModel(model, time_points_trimExtended, [], 'NoLabel', False)
            h_forecast_trim = model.model.variables.h_forecast

            # Create a filter to remove the current point from the forecast
            # and then calculate the residuals.
            obs_filt = np.concatenate((np.arange(0, k), np.arange(k + 1, len(ind))))
            resid_trim = h_obs_trim[:, -1] - h_forecast_trim[obs_filt]

            # To minimise the impact of outliers not yet identified, create a
            # filter that removes the most negative and most positive values
            # from the residuals.
            resid_filt = (resid_trim > np.min(resid_trim)) & (resid_trim < np.max(resid_trim))
            resid_trim = resid_trim[resid_filt]
            time_points_trim = time_points_trim[resid_filt]

            # Calculate innovations
            innov = resid_trim[1:] - resid_trim[:-1] * np.exp(
                -10.0**model.model.parameters.beta * np.diff(time_points_trim))

            # Calculate st. dev. of residuals noise
            #sigma_n_trimmed = np.sqrt(np.mean(innov**2 / (1 - np.exp(-2 * 10**model.model.parameters.beta * np.diff(time_points_trim)))))

            # Calculate st. dev. of the residual for the current forecast only.
            # Note: the estimate of sigma_n at a specific time step is derived
            # from von Asmuth 2015 doi:10.1029/2004WR00372 eqn A7, but with the
            # innovations at t, v_t, replaced by their mean. This was done so
            # that sigma_n,t is independent of the residual forecast.
            sigma_n_trimmed = np.sqrt(
                np.mean(innov**2) /
                (1. - np.exp(-2. * 10.0**model.model.parameters.beta * delta_t)))

            # Calculate the residual for the omitted obs point.
            resid_point = h_obs[j] - h_forecast_trim[k]

            # Break the for-loop if an outlier is detected.
            if np.abs(resid_point) >= nSigma_threshold * sigma_n_trimmed:
                isNewOutlier[j] = True
                el += 1
                summaryStr.append('Date : ' + str(t[j]) +
                                  ', Head : ' + str(h_obs[j]) +
                                  ', Smoothed forecast head : ' + str(h_forecast_trim[k]) +
                                  ', Residual head : ' + str(resid_point) +
                                  ', St. dev of noise : ' + str(sigma_n_trimmed))
                break

        # Aggregate new outliers with previously detected outliers
        isOutlier = isOutlier | isNewOutlier

        # If the while loop is to exit, set the flag to do one last
        # calibration so that the noise is best estimated.
        if np.sum(isNewOutlier) == 0:
            doFinalCalibration = True

        # Update counter
        i = i + 1

    # Assign the final parameters
    x_opt = getParameters(model_calib.model)

    # Exclude the outliers supplied as input. That is, only return the
    # outliers identified by the exponential smoothing model.
    isOutlier[isOutlier_input] = False

    # Print summary:
    print('Summary of Outliers Detected')
    print('----------------------------')
    for i in range(el):
        print(summaryStr[i])
    print('----------------------------')

    return isOutlier, noise_sigma, x_opt, model_calib
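# The per-point noise standard deviation used in the outlier test above follows the
# expression in the loop (mean squared innovation scaled by the gap length), i.e.
# sigma_n,t = sqrt( mean(innov^2) / (1 - exp(-2 * 10^beta * delta_t)) ).
# A stand-alone sketch of that calculation, assuming numpy is imported as np and that
# `beta` is the calibrated ExpSmooth parameter (base-10 log of the noise decay rate):
def noise_sigma_at_gap(innov, beta, delta_t):
    return np.sqrt(np.mean(innov**2) / (1.0 - np.exp(-2.0 * 10.0**beta * delta_t)))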
def qc_kk(II, QI, IQ, QQ, Ihigh, Qhigh, Ilow, Qlow, Ierr, Qerr):
    # Clipping thresholds from the high/low exceedance fractions
    # (erfinv and erfc are assumed to be imported from scipy.special at module level)
    erfcinv = lambda x: erfinv(1 - x)
    tval = lambda x: 2**.5 * erfcinv(2 * x)
    rel_pwr = lambda Xhi, Xlo: 1.819745692478292 / (tval(Xhi) + tval(Xlo))**2
    relPwrQ = rel_pwr(Qhigh, Qlow)
    relPwrI = rel_pwr(Ihigh, Ilow)
    qc_power = (relPwrQ * relPwrI)**.5

    x1 = np.array([Qhigh, Ihigh, Ihigh, Qhigh])
    x2 = np.array([Qlow, Ilow, Ilow, Qlow])
    y1 = np.array([Qhigh, Ihigh, Qhigh, Ihigh])
    y2 = np.array([Qlow, Ilow, Qlow, Ilow])
    Xh, Xl, Yh, Yl = tval(x1), -tval(x2), tval(y1), -tval(y2)
    Xh = np.tile(Xh, (len(II), 1))
    Xl = np.tile(Xl, (len(II), 1))
    Yh = np.tile(Yh, (len(II), 1))
    Yl = np.tile(Yl, (len(II), 1))
    sqrt2 = 2**-.5
    x1 = erfc(sqrt2 * Xh)
    x2 = erfc(-sqrt2 * Xl)
    y1 = erfc(sqrt2 * Yh)
    y2 = erfc(-sqrt2 * Yl)

    Cxy = np.array([QQ, II, IQ, QI]).transpose()
    TC0 = .25 * (x1 * y1 + x2 * y2 - x1 * y2 - x2 * y1)
    TC1 = 1 / (2 * np.pi) * (
        (np.exp(-0.5 * Xh * Xh) + np.exp(-0.5 * Xl * Xl)) *
        (np.exp(-0.5 * Yh * Yh) + np.exp(-0.5 * Yl * Yl)))
    TC2 = 1 / (4 * np.pi) * (
        (Xh * np.exp(-0.5 * Xh * Xh) + Xl * np.exp(-0.5 * Xl * Xl)) *
        (Yh * np.exp(-0.5 * Yh * Yh) + Yl * np.exp(-0.5 * Yl * Yl)))
    TC3 = 1 / (12 * np.pi) * (
        ((1 - Xh * Xh) * np.exp(-0.5 * Xh * Xh) + (1 - Xl * Xl) * np.exp(-0.5 * Xl * Xl)) *
        ((1 - Yh * Yh) * np.exp(-0.5 * Yh * Yh) + (1 - Yl * Yl) * np.exp(-0.5 * Yl * Yl)))

    # FIND ROOTS OF THE 3rd DEGREE POLYNOMIAL
    a, b, c = TC2 / TC3, TC1 / TC3, (TC0 - Cxy) / TC3
    p, q = b - (1 / 3) * (a**2), c + (1 / 27) * (2 * (a**3) - 9 * a * b)
    # np.emath.sqrt returns complex values when the discriminant is negative,
    # which the imaginary-root branch below relies on
    K = 0.5 * q + np.sign(q) * np.emath.sqrt(0.25 * (q**2) + (1 / 27) * (p**3))
    qc_data = Cxy * 0

    # Handle NaNs
    map_nan = np.isnan(K)
    qc_data[map_nan] = Cxy[map_nan]
    map_real = (K.imag == 0) & ~map_nan

    # Handle case K = 0
    map_real_0 = (K == 0) & map_real
    qc_data[map_real_0] = -a[map_real_0] / 3

    # Handle optimisation of the real roots
    map_opt = map_real & ~map_real_0
    x = K[map_opt].transpose()
    r_opt = 0.5 * x
    r2 = r_opt * 1.
    e = np.ones(np.shape(x))
    run = (e == 1)
    max_steps = 10000
    err_thres = 1e-10
    # Iterate to find the roots
    while np.any(run) and (max_steps > 0):
        max_steps -= 1
        r2[run] = r_opt[run]
        r_opt[run] = 0.5 * (x[run] / (r_opt[run]**2) + r_opt[run])
        e[run] = np.abs(r2[run] - r_opt[run])
        run[run] = e[run] > err_thres
    # Insert the converged roots
    qc_data[map_opt] = (p[map_opt] / (3 * r_opt) - r_opt - a[map_opt] / 3).real

    # Handle the case of imaginary roots
    map_imag = (K.imag != 0) & ~map_nan
    if np.any(map_imag):
        coeff4 = TC3[map_imag]
        coeff3 = TC2[map_imag]
        coeff2 = TC1[map_imag]
        coeff1 = TC0[map_imag] - Cxy[map_imag]
        corr_compressed = Cxy[map_imag]
        res = coeff1 * 0
        for i in range(len(coeff1)):
            r_roots = np.roots([coeff4[i], coeff3[i], coeff2[i], coeff1[i]])
            if len(r_roots) == 1 and np.isreal(r_roots[0]) and -1 < r_roots[0].real < 1:
                res[i] = r_roots[0].real
            else:
                # Throw away complex roots and out-of-range values
                tmp = r_roots[(np.abs(r_roots.imag) < 1e-10) &
                              (np.abs(r_roots.real) < 1)].real
                if len(tmp) == 1:
                    # case: one root in the range
                    res[i] = tmp[0]
                elif len(tmp) > 1:
                    # case: multiple roots in the range -> pick the one closest
                    # to the measured correlation
                    I = np.argmin(np.abs(tmp - corr_compressed[i]))
                    res[i] = tmp[I]
                elif len(tmp) == 0:
                    # case: no roots. This happens for large rho values; set it
                    # equal to +/-1. Normally this only happens for the first
                    # autocorrelation values.
                    res[i] = np.sign(corr_compressed[i])
        qc_data[map_imag] = res

    # Handle out-of-range values
    map_big = np.abs(qc_data) > 1
    qc_data[map_big] = np.sign(Cxy[map_big])

    QQqq = qc_data[:, 0]
    IIqq = qc_data[:, 1]
    IQqq = qc_data[:, 2]
    QIqq = qc_data[:, 3]

    # Rqq = IIqq + QQqq + 1j*(IQqq - QIqq)   # complex version
    # real version
    Rqq = np.zeros((QQqq.size * 2, ))
    QIqq[0] = 0
    QIqq = np.roll(QIqq, -1)
    Rqq[::2] = IIqq + QQqq
    Rqq[1::2] = IQqq + QIqq
    # 0.5 correlation in Q and I channels => divide the autocorrelation function
    # by 2 for it to be strictly correct
    Rqq = Rqq / 2
    w = np.hanning(Rqq.size * 2)
    yqq = qc_power * np.absolute(np.fft.hfft(Rqq * w[Rqq.size:]))
    #yqq = qc_power * np.absolute(np.fft.hfft(Rqq))
    yqq[0] = yqq[1]
    yqq = yqq[yqq.size // 2:]
    return yqq
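# The clipping thresholds used in qc_kk() are recovered from the fraction of samples
# beyond the high/low levels: tval(x) is the standard-normal quantile with exceedance
# probability x. A small stand-alone sanity check (approximate values, for illustration
# only; erfinv is imported from scipy.special, as assumed above):
def _check_tval():
    from scipy.special import erfinv
    tval = lambda x: 2**.5 * erfinv(1 - 2 * x)
    # P(X > 1) for a standard normal is about 0.1587, so tval(0.1587) should be close to 1
    return tval(0.1587)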