def filter3(network, **hom_params): """Reduce the number of detected changepoints by condensing closely-related changepoints into one another, by choosing the biggest amplitude change. :Param network: The network object containing the station/series data and metadata, and a record of suspect, undocumented changepoints and their approximate amplitude: :Ivar hits_array: A numpy array of dimensions [number of stations, number of months] containing the timing of the set of undocumented changepoints filtered so far. :Ivar amps_array: A numpy array of dimensions [number of stations, number of months] containing the approximate amplitudes of the undocumented changepoints filtered so far. :Param hom_params: The parameters object containing important parameter settings for this analysis: <none so far> :Return: Record the final set of changepoints for each series in the given network object as the following field: :Ivar changepoints: A dictionary containing the changepoint locations and amplitudes. Each key in the dictionary is the integer month index where a breakpoint occurs, and each dictionary has the following fields: :Param jsum: The number of times this changepoint caused a break to be logged in a paired neighbor during splitmerge :Param ahigh: The amplitude change associated with the changepoint :Param astd: The error associated with the amplitude change """ ################################################################################ ## FILTER 3 TEST ## Try to reduce the number of changepoints by condensing closely-related ## changepoints into one another. ## ## Go back through the years/months ## For each technique ## find the highest remaining hits ## accumulate all of the new_hits and ntests for each month +/- mrgyr ## (from amps) while skipping missing data ## All new_hits and amps are zeroed out ## For all accumulations within nmrgryr ## when given month are greater or equal to ithresh then add to ## the nhits and ntests arrays ## ## iconfirm := 2 | the min number of hits for a possible breakpoint to be valid ## nmrgyr := -2 | no idea what this does; it defaults to -2 station_list = network.stations.keys() ids = station_list inconfirm = 2 nmrgyr = -2 final_hits = np.zeros_like(network.hits_array) ifound = 0 (num_stations, num_months) = network.amps_array.shape for station_index in range(num_stations): station_id = station_list[station_index] data = network.raw_series[station_id].series miss = network.raw_series[station_id].MISSING_VAL data_monthly = network.raw_series[station_id].monthly_series stdk = compute_monthly_avg_std(data) ## setup temp arrays khits = np.zeros(hom_params['nmo']) ktests = np.zeros(hom_params['nmo']) akhigh = np.zeros(hom_params['nmo']) # iterate until there are no more high points istop = False while not istop: ihighit, ahigh = 0.0, 0.0 # find the highest count - the most number of hits at a possible breakpoint for month in range(num_months): isum, asum = 0.0, 0.0 if network.hits_array[station_index, month] >= inconfirm: jhit = network.hits_array[station_index, month] isum += jhit asum += network.amps_array[station_index, month]*jhit # find the highest chgpt hit station by hits if isum > ihighit: ihighit = isum ihighmo = month ahigh = asum / isum ## now - ## ihighmo := month of highest hit value ## ihighit := sum of hits over all tests ## ahigh := estimated adjustment print "----itarg,ihighit,ihighmo,ahigh,stdk",station_index,ihighit,ihighmo,ahigh,stdk #Keep going until there are no more hits if ihighit > 0: # bracket the highest hit +/- nmrgyr if nmrgyr != -2: ibracket = nmrgyr*2 + 1 else: # else bracket using amplitude of chgpt to define month range ## This are PRE-DEFINED in inhomog.parm.system.mthly.incl. They ## will need to be re-factored later in to a more logical place ## (Parameters maybe?) arange = [0.4, 0.6, 0.8, 1.0, 1.5, 3.0, 5.0] mrgyr = [36, 18, 12, 8, 6, 5, 5] nrange = len(arange) # Create search bracket for looking at station history files for irange in range(len(arange)): astd = abs(ihighit) if astd < arange[irange]: break ibracket = mrgyr[irange]*2 + 1 # go through the bracket, look for the highest hits already found # start at the hit-point index and expand outward in the series # i.e, if our ihighmo = 10, look at [10, 11, 9, 12, 8, 13, 7, ...] # keep track of missing values in both directions. max_radius = ibracket/2 miss_left, miss_right = 0, 0 radius = 0 absorbed = False while radius < max_radius: ## DEAL WITH THE RIGHT MONTH right_month = ihighmo + radius + miss_right if right_month == hom_params['nmo']: break if data_monthly[right_month] == miss: # this month is missing, go to the next while data_monthly[right_month] == miss: right_month += 1 # Absorb lesser hit into the closest higher hit #print right_month, right_month-5, right_month #print np.where(khits > 0) if khits[right_month] > 0: khits[right_month] += ihighit akhigh[right_month] += ahigh*ihighit print "Absorb hit: ",station_index,ihighmo," to ",right_month,khits[right_month],ktests[right_month],akhigh[right_month]/khits[right_month] # zero test array block for next iter network.hits_array[station_index,ihighmo] = 0 network.amps_array[station_index,ihighmo] = 0.0 absorbed = True break ## DEAL WITH THE LEFT MONTH left_month = ihighmo - radius - miss_left if left_month == 0: break if data_monthly[left_month] == miss: # this month is missing, go to the next while data_monthly[left_month] == miss: left_month -= 1 # Absorb lesser hit into the closest higher hit #print left_month, left_month-5, left_month+5 #print np.where(khits > 0) if khits[left_month] > 0: khits[left_month] += ihighit akhigh[left_month] += ahigh*ihighit print "Absorb hit: ",station_index,ihighmo," to ",left_month,khits[left_month],ktests[right_month],akhigh[right_month]/khits[right_month] # zero test array block for next iter network.hits_array[station_index,ihighmo] = 0 network.amps_array[station_index,ihighmo] = 0.0 absorbed = True break radius += 1 # if no hits found, setup new hit if not absorbed: khits[ihighmo] = ihighit ktests[ihighmo] = 1 akhigh[ihighmo] = ahigh*ihighit print "New CHG hit: ",station_index,ihighmo,khits[ihighmo],ktests[ihighmo],akhigh[ihighmo]/khits[ihighmo] network.hits_array[station_index, ihighmo] = 0 network.amps_array[station_index, ihighmo] = 0.0 #raw_input("pause") else: istop = True print "----------------------------------------------" # examine interim khits array for station's filtered changepoints uchgpt_dict = dict() for month in range(hom_params['nmo']): # ... if highest hits > ithres(npair) then save # fetch the numbr of pairs tested if khits[month] > 0: npair = ktests[month] jsum = khits[month] ihthres = 2 iy,im = imo2iym(month) print "itarg,imo,iym,npair,jsum,ihthres,stdk",station_index,month,iy,im,npair,jsum,ihthres,stdk if jsum >= ihthres: # passed threshold test- put interim into final final_hits[station_index, month] += jsum ifound += 1 # debug stuff ahigh = akhigh[month]/khits[month] astd = ahigh print ("%5d %6s-UCHGPT KW%1d at %4d %3d %4d %6.2f %6.2f %3d %3d %3d" % (station_index, station_id, 1, iy, im, jsum, ahigh, astd, ibracket, npair, ihthres) ) uchgpt_dict[month] = { 'jsum': jsum, 'ahigh': ahigh, 'astd': astd } network.raw_series[station_id].changepoints = uchgpt_dict print "-------------------------------------------------" print "Undoc filter: ",ifound
def filter2(network, **hom_params): """Reconciles the detected undocumented changepoints with documented ones, if available, by "absorbing" detected changepoints near these known breaks in the data. TODO: implement this functionality :Param network: The network object containing the station/series data and metadata, and a record of suspect, undocumented changepoints and their approximate amplitude: :Ivar hits_array: A numpy array of dimensions [number of stations, number of months] containing the timing of the set of undocumented changepoints filtered so far. :Ivar amps_array: A numpy array of dimensions [number of stations, number of months] containing the approximate amplitudes of the undocumented changepoints filtered so far. :Param hom_params: The parameters object containing important parameter settings for this analysis: <none so far> :Return: does nothing right now other than print out some summary information. """ ################################################################################ ## FILTER 2 ## won't actually do anything but print things to console for now ## Use station history and metadata to absorb undocumented changepoints ## to known ones where possible station_list = network.stations.keys() ids = station_list (num_stations, num_months) = network.amps_array.shape for station_index in range(num_stations): station_id = station_list[station_index] station_series = network.raw_series[station_id] data = station_series.series scale_series(data, 0.1, station_series.MISSING_VAL) stdk = compute_monthly_avg_std(data) for month in range(num_months): if network.hits_array[station_index, month] > 0: ahigh = network.amps_array[station_index, month] jsum = network.hits_array[station_index, month] astd = abs(ahigh) iy, im = imo2iym(month) ## These are PRE-DEFINED in inhomog.parm.system.mthly.incl. They ## will need to be re-factored later in to a more logical place ## (Parameters maybe?) arange = [0.4, 0.6, 0.8, 1.0, 1.5, 3.0, 5.0] mrgyr = [36, 18, 12, 8, 6, 5, 5] nrange = len(arange) # Create search bracket for looking at station history files for irange in range(len(arange)): if astd < arange[irange]: break ibracket = mrgyr[irange]*2 + 1 print "ASTD: ",station_index,station_id,"1",iy,im,astd,ahigh,stdk,jsum,ibracket