예제 #1
0
def filter3(network, **hom_params):
    """Condense closely-spaced undocumented changepoints into single breaks.

    For each station, the remaining entries of ``network.hits_array`` are
    consumed from the highest hit count downwards. Each candidate is either
    absorbed into an already-accepted changepoint lying within an
    amplitude-dependent window around it, or accepted as a new changepoint.
    Accepted changepoints whose accumulated hit count reaches the threshold
    are recorded on the station's series object.

    :Param network:
        The network object containing the station/series data and metadata, and
        a record of suspect, undocumented changepoints and their approximate
        amplitude:
        :Ivar stations:
            A mapping whose keys are the station ids. The i-th key is used
            for row i of ``hits_array``/``amps_array``.
        :Ivar raw_series:
            A mapping from station id to a series object providing
            ``series``, ``monthly_series`` and ``MISSING_VAL``.
        :Ivar hits_array:
            A numpy array of dimensions [number of stations, number of months]
            containing the timing of the set of undocumented changepoints
            filtered so far. MODIFIED IN PLACE: every processed entry is
            zeroed.
        :Ivar amps_array:
            A numpy array of dimensions [number of stations, number of months]
            containing the approximate amplitudes of the undocumented
            changepoints filtered so far. MODIFIED IN PLACE: every processed
            entry is zeroed.
    :Param hom_params:
        The parameters object containing important parameter settings for this
        analysis:
        :Param nmo:
            The number of months; used as the length of the per-station work
            arrays and as the right-hand edge of the merge search.
    :Return:
        Nothing is returned. The final set of changepoints for each series is
        recorded in the given network object as the following field of
        ``network.raw_series[station_id]``:
        :Ivar changepoints:
            A dictionary containing the changepoint locations and amplitudes.
            Each key in the dictionary is the integer month index where a
            breakpoint occurs, and each value is a dictionary with the
            following fields:
            :Param jsum:
                The accumulated number of hits condensed into this changepoint
            :Param ahigh:
                The hit-weighted mean amplitude of the changepoint
            :Param astd:
                Currently a copy of ``ahigh``; no separate error estimate is
                computed here.
    """
    # Minimum number of hits for a month to be considered a candidate at all.
    inconfirm = 2
    # Minimum accumulated hits for a condensed changepoint to be kept.
    ihthres = 2
    # Fixed half-width (months) of the merge window. The sentinel -2 means
    # "derive the window from the changepoint amplitude instead".
    nmrgyr = -2

    nmo = hom_params['nmo']
    # list() keeps the keys indexable on Python 3 as well as Python 2.
    station_list = list(network.stations.keys())
    ifound = 0

    num_stations, num_months = network.amps_array.shape
    for station_index in range(num_stations):
        station_id = station_list[station_index]
        station_series = network.raw_series[station_id]
        miss = station_series.MISSING_VAL
        data_monthly = station_series.monthly_series
        stdk = compute_monthly_avg_std(station_series.series)

        # Per-station accumulators, indexed by month:
        #   khits    - hits condensed into the changepoint at that month
        #   ktests   - set to 1 when a changepoint is first accepted there
        #   akhigh   - hit-weighted sum of amplitudes
        #   kbracket - merge window width in effect when it was accepted
        khits = np.zeros(nmo)
        ktests = np.zeros(nmo)
        akhigh = np.zeros(nmo)
        kbracket = np.zeros(nmo, dtype=int)

        # Consume candidates until no month has enough hits left.
        while True:
            # ihighmo starts as None so that a station without any
            # qualifying hit can never read a stale or unbound month.
            ihighit, ihighmo, ahigh = 0.0, None, 0.0
            for month in range(num_months):
                jhit = network.hits_array[station_index, month]
                if jhit >= inconfirm:
                    isum = 0.0 + jhit
                    # Strict '>' keeps the earliest month on ties.
                    if isum > ihighit:
                        asum = network.amps_array[station_index, month] * jhit
                        ihighit = isum
                        ihighmo = month
                        ahigh = asum / isum
            ## now -
            ##    ihighmo := month of highest hit value (None if none left)
            ##    ihighit := number of hits at that month
            ##    ahigh := estimated adjustment
            _filter3_log("----itarg,ihighit,ihighmo,ahigh,stdk",
                         station_index, ihighit, ihighmo, ahigh, stdk)

            if ihighmo is None:
                break

            if nmrgyr != -2:
                ibracket = nmrgyr * 2 + 1
            else:
                ibracket = _filter3_bracket_width(ahigh)

            target = _filter3_find_absorbing_month(
                khits, data_monthly, miss, ihighmo, ibracket // 2, nmo)

            if target is None:
                # Nothing nearby: this candidate becomes a new changepoint.
                khits[ihighmo] = ihighit
                ktests[ihighmo] = 1
                akhigh[ihighmo] = ahigh * ihighit
                kbracket[ihighmo] = ibracket
                _filter3_log("New CHG hit: ", station_index, ihighmo,
                             khits[ihighmo], ktests[ihighmo],
                             akhigh[ihighmo] / khits[ihighmo])
            else:
                # Absorb the lesser hit into the closest accepted one.
                khits[target] += ihighit
                akhigh[target] += ahigh * ihighit
                _filter3_log("Absorb hit: ", station_index, ihighmo, " to ",
                             target, khits[target], ktests[target],
                             akhigh[target] / khits[target])

            # Zero the consumed candidate so the next pass skips it.
            network.hits_array[station_index, ihighmo] = 0
            network.amps_array[station_index, ihighmo] = 0.0

        _filter3_log("----------------------------------------------")
        # Examine the interim khits array for this station's changepoints.
        uchgpt_dict = dict()
        for month in range(nmo):
            if khits[month] > 0:
                npair = ktests[month]
                jsum = khits[month]

                iy, im = imo2iym(month)
                _filter3_log("itarg,imo,iym,npair,jsum,ihthres,stdk",
                             station_index, month, iy, im, npair, jsum,
                             ihthres, stdk)
                if jsum >= ihthres:
                    # Passed the threshold test: keep this changepoint.
                    ifound += 1

                    ahigh = akhigh[month] / khits[month]
                    # No separate error estimate exists yet; reuse ahigh.
                    astd = ahigh
                    print("%5d %6s-UCHGPT KW%1d at %4d %3d %4d %6.2f %6.2f %3d %3d %3d" %
                          (station_index, station_id, 1, iy, im, jsum, ahigh,
                           astd, kbracket[month], npair, ihthres))

                    uchgpt_dict[month] = {'jsum': jsum,
                                          'ahigh': ahigh,
                                          'astd': astd}

        network.raw_series[station_id].changepoints = uchgpt_dict

    _filter3_log("-------------------------------------------------")
    _filter3_log("Undoc filter: ", ifound)


def _filter3_log(*fields):
    """Print the fields separated by single spaces, one line per call."""
    # A single parenthesised argument parses identically as a Python 2 print
    # statement and a Python 3 print() call. '%s' formats each field the way
    # the Python 2 statement ``print a, b, c`` would.
    print(" ".join(["%s"] * len(fields)) % fields)


def _filter3_bracket_width(amplitude):
    """Return the merge window width (in months) for a changepoint amplitude."""
    ## These are PRE-DEFINED in inhomog.parm.system.mthly.incl. They
    ## will need to be re-factored later in to a more logical place
    ## (Parameters maybe?)
    arange = (0.4, 0.6, 0.8, 1.0, 1.5, 3.0, 5.0)
    mrgyr = (36, 18, 12, 8, 6, 5, 5)

    # Larger changes get a narrower window; the window is chosen from the
    # magnitude of the amplitude, not from the hit count.
    astd = abs(amplitude)
    for upper_limit, half_width in zip(arange, mrgyr):
        if astd < upper_limit:
            return half_width * 2 + 1
    # Amplitudes at or above the last limit use the last half-width.
    return mrgyr[-1] * 2 + 1


def _filter3_find_absorbing_month(khits, data_monthly, miss, ihighmo,
                                  max_radius, nmo):
    """Return the nearest month already holding an accepted hit, or None."""
    # Start at the hit-point index and expand outward in the series, i.e.
    # if ihighmo = 10, look at [10, 11, 9, 12, 8, 13, 7, ...].
    # NOTE(review): missing months are stepped over locally but do not widen
    # the window; confirm whether they were meant to.
    for radius in range(max_radius):
        ## DEAL WITH THE RIGHT MONTH
        right_month = ihighmo + radius
        # Step over missing months without running off the end.
        while right_month < nmo and data_monthly[right_month] == miss:
            right_month += 1
        if right_month >= nmo:
            # Reaching the end of the series ends the whole search.
            return None
        if khits[right_month] > 0:
            return right_month

        ## DEAL WITH THE LEFT MONTH
        left_month = ihighmo - radius
        if left_month <= 0:
            # Reaching the start of the series ends the whole search.
            return None
        # Step over missing months; stop at index 0 rather than letting a
        # negative index wrap around to the end of the series.
        while left_month > 0 and data_monthly[left_month] == miss:
            left_month -= 1
        if khits[left_month] > 0:
            return left_month

    return None
예제 #2
0
def filter2(network, **hom_params):
    """Report the history-search bracket for every remaining changepoint.

    This is the placeholder for reconciling detected undocumented
    changepoints with documented ones by "absorbing" detections that lie
    near known breaks.

    TODO: implement this functionality

    :Param network:
        The network object containing the station/series data and metadata, and
        a record of suspect, undocumented changepoints and their approximate
        amplitude:
        :Ivar stations:
            A mapping whose keys are the station ids. The i-th key is used
            for row i of ``hits_array``/``amps_array``.
        :Ivar raw_series:
            A mapping from station id to a series object providing
            ``series`` and ``MISSING_VAL``.
        :Ivar hits_array:
            A numpy array of dimensions [number of stations, number of months]
            containing the timing of the set of undocumented changepoints
            filtered so far.
        :Ivar amps_array:
            A numpy array of dimensions [number of stations, number of months]
            containing the approximate amplitudes of the undocumented
            changepoints filtered so far.
    :Param hom_params:
        The parameters object containing important parameter settings for this
        analysis:
        <none so far>
    :Return:
        Nothing. One summary line is printed per month with a positive hit
        count. ``scale_series`` is called on each station's series with a
        factor of 0.1 before its statistics are computed.
    """
    ## These are PRE-DEFINED in inhomog.parm.system.mthly.incl. They
    ## will need to be re-factored later in to a more logical place
    ## (Parameters maybe?)
    amp_limits = [0.4, 0.6, 0.8, 1.0, 1.5, 3.0, 5.0]
    half_widths = [36, 18, 12, 8, 6, 5, 5]

    station_ids = list(network.stations.keys())
    n_stations, n_months = network.amps_array.shape

    for station_index in range(n_stations):
        station_id = station_ids[station_index]
        series_obj = network.raw_series[station_id]
        values = series_obj.series
        scale_series(values, 0.1, series_obj.MISSING_VAL)
        stdk = compute_monthly_avg_std(values)

        for month in range(n_months):
            jsum = network.hits_array[station_index, month]
            if not jsum > 0:
                continue

            ahigh = network.amps_array[station_index, month]
            astd = abs(ahigh)
            iy, im = imo2iym(month)

            # Search bracket for looking at station history files: take the
            # half-width paired with the first limit that astd falls below,
            # or the last half-width when astd is below none of them.
            half_width = next(
                (width for limit, width in zip(amp_limits, half_widths)
                 if astd < limit),
                half_widths[-1])
            ibracket = half_width * 2 + 1

            # One parenthesised argument prints the same text as the
            # space-separated Python 2 print statement.
            fields = ("ASTD: ", station_index, station_id, "1", iy, im,
                      astd, ahigh, stdk, jsum, ibracket)
            print(" ".join(["%s"] * len(fields)) % fields)