Python compute_monthly_anomalies示例

编程语言: Python

命名空间/包名称: util

方法/功能: compute_monthly_anomalies

hotexamples.com的示例: 3

Python compute_monthly_anomalies - 共找到 3 个示例。以下是从开源项目中提取的、评价最高的 util.compute_monthly_anomalies 真实 Python 示例。您可以为示例评分，以帮助我们提高示例质量。

示例#1

显示文件

文件： ushcn_data.py 项目： darothen/ccf-homogenization

 def monthly_anomaly_series(self):
     """Return this series' monthly anomalies as one flat list.

     The anomalies are computed by compute_monthly_anomalies() from
     self.series, with self.MISSING_VAL passed as the missing-value marker,
     and are then flattened by self._flatten_months(). The result is
     expected to have length (len(self.years)*12).
     """
     return self._flatten_months(
         compute_monthly_anomalies(self.series, self.MISSING_VAL))

示例#2

显示文件

文件： preprocess.py 项目： wk1984/ccf-homogenization

def preprocess(network, **params):
    """Performs the pre-processing necessary to run the pairwise homogenization
    algorithm on a network of USHCN coop station data. This involves computing
    neighborhoods of stations which are close to each other, as well as 
    finding stations within these neighborhoods that are highly correlate.
    
    :Param network:
        The network to pre-process, which by this point should have the instance
        variables
        :Ivar stations:
            A dictionary mapping of station coop ids as strings to the Station
            object holding metadata for that station.
        :Ivar raw_series:
            A dictionary mapping of station coop ids as strings to the Series
            object containing the data read in for that station.
    :Return:
        Modifies network by adding the instance variables
        :Ivar neighborhoods:
            A dictionary mapping of station coop ids as strings to a list of
            other station coop id strings which are considered the "neighbors"
            of the key station.
        :Ivar correlations:
            A dictionary mapping of station coop ids as strings to a dictionary
            which maps the highest correlated neighbors of this key station to
            their correlation coefficient. For instance, if station "111111" 
            correlates to "222222" with r = 0.45 and to "333333" with r = 0.98,
            then
                network.correlations['111111'] == dict("222222":0.45,
                                                       "333333":0.98 )
    
    """

    print "Analyzing geographic network neighborhoods"

    all_neighbors = dict()
    stations_list = network.stations.values()
    for station in stations_list:

        print station.coop_id
        print "...computing neighbor distances"

        neighbors = find_neighborhood(station, stations_list, **params)
        all_neighbors[station.coop_id] = neighbors

    network.neighborhoods = all_neighbors

    # Write a neighborhood output file. Since my algorithm for computing distance
    # is slightly different than ushcn_dist_2004.v3, it won't produce exactly the
    # same distance output file. However, all the distances are within 10km, which
    # is perfectly fine. A bigger problem is that the ushcn_dist_2004.v3 outputs a
    # list of pointers for referencing the various stations. Since my code doesn't
    # emulate the Fortran code in terms of having arrays with lengths hard-coded,
    # I don't pass around pointers in the same way, so the ptr_str produced and
    # written here is garbage. It should not be a problem for coding MW2009, though,
    # because I can find information about stations dynamically and easily.
    print "...Assembling neighborhood output file"

    dist_out = open(params["dist_file"], "wb")

    for station in stations_list:
        print "   ", station
        neighbors = all_neighbors[station.coop_id]

        out_strings = neighborhood_strings(station, neighbors, stations_list)
        dist_out.writelines(out_strings)

    dist_out.close()

    ##########################################################################

    # Go through all the data we have, and replace the read-in values with
    # monthly anomalies. Then, flatten the data into a list with all the data
    # and length (endyr-begyr)*12
    for s in network.raw_series.itervalues():
        data = s.series
        anomalies = compute_monthly_anomalies(data, -9999)
        s.set_series(anomalies, s.years)

    print "Determining correlated neighbors"

    if os.path.exists(params["corr_file"]):
        print "...great, I'm gonna read it from disk...",

        corr_file = open(params["corr_file"])

        all_lines = corr_file.readlines()
        station_lines = all_lines[::2]
        corr_lines = all_lines[1::2]

        all_corrs = dict()
        for (sta_line, corr_line) in zip(station_lines, corr_lines):
            stations = sta_line.strip().split()
            this, others = stations[0], stations[1:]
            corrs = map(float, corr_line.strip().split()[1:])

            corr_dict = dict()
            for (id, corr) in zip(others, corrs):
                if not id == "000000":
                    corr_dict[id] = corr

            all_corrs[this] = corr_dict

        print " that was fast!"

    else:

        print "...need to do all the compuations"

        all_corrs = dict()

        for cand_series in network.raw_series.itervalues():

            coop_id1 = cand_series.coop_id
            corr_dict = find_correlations(cand_series, network.raw_series, network.neighborhoods[coop_id1], **params)

            all_corrs[coop_id1] = dict(corr=corr_dict)

        # Write a correlation output file. The actual correlations between stations
        # matches *perfectly* those computed with the ushcn_corr_2004.v3 code. However,
        # there are still issues with Fortran array pointers since I don't use any here
        # and I don't bother to pad the output file with bogus stations and correlations
        # if there are less than we hoped to find.
        print "...Assembling neighborhood correlation file\n"
        corr_out = open(params["corr_file"], "wb")
        station_list = network.stations.keys()
        for sta_id in station_list:
            correlations = all_corrs[sta_id]["corr"]
            sorted_neighbors = sorted(correlations.iteritems(), key=itemgetter(1), reverse=True)[
                : params["numcorr"] - 1
            ]
            # PAD PAD PAD
            ## Just pad the output with '000000' stations, r = 0.0 to make it look
            ## like the normal output.
            while len(sorted_neighbors) < params["numcorr"] - 1:
                sorted_neighbors.append(("000000", 0.0))
            ids, corrs = zip(*sorted_neighbors)
            id_str = ("%6s " % sta_id) + "".join(("%6s " % id for id in ids)) + "\n"
            # ptr_str = ("{0: >6d} ".format(test_stations.index(sta_id)+1))+"".join(("{0: >6d} ".format(test_stations.index(id)+1) for id in ids))+"\n"
            corr_str = ("  1.00 ") + "".join(("{0: >6.2f} ".format(c) for c in corrs)) + "\n"

            # corr_out.writelines((id_str, ptr_str, corr_str))
            corr_out.writelines((id_str, corr_str))

        corr_out.close()

    network.correlations = all_corrs

示例#3

显示文件

文件： splitmerge.py 项目： darothen/ccf-homogenization

def splitmerge(network, pairs=None, beg_year=1, end_year=2, **kwargs):
    
    ## EXPERIMENTAL PLACEHOLDERS - will eventually be replaced with a master
    ## loop to do all the id pairs.
    id_list = network.stations.keys()
    pair_results = dict()
    
    def dict_to_tuples(d):
        """Return the (key, value) pairs of mapping d as a list of tuples."""
        pairs = []
        for k in d.keys():
            pairs.append((k, d[k]))
        return pairs
    ## Generate station pairs for use in splitmerge by iteratively going through the
    ## station_list and adding stations in order of decreasing correlation. Skip a 
    ## neighbor if the pair is already present; want 20 stations or until all the
    ## correlated neighbors are used up.
#    pairs = []
#    for id1 in id_list:
#        neighbors = dict_to_tuples(network.correlations[id1])
#        sorted_neighbors = sorted(neighbors, key=operator.itemgetter(1))
#        added_pairs = 0
#        while sorted_neighbors and (added_pairs < 5):
#            id2, _ = sorted_neighbors.pop()
#            ordered_pair = tuple(sorted((id1, id2)))
#            if not ordered_pair in pairs:
#                pairs.append(ordered_pair)
#                added_pairs += 1
    
    for (id1, id2) in pairs:
        print "Pair %s with %s" % (id1, id2)
        pair_str = "%6s-%6s" % (id1, id2)
        #if pair_str != "051528-298107":
        #    continue
        
        raw_series = network.raw_series
        stations = network.stations
        series_copy = deepcopy(raw_series)
        
        min_ann = 5
        num_years = end_year - beg_year
        num_months = num_years*12
            
        for s in series_copy.itervalues():
            data = s.series
            scaled = scale_series(data, 0.1, s.MISSING_VAL)
            anomalies = compute_monthly_anomalies(scaled, s.MISSING_VAL)
            s.set_series(anomalies, s.years)
        
        ## Retrieve the data for each of the stations.
        station1 = stations[id1]
        series1 = series_copy[id1]
        data1 = series1.monthly_series
                
        station2 = stations[id2]
        series2 = series_copy[id2]
        data2 = series2.monthly_series
        
        
        #print data1[:50]
        #print data2[:50]
        #print "################################################################"
        ## Compute the difference series        
        diff_data = diff(data1, data2)
        MISS = series1.MISSING_VAL # Missing value placeholder
        
        ## Quickly pass through the data to find where it starts. We need to do this
        ## because it's possible that beg_year is earlier than the first year of 
        ## valid data in either data1 or data2. Furthermore, the original PHA code
        ## deliberately clipped off the first year of good data, so emulate that 
        ## effect here as well.
        ##
        ## Ultimately, we save the extreme early and extreme late month with valid
        ## data to use as our first guess at the undocumented changepoints.
        first = 0
        first_set = False
        last = 0
        for (i, d1, d2) in zip(xrange(num_months), data1, data2):
            if d1!=MISS and d2!=MISS:
                if first < 12:
                    first = i
                    #first_set = True
                #if not first_set:
                #    first = i
                #    first_set = True
                last = i
                
        ## Set the initial breakpoints and the list of already-found, homogenous
        ## segments.    
        breakpoints = [first, last, ]
        homog_segs = []
        
        #####################################################################
        ## BEGIN SPLITMERGE PROCESS TO GENERATE FIRST GUESS AT UNDOCUMENTED
        ## CHANGEPOINTS
        iter = 0 # counts how many times we've repeated the splitmerge process
        enter_BIC = False # break out of iterations into the BIC process?
        last_breakpoints = []
        while (iter < 10) and not enter_BIC:
            
            seg_bounds = zip(breakpoints[:-1], breakpoints[1:])
            last_breakpoints = deepcopy(breakpoints)
            new_breakpoints = deepcopy(breakpoints)
                
            new_homog_segs = []
        
            print "Parse segments (isplit = 1), ipass: "******"Too short: ", imo2iym(l), imo2iym(r)
                    continue
                
            ## If we've previously found that this segment is homogenous (has no
            ## potential changepoint), then we can skip it as well and proceed to
            ## the next one.
                # Set the within() method to check if this segment is within any
                # previously found homogenous ones. Use lambda, since we can't pass
                # keyword or positional arguments to map().
                within_this_seg = lambda seg: within((l, r), seg)
                within_stable_segs = map(within_this_seg, homog_segs)
                if any(within_stable_segs):
                    print "Stable segment: ", imo2iym(l), imo2iym(r)
                    if l == first: 
                        new_breakpoints.append(first)
                    continue
                
            ## The standard normal homogeneity test - which is the statistical test
            ## we'll use to see if there is a potential changepoint in this segment
            ## - requires us to normalize our paired difference series. We can do
            ## that in snht(), but we'll do it right now so we can inspect those
            ## standardized values later.
                z = standardize(segment, MISS)

            ## Apply standard normal homogeneity test. 
            ## For mechanics, see Alexandersson and Moberg 1997, Int'l Jrnl of
            ## Climatology (pp 25-34)
                likelihood_ratios = snht(z, MISS, standardized=True)
                z_count = len(get_valid_data(z))
                        
            ## We're left with the likelihood ratio for each value being a potential
            ## changepoint. Find the max ratio, and if that value is significant, let
            ## it be the newest potential changepoint.
                ind_max_ratio = 0
                max_ratio = 0.0
                clip_ratios = likelihood_ratios[2:-2] # clip the beginning and end,
                                                      # they can't be changepoints.
                for (ind, ratio) in zip(xrange(len(clip_ratios)), clip_ratios):
                    if ratio > max_ratio:
                        ind_max_ratio = ind
                        max_ratio = ratio
            ## Now we find the critical value for this data set, and check our max
            ## likelihood ratio against it
                crit_val = lrt_lookup(z_count)
                
                # The possible changepoint is the index of the max ratio we found. 
                # We have to shift it the following ways to align it to the original
                # data -
                #    1) shift by 2 re-aligns it from clip_ratios to likelihood_ratios
                #    2) shift by adjust re-aligns it to this segment in diff_data
                #    3) shift by l re-aligns it to the first index in diff_data
                possible_changepoint = l + ind_max_ratio + 2 + adjust
                
                y_new, m_new = imo2iym(possible_changepoint) # year, month
                
            ## If this is the first iteration, we indicate as such, and add the new
            ## changepoint
                if iter == 0: 
                    print "%6s-%6s MD        FIRST series %4d %2d to %4d %2d | at %4d %2d ts: %4.2f limit >: %3.2f" % (id1,id2,y1,m1,y2,m2,y_new,m_new,max_ratio,crit_val)
                    breakpoints.append(possible_changepoint)
                    breakpoints = sorted(breakpoints)
            
                else:
            ## Else, if we found a new possible changepoint, add it to our list.
                    if max_ratio > crit_val:
                        print "%6s-%6s MD Inhomogenity for series %4d %2d to %4d %2d | at %4d %2d ts: %4.2f limit >: %3.2f %4d" % (id1,id2,y1,m1,y2,m2,y_new,m_new,max_ratio,crit_val,z_count)
                        new_breakpoints.append(possible_changepoint)
                        
            ## If not, record that we found a homogeneous segment.   
                    else:
                        print "%6s-%6s MD      Homogeneous series %4d %2d to %4d %2d | at %4d %2d ts: %4.2f limit >: %3.2f %4d" % (id1,id2,y1,m1,y2,m2,y_new,m_new,max_ratio,crit_val,z_count)
                        new_homog_segs.append((l, r))
            
            ## Now we need to update our account of which segments were homogeneous,
            ## because we need to know during the next iteration. We will do this,
            ## as well as condense stable segments that lie adjacent to each other
            ## i.e., if we have the segments [(1, 5), (5, 10), (12, 15)], then we
            ## really have [(1,10), (12, 15)].
            homog_segs.extend(new_homog_segs)
            if homog_segs:
                homog_segs = sorted(homog_segs, key=operator.itemgetter(0))
                final_homog_segs = [homog_segs[0], ] # this will be like a stack
                for seg in homog_segs[1:]:
                    last_seg = final_homog_segs[-1]
                    if last_seg[1] == seg[0]:
                        new_seg = (last_seg[0], seg[1])
                        final_homog_segs.pop()
                        final_homog_segs.append(new_seg)
                    else:
                        final_homog_segs.append(seg)
                homog_segs = final_homog_segs
        
            ## So we have new segments that can be generated from these new
            ## breakpoints. Now, the PHA routine enters a "merge" process
            ## to see whether or not to keep these newly found changepoints or throw
            ## them out as false alarms. 
            ##
            ## We do this by "leapfrogging" every other breakpoint. This gives us
            ## a set of segments that all have another breakpoint in them. We want
            ## to see if these segments are homogeneous, because if they are, it
            ## means that the breakpoint we previously found in the segment has 
            ## been superseded.
            new_breakpoints = sorted(new_breakpoints)
            seg_bounds = zip(new_breakpoints[:-2], new_breakpoints[2:])
            
            remove_breakpoints = set()
            merged_breakpoints = set()
            if iter > 0:
                
                print "Merge segments (isplit = 0), ipass: "******"Stable segment: ", imo2iym(l), imo2iym(r)
    #                    if l == first: 
    #                        new_breakpoints.append(first)
    #                    seg_lookup.append(((l, r), 'stable'))
    #                    continue
                    # Set the within() method to check if this segment is within any
                    # previously found homogenous ones. Use lambda, since we can't pass
                    # keyword or positional arguments to map().
                    within_this_seg = lambda seg: within((l, r), seg)
                    within_stable_segs = map(within_this_seg, homog_segs)
                    if any(within_stable_segs):
                        print "Stable segment: ", imo2iym(l), imo2iym(r)
                        #if l == first: 
                        #    new_breakpoints.append(first)
                        merged_breakpoints.update([l, r])
                        continue
            
            ## Apply the same adjustments and the same standard normal homogeneity
            ## test that we did in the previous splitting process. There is no 
            ## difference here until we consider what to do if we find a new 
            ## homogeneous segment.
                    adjust = int(seg_bounds.index((l, r)) > 0)
                    segment = diff_data[l+adjust:r+1]
                    
                    z = standardize(segment, MISS)
                    likelihood_ratios = snht(z, MISS, standardized=True)
                    z_count = len(get_valid_data(z))
                        
                    ind_max_ratio = 0
                    max_ratio = 0.0
                    clip_ratios = likelihood_ratios[2:-2] # We clip the beginning and end
                    for (ind, ratio) in zip(xrange(len(clip_ratios)), clip_ratios):
                        if ratio > max_ratio:
                            ind_max_ratio = ind
                            max_ratio = ratio
                            
                    crit_val = lrt_lookup(z_count)
                    possible_changepoint = l + ind_max_ratio + 2 + adjust
                    
                    y_new, m_new = imo2iym(possible_changepoint)
                    
    
                    if z_count < 2:
                        y1, m1 = imo2iym(l)
                        y2, m2 = imo2iym(r)
                        print "%6s-%6s MD  No found peaks %4d %2d to %4d %2d" % (id1,id2,y1,m1,y2,m2)
                        print "%6s-%6s MD  Compress 1 out peak at %4d %2d" % (id1,id2,y_new,m_new)
                        #remove_breakpoints.add_
            ## If we found a new breakpoint that is statistically significant, then
            ## great! Let's keep it.
                    if max_ratio > crit_val:
                        print "%6s-%6s MD  Peak kept in merge at %4d %2d | ts: %4.2f limit >: %3.2f" % (id1,id2,y_new,m_new,max_ratio,crit_val)
                        merged_breakpoints.add(l)
                        merged_breakpoints.add(new_bp)
                        merged_breakpoints.add(r)
            ## If not, then this segment was homogeneous, so the breakpoint which
            ## already exists in it is no good.
                    else:
                        print "%6s-%6s MD Compress 2 out peak at %4d %2d | ts: %4.2f limit >: %3.2f" % (id1,id2,y_new,m_new,max_ratio,crit_val)
                        # Crap, if there are any potential breakpoints in this segment,
                        # we need to remove them because this segment is homogeneous. Let's
                        # remember this homogeneous segment for now and come back once
                        # we've found all of them.    
                        merged_breakpoints.update([l, r])
                        remove_breakpoints.add(new_bp)
            
            ## At this point, we have a set of all the breakpoints we've accumulated
            ## during this iteration of split/merge, as well as a set of breakpoints
            ## which we've found to be of no further use. We can difference update
            ## our set of breakpoints to remove these guys, and let those merged
            ## breakpoints be the set of newest breakpoints for the next splitmerge
            ## iteration.
                merged_breakpoints.difference_update(remove_breakpoints)
                breakpoints = list(merged_breakpoints)
            
            breakpoints = sorted(breakpoints)
            
            ## Did we actually find new breakpoints? If not, then we're done
            ## with splitmerge and can move on to the BIC process.
            enter_BIC = (breakpoints == last_breakpoints)
            iter = iter + 1
            
        ## Okay wow, we've potentially made it to the BIC stage now... !
        if first not in breakpoints:
            breakpoints.insert(0, first)
        ym_breakpoints = map(imo2iym, breakpoints)
        #print ym_breakpoints
        
        ## ENTERING MINBIC    
        bp_dictionary = dict()
####################################
##### MULTIPROCESS
        from multiprocessing import Pool

        global counter
        multi_bp_dict = {}
        counter = 0
        def cb(r):
            """Callback passed to po.apply_async(): count one more finished
            task in the global counter. The task result r is not used here.
            """
            global counter
            counter = counter + 1
        
        start = time.clock()         
        po = Pool(processes=4)
        for left,bp,right in zip(breakpoints[0:], breakpoints[1:], breakpoints[2:]):
                    
            if left != first:
                left = left + 1
            # recall that we only consider data after the first full year. we will be 
            # computing regressions with the independent variable indexed from this 
            # starting point, so we need to shift these indices. we also need to shift them
            # by +1 if this is any segment beyond the first one, so that we don't include
            # changepoints in more than one analysis.
            # TOTAL_SHIFT = -12 + 1 = -11
            # 
            # However, this shift is only necessary while looking at the array indices that
            # we generate using range(). the data should already be aligned correctly.
            total_shift = -12 + 1
            left_shift, bp_shift, right_shift = left+total_shift, bp+total_shift, right+total_shift
            y1, m1 = imo2iym(left)
            yb, mb = imo2iym(bp)
            y2, m2 = imo2iym(right)
            #print "Entering MINBIC - %4d %2d    %4d %2d    %4d %2d" % (y1, m1, yb,
            #                                                           mb, y2, m2)
            (seg_x, seg_data) = range(left_shift, right_shift+1), diff_data[left:right+1]
            bp_index = bp-left
            #print len(seg_x), len(seg_data), bp_index
            #bp_analysis = minbic(seg_x, seg_data, bp_index, MISS)
            multi_bp_dict[bp] = po.apply_async(minbic,(seg_x,seg_data,bp_index,MISS,),callback=cb)
        po.close()
        po.join()
        for bp in multi_bp_dict:
            r = multi_bp_dict[bp]
            multi_bp_dict[bp] = r.get()
        #print "counter - %d" % counter
        elapsed = (time.clock() - start)
        print "ELAPSED TIME - %2.3e" % elapsed
        #print new_bp_dict
####################################
##### NORMAL        
#        start = time.clock()
#        for left,bp,right in zip(breakpoints[0:], breakpoints[1:], breakpoints[2:]):
#                    
#            if left != first:
#                left = left + 1
#            # recall that we only consider data after the first full year. we will be 
#            # computing regressions with the independent variable indexed from this 
#            # starting point, so we need to shift these indices. we also need to shift them
#            # by +1 if this is any segment beyond the first one, so that we don't include
#            # changepoints in more than one analysis.
#            # TOTAL_SHIFT = -12 + 1 = -11
#            # 
#            # However, this shift is only necessary while looking at the array indices that
#            # we generate using range(). the data should already be aligned correctly.
#            total_shift = -12 + 1
#            left_shift, bp_shift, right_shift = left+total_shift, bp+total_shift, right+total_shift
#            y1, m1 = imo2iym(left)
#            yb, mb = imo2iym(bp)
#            y2, m2 = imo2iym(right)
#            print "Entering MINBIC - %4d %2d    %4d %2d    %4d %2d" % (y1, m1, yb,
#                                                                       mb, y2, m2)
#            (seg_x, seg_data) = range(left_shift, right_shift+1), diff_data[left:right+1]
#            bp_index = bp-left
#            #print len(seg_x), len(seg_data), bp_index
#            bp_analysis = minbic(seg_x, seg_data, bp_index, MISS)
#            
#            bp_dictionary[bp] = bp_analysis    
#        elapsed2 = (time.clock() - start)
#        print "ELAPSED TIME = %3.2e" % elapsed2
        
        ##################################3
        ## Print the adjustment summaries
        bp_dictionary = multi_bp_dict
        sorted_bps = sorted(bp_dictionary.keys())
        ndelete = []
        valid_bps = {}
        for bp in sorted_bps:
            stats = bp_dictionary[bp]
            
            cmodel=stats['cmodel']
            iqtype=stats['iqtype']
            asigx=stats['offset']
            azscr=stats['offset_z']
            rslp=stats['slopes']
            
            end1 = bp
            y_end1, m_end1 = imo2iym(end1)
            beg2 = bp+1
            y_beg2, m_beg2 = imo2iym(beg2)
            
            # If cmodel is *SLR*, then there is no breakpoint
            if 'SLR' in cmodel:
                print ("%s-%s  --  -- MD TESTSEG SKIP: %7.2f %5d %5d %3d %5d %5d %3d" %
                       (id1, id2, asigx, end1, y_end1, m_end1, beg2, y_beg2, m_beg2))
                # Don't store it!
            else:
                print ("%6s-%6s  --  -- MD TESTSEG ADJ: %7.2f %7.2f %8.4f %8.4f %5d %5d %3d %5d %5d %3d %2d" % 
                       (id1,id2, asigx, azscr, rslp[0], rslp[1], end1, y_end1, m_end1, beg2, y_beg2, m_beg2, iqtype))
                # Store it!
                valid_bps[bp] = stats
        
        ###############################
        ## Go back and see if we can get rid of some of the change points.
        ## If 2 or more of the chgpts are within MINLEN,
        ##    a) if the chgpt estimates are the same sign, then test each
        ##        singly with same endpoints and keep lowest BIC 
        ##    b) if not the same sign,
        ##        retain earliest changepoint
        # add the first, last to valid_bps
        interior_bps = valid_bps.keys()
        # Add first, last if not already in interior_bps
        for bp in [first, last]:
            if bp not in interior_bps:
                interior_bps.append(bp)
        sorted_bps = sorted(interior_bps)
        for left in sorted_bps:
            print sorted_bps, left
            ## We're looking for the next interim breakpoint that satisfies two
            ## conditions:
            ##    1) at least MINLEN valid data (non-missing to the right)
            ##    2) has at least one breakpoint between 'left' and it
            right = 0
            close_bps = []
            for right in sorted_bps: 
                if right <= left: continue
                
                if not close_bps:
                    close_bps.append(right)
                else:
                    valid_between_bps = diff_data[close_bps[-1]:right]
                    valid_length = len(get_valid_data(valid_between_bps, MISS))
                    print imo2iym(close_bps[-1]),valid_length,imo2iym(right)
                    if valid_length > MINLEN:
                        break
                    close_bps.append(right)
            # We could actually run out of things in sorted_bps, and wind up with
            # right == close_bps[-1]. Detect that and break out of this analysis
            # if that happens.
            if close_bps[-1]==right: break
            
            if left != first:
                left = left + 1
            close_bp_results = {}
            for bp in close_bps:
                        
#                # recall that we only consider data after the first full year. we will be 
#                # computing regressions with the independent variable indexed from this 
#                # starting point, so we need to shift these indices. we also need to shift them
#                # by +1 if this is any segment beyond the first one, so that we don't include
#                # changepoints in more than one analysis.
#                # TOTAL_SHIFT = -12 + 1 = -11
#                # 
#                # However, this shift is only necessary while looking at the array indices that
#                # we generate using range(). the data should already be aligned correctly.
                total_shift = -12 + 1
                left_shift, bp_shift, right_shift = left+total_shift, bp+total_shift, right+total_shift
                y1, m1 = imo2iym(left)
                yb, mb = imo2iym(bp)
                y2, m2 = imo2iym(right)
                
                print ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>"
                print y1,m1,"-",yb,mb,"-",y2,m2
                print "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<"
                
                (seg_x, seg_data) = range(left_shift, right_shift+1), diff_data[left:right+1]
                bp_index = bp-left
                bp_analysis = minbic(seg_x, seg_data, bp_index, MISS, kthslr0_on=True)
                
                cmodel=bp_analysis['cmodel']
                iqtype= bp_analysis['iqtype']
                offset= bp_analysis['offset']
                rslp= bp_analysis['slopes']
                crit_val = bp_analysis['crit_val']
                test_stat = bp_analysis['test_stat']
                bic = bp_analysis['bic']
                
                print ("Interim chgpt: %s %4d %2d %4d %2d %4d %2d %8.2f %8.2f %8.2f %8.2f %7.3f %7.3f %2d" %
                       (pair_str, y1, m1, yb, mb, y2, m2, bic, test_stat, crit_val, offset, rslp[0], rslp[1], iqtype))                    
                
                close_bp_results[bp] = bp_analysis

            # Now we have a small problem... we might have more than one breakpoint,
            # so we need to choose which one is best. We will check the sign of
            # the breakpoint amplitude changes:
            sign_of_amps = map(sign, [close_bp_results[bp]['offset'] for bp in close_bps])
            positive = lambda x: sign(x) >= 0
            negative = lambda x: sign(x) <= 0
            zero = lambda x: sign(x) == 0
            print "------------>",[close_bp_results[bp]['offset'] for bp in close_bps]
            if (all(map(positive, sign_of_amps)) or 
                all(map(negative, sign_of_amps))):    
                # Pick the best (minimum BIC)          
                bics = [(bp, close_bp_results[bp]['bic']) for bp in close_bps]
                sorted_bics = sorted(bics, key=operator.itemgetter(1))
                smallest_bp = sorted_bics[0][0]
                
                # Remove this smallest-bic bp from the in-interval bps 
                close_bps.remove(smallest_bp)
                valid_bps[smallest_bp] = close_bp_results[smallest_bp] 
                
                #print "leftovers",close_bps
                for bp in close_bps: # The remaining bps which we will reject
                    sorted_bps.remove(bp) # Remove them from this loop
                    del valid_bps[bp] # Remove them as valid 
                    
                yb, mb = imo2iym(smallest_bp)
                print ("Same domain - Lowest Interim: %s %4d %2d" % 
                       (pair_str, yb, mb))
            elif (all(map(zero, sign_of_amps))):
                # Choose the earliest changepoint; the rest of these have
                # amplitude changes which are 0.
                first_bp, last_bp = close_bps[0], close_bps[-1]
                
                # Remove the first interim bp and update valid_bps with this new
                # computation. 
                close_bps.remove(first_bp)
                valid_bps[first_bp] = close_bp_results[first_bp]
                
                # Reject remaining interim bps
                for bp in close_bps:
                    sorted_bps.remove(bp)
                    del valid_bps[bp]
                    
                yb, mb = imo2iym(first_bp)
                print ("Null domain - Earliest Interim : %s %4d %2d" %
                       (pair_str, yb, mb))
            else:
                # We'll use the earliest interim changepoint, but we need
                # to get rid of bad data. Mark all the data between the
                # interim changepoints as missing and re-compute BIC.
                first_bp, last_bp = close_bps[0], close_bps[-1]
                first_bp_index = first_bp-left
                last_bp_index = last_bp-left
                
                print len(seg_x), len(seg_data)
                print first_bp_index+1, last_bp_index+1
                print left, bp, right
                for i in range(first_bp_index+1, last_bp_index+1):
                    print i, imo2iym(i), i+left, imo2iym(i+left)
                    seg_x[i] = MISS
                    seg_data[i] = MISS
                    # Recall that seg_data[0] == diff_data[left]. ndelete records
                    # the *true month where there is unviable data*, so it needs to
                    # point back to the original element in diff_data we are 
                    # worried about.
                    ndelete.append(i+left) 
                bp_analysis = minbic(seg_x, seg_data, first_bp_index, MISS, kthslr0_on=True)
                
                # Remove the first interim bp and update valid_bps with this new
                # computation. 
                close_bps.remove(first_bp)
                valid_bps[first_bp] = bp_analysis
                
                # Reject remaining interim bps
                for bp in close_bps:
                    sorted_bps.remove(bp)
                    del valid_bps[bp]
                
                yb, mb = imo2iym(first_bp)
                print ("Diff domain - Earliest Interim : %s %4d %2d" %
                       (pair_str, yb, mb))                
    
        ## Remove changepoints which are an SLR model.
        nspan = [0]*num_months
        bp_count = 1
        for bp in sorted(valid_bps.keys()):
            bp_analysis = valid_bps[bp]
            
            if "SLR" in bp_analysis['cmodel']:
                del valid_bps[bp]
                continue
            
            print "   IN: ",bp
            nspan[bp] = bp_count
            ## If adjacent months are missing next to this breakpoint, then
            ## assume that those could be a breakpoint as well and copy this
            ## breakpoint's analysis results for them.
            for month in range(bp+1, last):
                if (month in ndelete) or (diff_data[month] == MISS):
                    nspan[month] = bp_count
                    print "   IN: ",month
                    valid_bps[month] = bp_analysis
                else:
                    break
            bp_count += 1
            
        valid_bps['del'] = ndelete
        valid_bps['nspan'] = nspan
        pair_results[pair_str] = valid_bps
        
        #print "ELAPSED TIMES = %3.2e %3.2e" % (elapsed1, elapsed2)
    print "done"
    ##
    import pickle
    f = open("pair_results", 'w')
    pickle.dump(pair_results, f)
    return pair_results