def _2d_var_detection(data, var, dvar, window, dist, thres, min_levels, verbose=0):
    """ Detect Breakpoints from two variables (work in progress)

    1. SNHT per level (both variables)
    2. Significant peaks within proximity of each other ?

    Only ``var`` currently decides where breaks are flagged. The SNHT of
    ``dvar`` is computed and stored, but combining both variables is still
    an open TODO (see the comments in the body).

    Parameters
    ----------
    data
        Container holding both variables; modified in place. It is accessed
        through ``data.items`` and a three-index ``data.iloc`` (presumably a
        pandas Panel of (vars, time, p), as in ``_2d_detection`` -- confirm).
        The items '<var>_snht', '<dvar>_snht' and '<var>_breaks' are added.
    var             str     Variable that defines the breakpoints
    dvar            str     Second variable (SNHT stored only)
    window                  Window passed to ``snht``; ``window / 4`` is passed
                            as its second extra argument
    dist                    Passed to ``local_maxima`` as ``dist``
    thres                   SNHT threshold; a level counts when its statistic
                            is strictly greater than ``thres``
    min_levels      int     Minimum number of levels above ``thres`` required
                            at a local maximum to flag a breakpoint
    verbose         int     > 0 additionally prints the number of local maxima

    Returns
    -------
    ``data`` when no local maximum is found, otherwise ``None``.
    NOTE(review): the mixed return value is kept as it was for callers; the
    results are communicated through the in-place changes to ``data``.
    """
    # TODO 2d_var_detection per level
    stest1 = np.squeeze(np.apply_along_axis(snht, 0, data[var].values, window, window / 4))
    stest2 = np.squeeze(np.apply_along_axis(snht, 0, data[dvar].values, window, window / 4))
    data['%s_snht' % var] = stest1
    data['%s_snht' % dvar] = stest2
    data['%s_breaks' % var] = 0

    # Local maxima of the statistic summed over axis 1 (all levels)
    imax1 = np.asarray(local_maxima(np.sum(stest1, 1), dist=dist))
    imax2 = np.asarray(local_maxima(np.sum(stest2, 1), dist=dist))  # not used yet, see TODOs
    # TODO how to combine !?
    # TODO add => summarized peak
    # TODO are there sign. breaks in the other variable?
    if len(imax1) == 0:
        print("No Breakpoints detected: %s min_levels (%d) found: %f" % (var, min_levels, np.max(stest1)))
        return data

    if verbose > 0:
        print("Local Maxima (%s): %d" % (var, len(imax1)))

    # How many levels are above the threshold at each local maximum.
    # The same strict comparison is used for counting and for flagging below.
    tstat = np.sum(stest1[imax1, :] > thres, 1)
    # One mask drives the gate, the reported count and the selection, so that
    # a maximum with exactly min_levels significant levels is treated the
    # same way everywhere (previously the gate/count used '>' and the
    # selection '>=').
    is_break = tstat >= min_levels
    if not np.any(is_break):
        print("No Breakpoints detected: %s min_levels (%d) found: %d" % (var, min_levels, np.max(tstat)))
    else:
        print("Final Breaks (%s): %d" % (var, np.sum(is_break)))
        itx = imax1[is_break]  # indices of breaks
        ivar = data.items.get_loc("%s_breaks" % var)  # index of variable
        # 1 = break at that time, 2 = this level itself is above the threshold
        iarray = 1 + np.int_(stest1[itx, :] > thres)
        data.iloc[ivar, itx, :] = iarray[np.newaxis, ::]  # (1,...) is required to set
    return
def _2d_detection(data, var, window, dist, thres, min_levels, verbose=0):
    """ Detect Breakpoints in 2D (time x pressure levels)

    1. Run SNHT per level and store it as '<var>_snht'
    2. Sum the statistic over axis 1 and locate its local maxima
    3. At each maximum count the levels strictly above ``thres``
    4. Maxima with at least ``min_levels`` such levels are breaks; they are
       written to '<var>_breaks' as 1, or 2 where that level is above
       ``thres`` itself (all other entries stay 0)

    Parameters
    ----------
    data            Panel   Inputdata (vars, time, p), modified in place
    var             str     Variable to consider
    window          float   Window size of Detection
    dist            float   distance between breakpoints
    thres           int     SNHT threshold
    min_levels      int     Minimum required levels to detect breakpoint
    verbose         int     passed to print_verbose; > 1 also lists every
                            candidate maximum

    Returns
    -------
    bool
        True when at least one breakpoint has been flagged, False otherwise
    """
    snht_name = '%s_snht' % var
    breaks_name = '%s_breaks' % var

    # SNHT applied along axis 0 of the values, i.e. per level
    level_stats = np.squeeze(np.apply_along_axis(snht, 0, data[var].values, window, window / 4))
    data[snht_name] = level_stats
    data[breaks_name] = 0

    # candidate positions: local maxima of the statistic summed over all levels
    peaks = np.asarray(local_maxima(np.sum(level_stats, 1), dist=dist))
    if len(peaks) == 0:
        message = "No Breakpoints detected: %s min_levels (%d) found: %f (%f)" % (
            var, min_levels, np.max(level_stats), thres)
        print_verbose(message, verbose)
        return False

    print_verbose("Local Maxima (%s): %d" % (var, len(peaks)), verbose)

    # number of levels above the threshold at every candidate
    # could weight levels ? upper levels are less relevant?
    n_levels = np.sum(level_stats[peaks, :] > thres, 1)
    accepted = n_levels >= min_levels
    if not np.any(accepted):
        message = "No Breakpoints detected: %s min_levels (%d) found: %d" % (
            var, min_levels, np.max(n_levels))
        print_verbose(message, verbose)
        return False

    if verbose > 1:
        print("Breaks (%s): " % var)
        for peak, count in zip(peaks, n_levels):
            print("%s (%d) %s" % (str(data.major_axis[peak]), count, color_boolean(count >= min_levels)))

    print_verbose("Final Breaks (%s): %d" % (var, np.sum(accepted)), verbose)

    break_rows = peaks[accepted]  # positions of the accepted breaks
    item_pos = data.items.get_loc(breaks_name)  # position of the breaks item
    flags = 1 + np.int_(level_stats[break_rows, :] > thres)
    # a leading axis of length 1 is required for the assignment
    data.iloc[item_pos, break_rows, :] = flags[np.newaxis, ::]
    return True
def _1d_detection(data, var, window, dist, thres, verbose=0):
    """ Detect Breakpoints in 1D (a single series)

    1. Run SNHT on ``data[var]`` and store it as '<var>_snht'
    2. Locate local maxima of the statistic (``dist`` is passed to
       ``local_maxima``)
    3. Maxima strictly above ``thres`` are breaks and are marked with 2 in
       '<var>_breaks'

    The '<var>_breaks' column is only created (initialised with 0) when at
    least one break has been found.

    Parameters
    ----------
    data                    Table-like input, modified in place (accessed via
                            ``data.columns`` and a two-index ``data.iloc``)
    var             str     Variable to consider
    window                  Window passed to ``snht``; ``window / 4`` is passed
                            as its second extra argument
    dist                    Passed to ``local_maxima`` as ``dist``
    thres                   SNHT threshold
    verbose                 passed to print_verbose

    Returns
    -------
    bool
        True when at least one breakpoint has been flagged, False otherwise
    """
    stat = snht(data[var].values, window, window / 4)
    data['%s_snht' % var] = stat

    # local maxima within a certain distance (could otherwise be too close together)
    peaks = np.asarray(local_maxima(stat, dist=dist))
    if len(peaks) == 0:
        print_verbose("No Breakpoints detected: %s found: %f (%f)" % (var, np.max(stat), thres), verbose)
        return False

    print_verbose("Local Maxima (%s): %d" % (var, len(peaks)), verbose)

    significant = stat[peaks] > thres  # boolean mask: above threshold
    if not np.any(significant):
        print_verbose("No Breakpoints detected: %s" % var, verbose)
        return False

    print_verbose("Final Breaks (%s): %d" % (var, np.sum(significant)), verbose)

    breaks_name = '%s_breaks' % var
    data[breaks_name] = 0
    column_pos = data.columns.get_loc(breaks_name)
    data.iloc[peaks[significant], column_pos] = 2
    return True
def detection_wrapper(arg):
    """ Run the 2D breakpoint detection on the global ``testdata`` for one
    parameter combination

    Parameters
    ----------
    arg     sequence    (window, dist, thres, min_levels)

    Returns
    -------
    params      dict    the input parameters (keys "window", "dist", "thres",
                        "level") plus "nbreaks" (number of breaks, 0 if none)
                        and "vert" (median level index above ``thres`` at the
                        breaks, -1 if none)
    ibreaks     list    ``testdata.index`` entries of the breaks, [] if none
    """
    # imported inside the function, as in the original
    from snht import snht
    from support_functions import local_maxima
    global testdata  # data needs to be shared with all processes ?

    window, dist, thres, min_levels = arg
    params = dict(window=window, dist=dist, thres=thres, level=min_levels, nbreaks=0, vert=-1)

    # SNHT applied along axis 0 of the values, i.e. per level
    level_stats = np.squeeze(np.apply_along_axis(snht, 0, testdata.values, window, window / 4))

    # local maxima of the statistic summed over all levels
    peaks = np.asarray(local_maxima(np.sum(level_stats, 1), dist=dist))
    if len(peaks) == 0:
        return params, []  # no local maxima

    n_levels = np.sum(level_stats[peaks, :] > thres, 1)
    accepted = n_levels >= min_levels
    if not np.any(accepted):
        return params, []  # not enough above threshold

    break_rows = peaks[accepted]  # positions of the breaks
    ibreaks = testdata.index[break_rows].tolist()
    # second-axis indices of all entries above the threshold at the breaks;
    # their median is reported as the vertical position
    level_idx = np.where(level_stats[break_rows] > thres)[1]
    params["nbreaks"] = len(break_rows)
    params["vert"] = np.median(level_idx)
    return params, ibreaks