Example #1
    def fit_line(self, x, y, startvalues=None):
        #        popt, pcov = curve_fit(self.linear, x, y, p0=startvalues)
        #        return popt
        #slope = np.median((y[1:]-y[:-1]))/np.median((x[1:]-x[:-1]))
        #slope = np.median((y[1:]-y[:-1])/(x[1:]-x[:-1]))
        #return np.array((slope, np.median(y[1:-1]) - slope*np.median(x[1:-1])))

        #return theilslopes(y, x)[0:2]
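        # theilslopes called with a single sequence fits it against its own
        # index and returns (slope, intercept, lo_slope, up_slope); [0:2]
        # keeps only the point estimates.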

        x_part = theilslopes(x)[0:2]
        y_part = theilslopes(y)[0:2]
        p = np.array((y_part[1], x_part[1]))
        q = np.array((y_part[0], x_part[0])) + p
        return (p, q)
Example #2
    def trend_theilsen(self, alpha=0.05):
        from scipy.stats.mstats import theilslopes

        xaxis = self.data_ts.index.to_julian_date().values
        yaxis = self.data_ts.values

        theilsen_result = theilslopes(yaxis, x=xaxis, alpha=alpha)
        slope, intercept, slope_low, slope_up = theilsen_result
        self.fitted['theilsen'] = xaxis * slope + intercept
        assert (slope_low <= slope <= slope_up)  # Just to be safe, check this
        slope_sign = np.sign(slope)
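        # Report a trend direction only if the slope confidence interval
        # excludes zero; otherwise the sign is set to 0.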
        if not slope_low < 0.0 < slope_up:
            sign = int(np.sign(slope))
        else:
            sign = int(0)

        results_dict = {}
        results_dict['sign'] = sign
        results_dict['slope'] = slope * (365.25 * 10)  # From /day to /decade
        results_dict['slope_low'] = slope_low * (365.25 * 10)  # From /day to /decade
        results_dict['slope_up'] = slope_up * (365.25 * 10)  # From /day to /decade

        # Add a sign to Theilsen
        self.__add_to_logbook__('Calculated Theil-Sen slope')
        results_dict['method'] = 'theilsen'
        results_dict['pvalue'] = None  # Trend Theilsen has no pvalue
        return results_dict
Example #3
def get_TheilSen(_y, what="slope"):
    if not np.ma.is_masked(_y):
        if what=="slope":
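            # Mask NaN/inf values so mstats.theilslopes ignores them; index [0]
            # is the Theil-Sen slope, fitted against the sample index since no
            # x is given.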
            return mstats.theilslopes(np.ma.masked_invalid(_y))[0]
        else:
            _x=np.arange(len(_y))
            return kendalltau(_x, _y, nan_policy='omit')[1]
    return np.nan
Example #4
def test_theilslopes():
    # Test for basic slope and intercept.
    slope, intercept, lower, upper = mstats.theilslopes([0, 1, 1])
    assert_almost_equal(slope, 0.5)
    assert_almost_equal(intercept, 0.5)

    # Test for correct masking.
    y = np.ma.array([0, 1, 100, 1], mask=[False, False, True, False])
    slope, intercept, lower, upper = mstats.theilslopes(y)
    assert_almost_equal(slope, 1. / 3)
    assert_almost_equal(intercept, 2. / 3)

    # Test of confidence intervals from example in Sen (1968).
    x = [1, 2, 3, 4, 10, 12, 18]
    y = [9, 15, 19, 20, 45, 55, 78]
    slope, intercept, lower, upper = mstats.theilslopes(y, x, 0.07)
    assert_almost_equal(slope, 4)
    assert_almost_equal(upper, 4.38, decimal=2)
    assert_almost_equal(lower, 3.71, decimal=2)
Example #6
def get_TheilSen(_x, what, _nboot, _y):
    import numpy as np
    import pandas as pd
    # note: the x/y argument order is odd; pandas apply appears to pass the dataframe column as the last argument
    from arch.bootstrap import StationaryBootstrap, IIDBootstrap
    from scipy.stats import mstats, mannwhitneyu, t, kendalltau
    from statsmodels.distributions.empirical_distribution import ECDF
    try:
        if what=="slope":
            return mstats.theilslopes(np.ma.masked_invalid(_y.values), _x)[0]*86400*365*1000000000
        elif what=="pval_tau":
            return kendalltau(_x, _y)[1]/2
        elif what=="pval_autocorr":            
            res0=mstats.theilslopes(_y, _x, alpha=0.95)[0]
            bs=StationaryBootstrap(3, np.array(range(len(_y))))
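            # StationaryBootstrap resamples index blocks (mean length 3) to
            # preserve autocorrelation; the slope is re-estimated on each
            # resample and the ECDF of those slopes gives the p-value below.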
            bs_slopes=[]
            for data in bs.bootstrap(_nboot):
                ind=data[0][0]
                res=mstats.theilslopes(_y[ind], _x, alpha=0.95)
                bs_slopes=bs_slopes+[res[0]]
            ecdf=ECDF(bs_slopes)
            pvalue=ecdf(res0)
            if pvalue>0.5:
                pvalue=1-pvalue
#            print pvalue
            return pvalue
        elif what=="pval":
            bs=IIDBootstrap(np.array(range(len(_y))))
            bs_slopes=[]
            for data in bs.bootstrap(_nboot):
                ind=data[0][0]
                res=mstats.theilslopes(_y[ind], _x, alpha=0.95)
                bs_slopes=bs_slopes+[res[0]]
            ecdf=ECDF(bs_slopes)
            pvalue=ecdf(0)
            if pvalue>0.5:
                pvalue=1-pvalue
#            print pvalue
            return pvalue
    except:
        return np.nan
Example #7
    def _populate_from_gc_bias_metrics(self, run_dir):
        for k, run_element in self.barcodes_info.items():
            if run_element.get('barcode') == 'unknown' or run_element[ELEMENT_NB_READS_PASS_FILTER] == 0:
                self.info('No reads for %s, not expecting GC bias data', run_element['run_element_id'])
                continue

            metrics_file = util.find_file(
                run_dir,
                run_element['project_id'],
                run_element['sample_id'],
                '*_S*_L00%s_gc_bias.metrics' % run_element['lane']
            )

            with open(metrics_file) as f:
                header = ''
                while not header.startswith('ACCUMULATION_LEVEL'):
                    header = f.readline()

                reader = csv.DictReader(f, header.split('\t'), delimiter='\t')
                lines = [l for l in reader]

                # gc slope
                data_points = [float(l['NORMALIZED_COVERAGE']) for l in lines if 20 <= int(l['GC']) <= 80]
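                # Theil-Sen fit of normalized coverage against its index over
                # the 20-80% GC range; gc_slope holds
                # (slope, intercept, lo_slope, up_slope).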
                gc_slope = theilslopes(data_points)
                self.info('Calculated a GC slope of %s from %s data points', gc_slope, len(data_points))

                # deviation from normal
                total_windows = sum([int(l['WINDOWS']) for l in lines])
                # total_windows * 0.0004 gives approximately the same number of data points as 20 <= GC <= 80
                threshold = total_windows * 0.0004
                diffs = [abs(1 - float(l['NORMALIZED_COVERAGE'])) for l in lines if int(l['WINDOWS']) > threshold]
                normal_dev = sum(diffs) / len(diffs)
                self.info('Calculated a normal deviation of %s from %s data points', normal_dev, len(diffs))

                run_element['gc_bias'] = {
                    'slope': gc_slope[0],
                    'mean_deviation': normal_dev
                }
Example #8
mm2=pd.DataFrame(tc2)
mm2['yearfrac']=np.arange(0,len(tc2))/12.

ssd2 = decompose(tc2, frequency=12, s_window=35, robust=True, s_degree=0)
mm2['deseas']=ssd2['trend']+ssd2['residual']

tc=pd.Series(data=df['total_column']).resample('M').mean()
mm=pd.DataFrame(tc)
mm['yearfrac']=np.arange(0,len(tc))/12.

ssd = decompose(tc, frequency=12, s_window=35, robust=True, s_degree=0)
mm['deseas']=ssd['trend']+ssd['residual']



s1=theilslopes(mm1['deseas'],mm1['yearfrac'])
s2=theilslopes(mm2['deseas'],mm2['yearfrac'])
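# theilslopes returns (slope, intercept, lo_slope, up_slope); since 'yearfrac'
# is in fractional years, the slope is per year.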
mm1['trend']=mm1['yearfrac']*s1[0]+s1[1]
mm2['trend']=mm2['yearfrac']*s2[0]+s2[1]

#plt.plot(df1.index.to_pydatetime(),df1['total_column'],'.')
plt.plot(mm1.index.to_pydatetime(),mm1['total_column'])
plt.plot(mm1.index.to_pydatetime(),mm1['deseas'])
plt.plot(mm1.index.to_pydatetime(),mm1['trend'])

plt.plot(mm2.index.to_pydatetime(),mm2['total_column'])
plt.plot(mm2.index.to_pydatetime(),mm2['deseas'])
plt.plot(mm2.index.to_pydatetime(),mm2['trend'])

#plt.xlim(datetime.date(2007,1,1),datetime.date(2009,1,1))
plt.show()
Example #9
    def calc_result_frame(self, trim=True):
        '''Return a result_frame

        Returns a result_frame which contains the characteristics of each soiling interval.
        An updated version of the pm_frame is stored as self.pm_frame.

        Parameters
        ----------
        trim (boolean): whether to trim (remove) the first and last soiling intervals to avoid inclusion of partial intervals

        '''

        # Estimate slope of each soiling interval, store results in a dataframe
        result_list = []
        if trim:
            res_loop = sorted(list(set(self['run'])))[1:-1]  # ignore first and last interval
        else:
            res_loop = sorted(list(set(self['run'])))

        for r in res_loop:
            run = self[self.run == r]
            length = (run.day[-1] - run.day[0])
            start_day = run.day[0]
            end_day = run.day[-1]
            run = run[run.pi_norm > 0]
            if len(run) > 2 and run.pi_norm.sum() > 0:
                fit = theilslopes(run.pi_norm, run.day)
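                # fit = (slope, intercept, lo_slope, up_slope); fit[0:2] feeds
                # np.poly1d so the Theil-Sen line can be evaluated at the
                # interval start and end days.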
                fit_poly = np.poly1d(fit[0:2])
                result_list.append({
                    'start': run.index[0],
                    'end': run.index[-1],
                    'length': length,
                    'run': r,
                    'run_slope': fit[0],
                    'run_slope_low': fit[2],
                    'run_slope_high': min([0.0, fit[3]]),
                    'max_neg_step': min(run.delta),
                    'start_loss': 1,
                    'clean_wo_precip': run.clean_wo_precip[0],
                    'inferred_start_loss': fit_poly(start_day),
                    'inferred_end_loss': fit_poly(end_day),
                    'valid': True
                })
            else:
                run = self[self.run == r]
                result_list.append({
                    'start': run.index[0],
                    'end': run.index[-1],
                    'length': length,
                    'run': r,
                    'run_slope': 0,
                    'run_slope_low': 0,
                    'run_slope_high': 0,
                    'max_neg_step': min(run.delta),
                    'start_loss': 1,
                    'clean_wo_precip': run.clean_wo_precip[0],
                    'inferred_start_loss': run.pi_norm.mean(),
                    'inferred_end_loss': run.pi_norm.mean(),
                    'valid': False
                })
        results = pd.DataFrame(result_list)

        if results.empty:
            raise NoValidIntervalError('No valid soiling intervals were found')

        # Filter results for each interval setting invalid interval to slope of 0
        results['slope_err'] = (results.run_slope_high - results.run_slope_low) / abs(results.run_slope)
        # criteria for exclusions
        filt = (
            (results.run_slope > 0) |
            (results.slope_err > 5) |
            (results.max_neg_step <= -0.05)
        )

        results.loc[filt, 'run_slope'] = 0
        results.loc[filt, 'run_slope_low'] = 0
        results.loc[filt, 'run_slope_high'] = 0
        results.loc[filt, 'valid'] = False

        # Calculate the next inferred start loss from next valid interval
        results['next_inferred_start_loss'] = np.clip(results[results.valid].inferred_start_loss.shift(-1), 0, 1)
        # Calculate the inferred recovery at the end of each interval
        results['inferred_recovery'] = np.clip(results.next_inferred_start_loss - results.inferred_end_loss, 0, 1)

        # Don't consider data outside of first and last valid intervals
        if len(results[results.valid]) == 0:
            raise NoValidIntervalError('No valid soiling intervals were found')
        new_start = results[results.valid].start.iloc[0]
        new_end = results[results.valid].end.iloc[-1]
        pm_frame_out = self[new_start:new_end]
        pm_frame_out = pm_frame_out.reset_index().merge(results, how='left', on='run').set_index('date')

        pm_frame_out['loss_perfect_clean'] = np.nan
        pm_frame_out['loss_inferred_clean'] = np.nan
        pm_frame_out['days_since_clean'] = (pm_frame_out.index - pm_frame_out.start).dt.days

        # Calculate the daily derate
        pm_frame_out['loss_perfect_clean'] = pm_frame_out.start_loss + pm_frame_out.days_since_clean * pm_frame_out.run_slope
        pm_frame_out.loss_perfect_clean = pm_frame_out.loss_perfect_clean.fillna(1)  # filling the flat intervals may need to be recalculated for different assumptions
        pm_frame_out['loss_inferred_clean'] = pm_frame_out.inferred_start_loss + pm_frame_out.days_since_clean * pm_frame_out.run_slope
        pm_frame_out.loss_inferred_clean = pm_frame_out.loss_inferred_clean.fillna(1)  # filling the flat intervals may need to be recalculated for different assumptions

        out = result_frame(results)
        out.pm_frame = pm_frame_out

        return out
Example #10
def draw_corplot(x, y, xname, yname, add_robust=False, save_to_file=True, \
    ax=None, stats_title=True, stats_legend=False, customcol=None, \
    legendprefix=''):
    # Choose the right colour for the plot.
    if customcol is None:
        regress_col = PLOTCOLS['regression']
        sample_col = PLOTCOLS['samples']
        sample_alpha = 0.75
    else:
        regress_col = customcol
        sample_col = customcol
        sample_alpha = 0.75
    # Create a new plot.
    if ax is None:
        fig, ax = pyplot.subplots(nrows=1, ncols=1)
    # Plot a scatter plot of the x and y values.
    ax.plot(x, y, 'o', color=sample_col, alpha=sample_alpha)
    # Plot the regression line.
    if add_robust:
        # Perform a linear regression.
        slope, intercept, lo_slope, up_slope = theilslopes(y, x, alpha=0.95)
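        # theilslopes returns (slope, intercept, lo_slope, up_slope); the
        # lo/up slopes bound the confidence band filled below.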
        # Plot the regression line.
        x_pred = numpy.array([numpy.min(x), numpy.max(x)])
        y_pred = slope * x_pred + intercept
        y_lo = lo_slope * x_pred + intercept
        y_up = up_slope * x_pred + intercept
        ax.plot(x_pred, y_pred, '-', color=regress_col)
        ax.fill_between(x_pred, y_lo, y_up, linewidth=3, alpha=0.2, \
            color=PLOTCOLS['regression'])
    # Perform a linear regression.
    model = linregress(x, y)
    try:
        r = model.rvalue
        p = model.pvalue
        slope = model.slope
        intercept = model.intercept
    except:
        slope, intercept, r, p, stderr = model
    # Perform a Spearman correlation.
    spearman = spearmanr(x, y)
    try:
        spearman_rho = spearman.correlation
        spearman_p = spearman.pvalue
    except:
        spearman_rho, spearman_p = spearman
    # Compute Kendall's Tau.
    kendall = kendalltau(x, y)
    try:
        kendall_tau = kendall.correlation
        kendall_p = kendall.pvalue
    except:
        kendall_tau, kendall_p = kendall
    # Set the regression line's label.
    if stats_legend:
        # Uncomment if you'd like to see both parametric and non-parametric
        # test results.
        #lbl = r"$R=%.2f, p=%.2f$" % (r, p)
        #lbl = lbl + "\n" + r"$\tau=%.2f, p=%.2f$" % (kendall_tau, kendall_p)
        # Show Kendall's tau, as we're using a lowish N.
        if kendall_p < 0.001:
            kendall_pstr = r"p<0.001"
        else:
            kendall_pstr = r"p=%.3f" % (kendall_p)
        lbl = r"%s$\tau=%.2f, %s$" % (legendprefix, kendall_tau, kendall_pstr)
    else:
        lbl = None
    # Plot the regression line.
    x_pred = numpy.array([numpy.min(x), numpy.max(x)])
    y_pred = slope * x_pred + intercept
    ax.plot(x_pred, y_pred, '-', color=regress_col, linewidth=3, label=lbl)
    # Finish the plot.
    ax.set_xlabel(xname.capitalize(), fontsize=FONTSIZE['label'])
    ax.set_ylabel(yname.capitalize(), fontsize=FONTSIZE['label'])
    if stats_title:
        ax.set_title("R=%.2f, p=%.3f; Rho=%.2f, p=%.3f; Tau=%.3f, p=%.3f" % \
            (r, p, spearman_rho, spearman_p, kendall_tau, kendall_p))
    if stats_legend:
        ax.legend(loc="best", fontsize=FONTSIZE['legend'])
    # Save the plot.
    if save_to_file:
        fig.savefig(os.path.join(OUTDIR, "corplot_%sx%s.png" % (xname, yname)))
    if ax is None:
        pyplot.close(fig)
Example #11
def subtract_psfs(image, psf, radius, x, y, e_limit=0.1, **kwargs):
        """
        Subtract PSFs from an image using linear regression

        Parameters
        ----------
        image : 2D array
            the image from which the PSFs with be subtracted

        psf :  2D array
            image of the PSF, doesn't need to be size-matched to the image

        radius : int
            radius of box around the PSF to be used for matching the height and 
            base

        x, y : list, array
            pixel coordinates

        e_limit : float
            If the relative width of the Theil-Sen slope confidence interval
            exceeds this value, i.e. (m_high - m_low) / m > 2 * e_limit,
            the subtraction is rejected
            
        
        Returns
        -------
        im_new : 2D array

        results : list
            The fit parameters for each source position:
            [slope, intercept, r-value, p-value, relative slope error,
            lo_slope, up_slope], with slope/intercept from ``theilslopes``
            and r/p from ``scipy.stats.linregress``


        """

        im_new = np.copy(image)

        if np.shape(psf)[0] < 2 * np.shape(image)[0]:
            w, h = np.shape(image)
            pw, ph = np.shape(psf)
            pad_w = int(w-pw/2)+1
            psf_pad = np.pad(psf, pad_w, mode="constant")
        else: 
            psf_pad = psf
        
        q = int(radius)
        cy, cx = np.where(psf_pad == psf_pad.max())
        cy, cx = cy[0], cx[0]

        psf_cutout = np.copy(psf_pad[cx-q : cx+q+1, cy-q : cy+q+1])
        psf_flat = psf_cutout.ravel()

        fit_results = []

        for xx, yy in zip(x, y):

            xii, yii = int(xx), int(yy)

            w, h = im_new.shape
            dx0, dx1 = xii, w-xii
            dy0, dy1 = yii, h-yii

            q1, q2, q3, q4 = min(q, dx0), min(q, dx1), min(q, dy0), min(q, dy1)
            im_cutout  = np.copy(im_new[xii-q1 : xii+q2+1, yii-q3 : yii+q4+1])
            im_flat = im_cutout.ravel()

            if len(psf_flat) != len(im_flat):
                fit_results += [[0]*7]
                continue


            m, c, r, p, e = linregress(psf_flat, im_flat)
            m, c, a, b = theilslopes(im_flat, psf_flat)
            e = 0.5*(b-a)/m
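            # e is the half-width of the Theil-Sen slope confidence interval
            # relative to the slope; it is compared against e_limit below.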

            # If it fails the null-hypothesis test, i.e. p > 0.1
            if p > 0.1:
                fit_results += [[0]*7]
                continue
            # If the relative width of the Theil-Sen slope confidence interval
            #   is too large, reject the subtraction
            # i.e. (m_high - m_low) > 2 * m * e_limit
            if e > e_limit:
                fit_results += [[0]*7]
                continue
            # If the fitted slope is less than zero, forget the fit
            if m < 0:
                fit_results += [[0]*7]
                continue
            else:
                fit_results += [[m, c, r, p, e, a, b]]
                

            psf_cutout = np.copy(psf_pad[cx-dx0 : cx+dx1, cy-dy0 : cy+dy1])
            psf_cutout *= m

            im_new[xii-dx0 : xii+dx1, yii-dy0 : yii+dy1] -= psf_cutout

        return im_new, fit_results
Example #12
    def compute_trend(self, start_year, stop_year, season=None, 
                      slope_confidence=.68):
        if slope_confidence is None:
            slope_confidence = .68
        if self._mobs is None:
            raise ValueError('Cannot compute trends: monthly data is not '
                             'available')
        mobs = self._mobs
        start_year, stop_year, period_str, yrs = _init_period(mobs, start_year, 
                                                              stop_year)
        
        if season in [None, 'all']:
            seas = 'all'
        elif season in SEASONS:
            seas = season
           
        if not 'seas' in self.yearly:
            self['yearly'][seas] = yearly = _get_yearly(mobs, seas, yrs)
        else:
            yearly = self['yearly'][seas]
        
        dates = yearly.index.values
        values = yearly.values 
        (start_date, 
         stop_date, 
         period_index, 
         num_dates_period) = _init_period_dates(start_year, stop_year, seas)
        
        # get period filter mask
        tmask = np.logical_and(dates>=start_date, 
                               dates<=stop_date) 
            
        # apply period mask to jsdate vector and value vector
        dates_data = dates[tmask]
        
        # vector containing data values
        vals = values[tmask]
        
        valid = ~np.isnan(vals)
        
        #works only on not nan values
        dates_data = dates_data[valid]
        vals = vals[valid]
        
        num_dates_data = dates_data.astype('datetime64[Y]').astype(np.float64)
        
        # create empty dictionary that is used to store trends results
        result = _init_trends_result_dict(start_year)
        
        #TODO: len(y) is number of years - 1 due to midseason averages
        result['n'] = len(vals)
        
        if len(vals) > 2:
            result['y_mean'] = np.nanmean(vals)
            result['y_min'] = np.nanmin(vals)
            result['y_max'] = np.nanmax(vals)
            
            #Mann / Kendall test
            [tau, pval] = kendalltau(x=num_dates_data, y=vals)
            
            
            (slope, 
             yoffs, 
             slope_low, 
             slope_up) = theilslopes(y=vals, x=num_dates_data, 
                                     alpha=slope_confidence)
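            # Theil-Sen fit of the yearly values against the numeric year
            # axis; slope_low/slope_up span the requested confidence interval.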
            
            # estimate error of slope at input confidence level
            slope_err = np.mean([abs(slope - slope_low), 
                                 abs(slope - slope_up)])
            
            reg_data = slope * num_dates_data + yoffs
            reg_period = slope * num_dates_period  + yoffs
            
            
            # value used for normalisation of slope to compute trend T
            # T=m / v0
            v0_data = reg_data[0]
            v0_period = reg_period[0]
            
            # Compute the mean residual value, which is used to estimate
            # the uncertainty in the normalisation value used to compute
            # trend
            mean_residual = np.mean(np.abs(vals - reg_data))
            
            # trend is slope normalised by first reference value. 
            # 2 trends are computed, 1. the trend using the first value of
            # the regression line at the first available data year, 2. the
            # trend corresponding to the value corresponding to the first
            # year of the considered period.
            
            trend_data = slope / v0_data * 100
            trend_period =  slope / v0_period * 100
            
            # Compute errors of normalisation values
            v0_err_data = mean_residual
            t0_data, tN_data = num_dates_data[0], num_dates_data[-1]
            t0_period = num_dates_period[0]
            
            # sanity check
            assert t0_data < tN_data
            assert t0_period <= t0_data
            
            dt_ratio = (t0_data - t0_period) / (tN_data - t0_data)
            
            v0_err_period = v0_err_data * (1 + dt_ratio)
            
            trend_data_err = _compute_trend_error(m=slope,
                                                  m_err=slope_err,
                                                  v0=v0_data,
                                                  v0_err=v0_err_data)
                                                      
            trend_period_err = _compute_trend_error(m=slope,
                                                    m_err=slope_err,
                                                    v0=v0_period,
                                                    v0_err=v0_err_period)
                            
            result['pval'] = pval
            result['m'] = slope
            result['m_err'] =slope_err
            result['yoffs'] = yoffs
            
            result['slp'] = trend_data
            result['slp_err'] = trend_data_err
            result['reg0'] = v0_data
            tp, tperr, v0p = None, None, None
            if v0_period > 0:
                tp = trend_period
                tperr = trend_period_err
                v0p = v0_period
            result['slp_{}'.format(start_year)] = tp
            result['slp_{}_err'.format(start_year)] = tperr
            result['reg0_{}'.format(start_year)] = v0p
            result['period'] = period_str
    
        if not seas in self.results:
            self.results[seas] = od()
        self.results[seas][period_str] = result

        return result
Example #13
                y = sdatasok
                X, Y = [], []
                #only work with notnan values
                for i in range(0,len(x)):
                    if not np.isnan(x[i]) and not np.isnan(y[i]):
                        X.append(x[i])
                        Y.append(y[i])

                if lok>=nmkmin:
                    p_stat='yes'
                    #Mann-Kendall test
                    [tau,pval]=kendalltau(X,Y)
                    print(tau,pval)

                    #theil slope
                    res=theilslopes(Y,X,sig)
                    reg=res[0]*np.asarray(X)+res[1]*np.ones(len(X))
                    regg=res[0]*np.asarray(mods)+res[1]*np.ones(len(mods))
                    spyr=res[0]*365*100/abs(reg[0]) #% per year
                    reg0=reg[0]
                else:
                    tau, pval, spyr = float('NaN'), float('NaN'), float('NaN')
                str_tau = "%5.2f" % tau
                str_pval = "%5.4f" % pval
                str_a = "%4.1f" % spyr
        
                
            # - - - - - - - - Model - - - - - - - - -
            mod_tau, mod_pval, mod_spyr, mod_reg0 = float('NaN'), float('NaN'), float('NaN'), float('NaN')
            mod_X, mod_Y, mod_sdatas, mod_sdatasok, mod_sodsok = [], [], [], [], []
            lok=0
Example #14
def compute_trends_new(s_monthly, periods, only_yearly=True):
    #sm = to_monthly_current_trends_interface(s0, MIN_DIM)
    d = dict(month=s_monthly.index.month,
             year=s_monthly.index.year,
             value=s_monthly.values)

    mobs = pd.DataFrame(d)

    mobs['season'] = mobs.apply(
        lambda row: _get_season_new(row['month'], row['year']), axis=1)

    mobs = mobs.dropna(subset=['value'])
    seasons = ['JFM', 'AMJ', 'JAS', 'OND', 'all']
    #trends with yearly and seasonal averages

    # get all years that are contained in data
    yrs = np.unique(mobs['year'])

    data = {}
    for i, seas in enumerate(seasons):
        if only_yearly and not seas == 'all':
            continue
        #initialize seasonal object
        data[seas] = {'date': [], 'jsdate': [], 'val': [], 'trends': {}}

        dates = []
        #filter the months
        for yr in yrs:
            if seas != 'all':

                catch = mobs[mobs['season'].str.contains('{}-{}'.format(
                    seas, yr))]
            else:
                catch = mobs[mobs['season'].str.contains('-{}'.format(yr))]
            date = _mid_season_new(seas, yr)

            dates.append(date)

            #needs 4 seasons to compute seasonal average to avoid biases
            if seas == 'all' and len(np.unique(catch['season'].values)) < 4:
                data[seas]['val'].append(np.nan)
            else:
                data[seas]['val'].append(np.nanmean(catch['value']))
        data[seas]['date'] = np.asarray(dates)
        data[seas]['jsdate'] = to_jsdate(data[seas]['date'])
        #filter period
        for period in periods:
            data[seas]['trends'][period] = {}

            # desired start / stop year (note, that this may change if first
            # or last value in tseries (or both) is NaN)
            start_yr, stop_yr = years_from_periodstr(period)
            num_yrs = stop_yr - start_yr

            #filtering to the period limit
            jsp0 = to_jsdate(np.datetime64('{}-01-01'.format(start_yr)))
            jsp1 = to_jsdate(np.datetime64('{}-12-31'.format(stop_yr)))

            # vector containing numerical timestamps in javascript format
            jsdate = data[seas]['jsdate']

            # get period filter mask
            tmask = np.logical_and(jsdate >= jsp0, jsdate <= jsp1)

            # filter data by period
            jsdate = jsdate[tmask]

            # vector containing data values
            y = np.asarray(data[seas]['val'])[tmask]

            # =============================================================================
            #             num_leap_years = np.sum(dt_idx.is_leap_year)
            #
            #             secs_per_year = np.mean(([86400 * 365] * (num_yrs-num_leap_years) +
            #                                      [86400 * 366] * num_leap_years))
            #
            #
            # =============================================================================
            valid = ~np.isnan(y)

            # Remove NaNs for Mann-Kendall test and regression
            _jsdate = jsdate[valid]
            _y = y[valid]

            if len(_jsdate) > 2:

                #kendall
                [tau, pval] = kendalltau(_jsdate, _y)
                data[seas]['trends'][period]['pval'] = pval

                #theil slope
                res = theilslopes(_y, _jsdate, 0.9)
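                # res = (slope, intercept, lo_slope, up_slope) at 90%
                # confidence; only the point estimates are used here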
                slope = res[0]
                yoffs = res[1]
                # regression line (evaluate at ACTUAL time-stamps corresponding
                # to input period -> jsdate and not _jsdate, which may have
                # removed first or last year, or both)
                reg = slope * jsdate + yoffs

                # =============================================================================
                #                 # time difference between start and stop in full years.
                #                 dt = (np.datetime64(dates[-1]) -
                #                       np.datetime64(dates[0])).astype('timedelta64[s]').astype(int)
                #
                # =============================================================================
                # =============================================================================
                #                 from numpy.testing import assert_allclose
                #                 try:
                #                     assert_allclose(dt, secs_per_year * num_yrs, rtol=1e-3)
                #                 except:
                #                     print(start_yr, stop_yr)
                #                     print(dates[0], dates[-1])
                #                     print(yrs)
                # =============================================================================

                #dt = dt / secs_per_year #time diff in units of years

                # compute slope in units of %/yr-1 normalised by first value
                # of considered time-series
                slp = (reg[-1] - reg[0]) / (num_yrs * reg[0]) * 100

                data[seas]['trends'][period]['slp'] = slp
                data[seas]['trends'][period]['reg0'] = reg[0]
                data[seas]['trends'][period]['t0'] = jsdate[0]
                data[seas]['trends'][period]['n'] = len(y)
            else:
                data[seas]['trends'][period]['pval'] = None
                data[seas]['trends'][period]['slp'] = None
                data[seas]['trends'][period]['reg0'] = None
                data[seas]['trends'][period]['t0'] = None
                data[seas]['trends'][period]['n'] = len(y)
    return data
Example #15
def processInput_trends(subchunk, parent_iteration, child_iteration):
    """This is the main file that calculate trends"""

    #print('INFO: see pid.<pid>.out to monitor trend computation progress')
    #sys.stdout = open('pid.'+str(os.getpid()) + '.out', 'w')
    
    #print('INFO: see trend.out to monitor trend computation progress')
    #sys.stdout = open('trend.out', 'a')

    ## Debug tool to print process Ids
    process = psutil.Process(os.getpid())
    current = current_process()
    print(process, current._identity, '{} Mo'.format(process.memory_info().rss/1024/1024))

    if subchunk.input=='box':
        print('### Chunk {} > subchunk {} started: COL: [{}:{}] ROW: [{}:{}]'.format(parent_iteration, child_iteration, *subchunk.get_limits('local', 'str')))
        write_string0 = (param.hash+"_CHUNK" + np.str(parent_iteration)  
                         + "_SUBCHUNK" + np.str(child_iteration)   
                         + "_" + '_'.join(subchunk.get_limits('global', 'str'))
                         + '.nc')
        subchunk_fname = param.output_path / write_string0
        ## Check if cache file already exists and must be overwritten
        if not param.b_delete:
            if subchunk_fname.is_file():
                print ('INFO: {} already exists. Use -d option to overwrite it.'.format(write_string0))
                return

    elif subchunk.input=='points':
        print('### Chunk {} > subchunk {} started.'.format(parent_iteration, child_iteration))
        print(param.input_file)
        str_date_range = param.input_file.stem.replace('timeseries','')
        write_string0 = 'merged_trends{}.h5'.format(str_date_range) 
        subchunk_fname = param.output_path / write_string0
        ## Result file is always overwritten in the case of point input

    ## Read the input time-series file for the main chunk, configured as (time x 500 x 500); dimensions may vary if different chunks are used
    hdf_ts = h5py.File(param.input_file, 'r')

    ## Create temporary storage with size of sub chunks in main chunk, currently configured 100 by 100 blocks
    var_temp_output = np.empty([*subchunk.dim,4])    
    var_temp_output[:] = np.nan
    # NaN matrix by default

    
    ## Parameters for the loop
    b_deb = 0 # flag to print time profiling
    t00 = timer()
    t000 = timer()
    t_mean = 0.
    #print(current._identity, f'{process.memory_info().rss/1024/1024} Mo')
    offsetx = subchunk.get_limits('local', 'tuple')[0]
    offsety = subchunk.get_limits('local', 'tuple')[2]

    print_freq = 20

    tab_prof_valid = []
    tab_prof_zero = []

    hf = h5py.File(subchunk_fname, 'w')

    for tsvar in hdf_ts['vars'].keys():

        for jj_sub in range(subchunk.dim[0]):
        #for jj_sub in range(61,80): #debug
            # dimension of variable: time,x,y
            # preload all the y data here to avoid overhead due to calling Dataset.variables at each iteration in the inner loop
            data_test0 = hdf_ts['vars/'+tsvar][:,jj_sub+offsety,offsetx:offsetx+subchunk.dim[1]]
            #data_test0 = hdf_ts.variables[tsvar][:500,sub_chunks_x[ii_sub],:]

            for ii_sub in range(subchunk.dim[1]):
            #for ii_sub in range(55,100):
                if b_deb: print('---------------')
                if b_deb: print('jj: {} - ii: {} '.format(jj_sub, ii_sub))
                
                t0 = timer()

                data_test = data_test0[:,ii_sub]

                ## remove tie group
                data_test[1:][np.diff(data_test)==0.] = np.nan
        
                #data_test=hdf_ts.variables[tsvar][:,sub_chunks_x[ii_sub],sub_chunks_y[jj_sub]]
                slope=999.0

                if b_deb:
                    print('t0', timer()-t0)
                    t0 = timer()

                if b_deb: print('Data valid:', data_test.size - np.isnan(data_test).sum(), '/', data_test.size)
                
                if 0:
                    print('Use mstats')
                    data_sen=np.ma.masked_array(data_test, mask=np.isnan(data_test))
                    t0 = timer()
                    slope, intercept, lo_slope, up_slope = mstats.theilslopes(data_sen, alpha=0.1)
                    print('slope, intercept, lo_slope, up_slope:')
                    print(slope, intercept, lo_slope, up_slope)
                    if b_deb: print('t02', timer()-t0)
                    np.savetxt('data_test.dat', data_test.T)
                    sys.exit()
                    t0 = timer()

                # mstats.theilslopes gives the correct slope and is consistent with the Python Mann-Kendall score Sn; it is also faster than the Fortran version
                # stats.theilslopes gives incorrect values when NaNs are present in the data
                
                if b_deb:
                    print('t2', timer()-t0)
                    t0 = timer()

                if 1:
                    ## original Mann-Kendall test:
                    bla = data_test[~np.isnan(data_test)]
                    if bla.size > 0:
                        #print('min/mean/max/nb/nb_unique', bla.min(), bla.mean(), bla.max(), len(bla), len(np.unique(bla)))
                        #print(bla)
                        if len(np.unique(bla))==1:
                            p,z,Sn,nx = [0,0,0,0] 
                        else:
                            #data_test = data_test[-10:] # debug line to speed up
                            
                            #try:
                            #    p,z,Sn,nx = mk_test_timeout(data_test)
                            #except TimeoutError as e:
                            #    print('timeout!')
                            #    p,z,Sn,nx = [0,0,0,0] 
                            p,z,Sn,nx = m.mk_trend(len(data_test), np.arange(len(data_test)), data_test)
                    else:
                        p,z,Sn,nx = m.mk_trend(len(data_test), np.arange(len(data_test)), data_test)
                        # if data_test = [], the test return (p,z,Sn,nx) = (1.0, 0.0, 0.5, 0.0)
                else:
                    ## other test
                    p,z,Sn,nx = [0,0,0,0] 
                    z = data_test.mean()

                if b_deb:
                    t4 = timer()-t0
                    if bla.size>0:
                        if bla.mean()==0.0:
                            tab_prof_zero.append(t4)
                        else:
                            tab_prof_valid.append(t4)
                        
                    print('t4=', t4)
                    if 0:
                        import matplotlib.pyplot as plt
                        plt.clf()
                        plt.plot(bla)
                        plt.ylim(0,6.1)
                        ti1 = '{}/{} - {:.3f} s'.format(jj_sub, ii_sub, t4)
                        ti2 = 'min/mean/max/nb/nb_unique {:.3f} {:.3f} {:.3f} {} {}'.format(bla.min(), bla.mean(), bla.max(), len(bla), len(np.unique(bla)))
                        ti3 = 'slope: {}'.format(Sn)
                        plt.title(ti1+'\n'+ti2+'\n'+ti3)
                        if Sn==0.0:
                            plt.savefig('bla.Sn0.{}.{}.png'.format(jj_sub, ii_sub))
                        else:
                            plt.savefig('bla.{}.{}.png'.format(jj_sub, ii_sub))
                    t0 = timer()

                if b_deb: print('p,z,slope,nx', p,z,slope,nx)
                if b_deb: print('p,z,Sn,nx', p,z,Sn,nx)

                var_temp_output[jj_sub,ii_sub,0] = p
                var_temp_output[jj_sub,ii_sub,1] = z
                var_temp_output[jj_sub,ii_sub,2] = Sn
                var_temp_output[jj_sub,ii_sub,3] = nx

           
            ## Print efficiency stats
            if (jj_sub+1)%print_freq==0:
                elapsed = timer()-t00
                data_stat = hdf_ts['vars/'+tsvar][:,jj_sub+1+offsety-print_freq:jj_sub+1+offsety,offsetx:offsetx+subchunk.dim[1]]  # h5py access, matching the read above
                valid = 100.*(data_stat.size - np.count_nonzero(np.isnan(data_stat)))/data_stat.size
                eff = 1e6*elapsed/data_stat.size
                #print(subchunk.dim, data_test0.shape)
                print('{} : {}.{}.block[{}-{}] : {:.3f}s elapsed : {:.3f} us/pix/date : {:.2f}% valid'.format(datetime.datetime.now(), parent_iteration, child_iteration, jj_sub+1-print_freq, jj_sub+1, elapsed, eff, valid))

                t00 = timer()
                sys.stdout.flush()

            if 0:
                t_mean += timer()-t00
                #print(f't00 {ii_sub} {t_mean/(ii_sub+1)}')
                #print(f't00.p{current._identity[0]}.it{ii_sub} {t_mean/(ii_sub+1):.3f}s {process.memory_info().rss/1024/1024:.2f}Mo')
                print('t00.p{}.it{} {:.3f}s {:.2f}Mo'.format(current._identity[0], ii_sub, t_mean/(ii_sub+1), process.memory_info().rss/1024/1024))
                v = var_temp_output[ii_sub,:,:]
                for ii in range(4):
                    #print(f'{np.count_nonzero(np.isnan(v[:,ii]))/v[:,ii].size:.3f}', np.nanmin(v[:,ii]), np.nanmax(v[:,ii]))
                    print('{:.3f}'.format(np.count_nonzero(np.isnan(v[:,ii]))/v[:,ii].size), np.nanmin(v[:,ii]), np.nanmax(v[:,ii]))
                print(np.nanmin(v), np.nanmax(v))
                t00 = timer()
   
        if b_deb:
        #if 1:
            valid = np.array(tab_prof_valid)
            zero = np.array(tab_prof_zero)

            #print('valid:', valid.size, valid.min(), valid.mean(), valid.max())
            #print('zero:', zero.size, zero.min(), zero.mean(), zero.max())
            print(valid.mean())
            print(zero.mean())
            #return
            #sys.exit()

        print('t000tot.p{} {:.3f}s {:.2f}Mo'.format(current._identity, timer()-t000, process.memory_info().rss/1024/1024))

        
        hf.create_dataset(tsvar+'/pval', data=var_temp_output[:,:,0])
        hf.create_dataset(tsvar+'/zval', data=var_temp_output[:,:,1])
        hf.create_dataset(tsvar+'/slope', data=var_temp_output[:,:,2])
        hf.create_dataset(tsvar+'/len', data=var_temp_output[:,:,3])

    hf.close() 
    
    print ('Subchunk {} completed, save to {}'.format(child_iteration, subchunk_fname))
    
    return None 
Example #16
    def _calc_result_df(self,
                        trim=False,
                        max_relative_slope_error=500.0,
                        max_negative_step=0.05,
                        min_interval_length=2):
        '''
        Calculates self.result_df, a pandas dataframe summarizing the soiling
        intervals identified and self.analyzed_daily_df, a version of
        self.daily_df with additional columns calculated during analysis.

        Parameters
        ----------
        trim : bool, default False
            whether to trim (remove) the first and last soiling intervals to
            avoid inclusion of partial intervals
        max_relative_slope_error : float, default 500
            the maximum relative size of the slope confidence interval for an
            interval to be considered valid (percentage).
        max_negative_step : float, default 0.05
            The maximum magnitude of negative discrete steps allowed in an
            interval for the interval to be considered valid (units of
            normalized performance metric).
        min_interval_length : int, default 2
            The minimum duration for an interval to be considered
            valid.  Cannot be less than 2 (days).
        '''

        daily_df = self.daily_df
        result_list = []
        if trim:
            # ignore first and last interval
            res_loop = sorted(list(set(daily_df['run'])))[1:-1]
        else:
            res_loop = sorted(list(set(daily_df['run'])))

        for r in res_loop:
            run = daily_df[daily_df['run'] == r]
            length = (run.day[-1] - run.day[0])
            start_day = run.day[0]
            end_day = run.day[-1]
            start = run.index[0]
            end = run.index[-1]
            run_filtered = run[run.pi_norm > 0]
            # use the filtered version if it contains any points
            # otherwise use the unfiltered version to populate a
            # valid=False row
            if not run_filtered.empty:
                run = run_filtered
            result_dict = {
                'start': start,
                'end': end,
                'length': length,
                'run': r,
                'run_slope': 0,
                'run_slope_low': 0,
                'run_slope_high': 0,
                'max_neg_step': min(run.delta),
                'start_loss': 1,
                'inferred_start_loss': run.pi_norm.mean(),
                'inferred_end_loss': run.pi_norm.mean(),
                'valid': False
            }
            if len(run) > min_interval_length and run.pi_norm.sum() > 0:
                fit = theilslopes(run.pi_norm, run.day)
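                # theilslopes gives (slope, intercept, lo_slope, up_slope);
                # the first two build the Theil-Sen line used to infer losses
                # at the start and end of the interval.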
                fit_poly = np.poly1d(fit[0:2])
                result_dict['run_slope'] = fit[0]
                result_dict['run_slope_low'] = fit[2]
                result_dict['run_slope_high'] = min([0.0, fit[3]])
                result_dict['inferred_start_loss'] = fit_poly(start_day)
                result_dict['inferred_end_loss'] = fit_poly(end_day)
                result_dict['valid'] = True
            result_list.append(result_dict)

        results = pd.DataFrame(result_list)

        if results.empty:
            raise NoValidIntervalError('No valid soiling intervals were found')

        # Filter results for each interval,
        # setting invalid interval to slope of 0
        results['slope_err'] = (results.run_slope_high -
                                results.run_slope_low) / abs(results.run_slope)
        # criteria for exclusions
        filt = ((results.run_slope > 0) |
                (results.slope_err >= max_relative_slope_error / 100.0) |
                (results.max_neg_step <= -1.0 * max_negative_step))

        results.loc[filt, 'run_slope'] = 0
        results.loc[filt, 'run_slope_low'] = 0
        results.loc[filt, 'run_slope_high'] = 0
        results.loc[filt, 'valid'] = False

        # Calculate the next inferred start loss from next valid interval
        results['next_inferred_start_loss'] = np.clip(
            results[results.valid].inferred_start_loss.shift(-1), 0, 1)
        # Calculate the inferred recovery at the end of each interval
        results['inferred_recovery'] = np.clip(
            results.next_inferred_start_loss - results.inferred_end_loss, 0, 1)

        # Don't consider data outside of first and last valid intervals
        if len(results[results.valid]) == 0:
            raise NoValidIntervalError('No valid soiling intervals were found')
        new_start = results[results.valid].start.iloc[0]
        new_end = results[results.valid].end.iloc[-1]
        pm_frame_out = daily_df[new_start:new_end]
        pm_frame_out = pm_frame_out.reset_index() \
                                   .merge(results, how='left', on='run') \
                                   .set_index('date')

        pm_frame_out['loss_perfect_clean'] = np.nan
        pm_frame_out['loss_inferred_clean'] = np.nan
        pm_frame_out['days_since_clean'] = \
            (pm_frame_out.index - pm_frame_out.start).dt.days

        # Calculate the daily derate
        pm_frame_out['loss_perfect_clean'] = \
            pm_frame_out.start_loss + \
            pm_frame_out.days_since_clean * pm_frame_out.run_slope
        # filling the flat intervals may need to be recalculated
        # for different assumptions
        pm_frame_out.loss_perfect_clean = \
            pm_frame_out.loss_perfect_clean.fillna(1)

        pm_frame_out['loss_inferred_clean'] = \
            pm_frame_out.inferred_start_loss + \
            pm_frame_out.days_since_clean * pm_frame_out.run_slope
        # filling the flat intervals may need to be recalculated
        # for different assumptions
        pm_frame_out.loss_inferred_clean = \
            pm_frame_out.loss_inferred_clean.fillna(1)

        self.result_df = results
        self.analyzed_daily_df = pm_frame_out
Example #17
                    y = sdatasok
                    X, Y = [], []
                    #only work with notnan values
                    for i in range(0,len(x)):
                            if not np.isnan(x[i]) and not np.isnan(y[i]):
                                    X.append(x[i])
                                    Y.append(y[i])

                    if lok>=nmkmin:
                            p_stat='yes'
                            #Mann-Kendall test
                            [tau,pval]=kendalltau(X,Y)
                            print(tau,pval)

                            #theil slope
                            res=theilslopes(Y,X,sig)
                            reg=res[0]*np.asarray(X)+res[1]*np.ones(len(X))
                            regg=res[0]*np.asarray(mods)+res[1]*np.ones(len(mods))
                            spyr=res[0]*365*100/reg[0] #% per year
                            reg0=reg[0]
                            #str_b = "%3.2f" % res[1]
                    else:
                            tau, pval, spyr, reg0 = float('NaN'), float('NaN'), float('NaN'), float('NaN')
                    str_tau = "%5.2f" % tau
                    str_pval = "%5.4f" % pval
                    str_a = "%4.1f" % spyr

            #listing of statistics
            taus.append(tau), pvals.append(pval), spyrs.append(spyr), reg0s.append(reg0)

            #plotting
Example #18
                        filename2)).variables[var][stmon:, :, :]
        mask = IO.Land_Mask(root, model)
        data_land_monthly = nanmean(nanmean(data * mask, axis=2), axis=1)
        # calculate global annual mean
        data_land_annual = vstack([
            sum(data_land_monthly[mon:mon + 12])
            for mon in xrange(0, len(data_land_monthly), 12)
        ])

        # calculate moving trend
        slope = np.empty((edyr - styr - 9, edyr - styr - 9))
        slope.fill(np.nan)
        for st in xrange(0, edyr - styr - 9):
            for ed in xrange(st + 10, edyr - styr + 1):
                slope[ed - 10,
                      st] = mstats.theilslopes(data_land_annual[st:ed],
                                               alpha=0.95)[0]

        # Mapping
        print model, var
        x = np.arange(styr, edyr - 9, 1.)
        y = np.arange(styr + 10, edyr + 1, 1.)
        X, Y = np.meshgrid(x, y)
        # create figure
        clevs = arange(limits[v][0], limits[v][1] + 0.01,
                       (limits[v][1] - limits[v][0]) / 100)
        cblevs = arange(limits[v][0], limits[v][1] + 0.01,
                        round((limits[v][1] - limits[v][0]) / 10, 2))
        fig = plt.figure(figsize=(12, 8), dpi=100, facecolor="white")
        font = {
            'family': 'serif',
            'color': 'darkred',
Example #19
def rna_dna_correspondence_main(loomfile, args_seg_file, args_patient_column,
                                args_patient, args_time_point, output=None):
    """

    Parameters
    ----------
    loomfile :
        
    args_seg_file :
        
    args_patient_column :
        
    args_patient :
        
    args_time_point :
        
    output :
         (Default value = None)

    Returns
    -------

    """
    with loompy.connect(loomfile, validate=False) as loom:
        genes = loom.ra['gene']
        metadata = pd.DataFrame(loom.ca['patient_ID']).astype(str)
        metadata.columns = ['patient_ID']
        metadata['complexity'] = loom.ca['complexity']
        metadata['cell_type'] = loom.ca['cell_type']
        metadata['time_point'] = loom.ca['time_point']

        list_of_gene_windows = wme.get_list_of_gene_windows(
            genes, window_size=800, window_step=300)

        segmentation = pd.read_table(args_seg_file)

        copy_ratio_dict = dna.segmentation_to_copy_ratio_dict(
            genes,
            segmentation,
            chrom_col='Chromosome',
            start_col='Start.bp',
            end_col='End.bp',
            score_col='tau',
            log2=True)
        gotten_cr = [
            np.mean([copy_ratio_dict[gene] for gene in window])
            for window in list_of_gene_windows
        ]
        gotten_wme, gotten_wme_metadata = wme.get_windowed_mean_expression(loom,
            list_of_gene_windows,
            patient=args_patient,
            patient_column='patient_ID',
            upper_cut=0)

        ps = []
        ps_tumor = []
        ps_tumor_low_complexity = []

        ps_nontumor = []
        ps_nontumor_low_complexity = []
        sorted_cr = np.argsort(gotten_cr)
        #    sorted_cr = np.hstack((sorted_cr[0:len(sorted_cr) // 5],
        #                           sorted_cr[4 * len(sorted_cr) // 5:len(sorted_cr)]))

        celltypes = metadata[metadata['patient_ID'] == args_patient]['cell_type'].values
        complexities = metadata[metadata['patient_ID'] == args_patient]['complexity'].values
        tps = metadata[metadata['patient_ID'] == args_patient]['time_point'].values
        for i in tqdm(range(gotten_wme.shape[1])):
#            bah = mk.original_test(gotten_wme[:, i][sorted_cr])
#            p = (1 - stats.norm.cdf(bah.z))
            p, a, b, c = theilslopes(gotten_wme[:,i], gotten_cr)
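            # theilslopes returns (slope, intercept, lo_slope, up_slope), so
            # despite its name `p` is the Theil-Sen slope of windowed
            # expression against copy ratio, not a p-value.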
            if complexities[i] > 500:
                if celltypes[i] == 'tumor':
                    ps_tumor.append(p)
                else:
                    ps_nontumor.append(p)
            else:
                if celltypes[i] == 'tumor':
                    ps_tumor_low_complexity.append(p)
                else:
                    ps_nontumor_low_complexity.append(p)
#                        if complexities[i] > 500:
#                            ps_tumor.append(p)
#                        else:
#                            ps_tumor_low_complexity.append(p)
#        celltypes = metadata.query(
#            'patient_ID == {}'.format(args_patient))['cell_type'].values
#        complexities = metadata.query(
#            'patient_ID == {}'.format(args_patient))['complexity'].values
#        tps = metadata.query(
#            'patient_ID == {}'.format(args_patient))['time_point'].values
#        iterator = 0
#        for i in tqdm(range(gotten_wme.shape[1])):
#            if tps[i] == args_time_point:
#                if celltypes[i] != 'tumor':
#                    bah = mk.original_test(gotten_wme[:, i][sorted_cr])
#                    p = (1 - stats.norm.cdf(bah.z))
#                    p = bah.z
#                    #if p > 1:
#                    #    plt.plot(gotten_wme[:,i][sorted_cr])
#                    #    plt.show()
#                    ps.append(p)
#                    if celltypes[i] == 'tumor':
#                        ps_tumor.append(p)
#                    else:
#                        ps_nontumor.append(p)
#                    if iterator == 500:
#                        break
#                    iterator += 1
#        iterator = 0
#        sorted_cr = np.argsort(gotten_cr)
##        celltypes = metadata.query(
##            'patient_ID == {}'.format(args_patient))['cell_type'].values
#        for i in tqdm(range(gotten_wme.shape[1])):
#            if tps[i] == args_time_point:
#                if celltypes[i] == 'tumor':
#                    bah = mk.original_test(gotten_wme[:, i][sorted_cr])
#                    p = (1 - stats.norm.cdf(bah.z))
#                    p = bah.z
#                    ps.append(p)
#                    if celltypes[i] == 'tumor':
#                        #    if p < 3:
#                        #       plt.plot(gotten_wme[:,i][sorted_cr])
#                        #       plt.show()
#                        if complexities[i] > 1000:
#                            ps_tumor.append(p)
#                        else:
#                            ps_tumor_low_complexity.append(p)
#                    else:
#                        ps_nontumor.append(p)
#                    if iterator > 500 and len(ps_tumor_low_complexity) > 0:
#                        break
#                    iterator += 1
        sns.distplot(ps_nontumor, label='non-malignant', color='r')
        sns.distplot(ps_nontumor_low_complexity, label='low complexity non-malignant', color='y')
        sns.distplot(ps_tumor, label='high complexity_tumor', color='k')
        sns.distplot(ps_tumor_low_complexity, label='low complexity tumor', color='b')
        plt.legend()
        #from IPython.core.debugger import set_trace; set_trace()
        # IPython is not in the dependencies and the output has been disabled since Feb 2020.
        # The IPython debug mode should be removed.
        #if output:

        warn(f"Output is disabled from 20 Feb 2020. Saving to {output}")

        plt.savefig(output)
Example #20
y = np.sin(2 * np.pi * x)

slope = 0.2  # [1/year]

# sinus + slope
y += slope * x
# random + slope only
#y = 0.1*np.random.rand(len(x)) + slope*x

#plt.plot(x,y)
#plt.show()

p, z, Sn, nx = m.mk_trend(len(y), np.arange(len(y)), y)
print('p, z, Sn, nx:')
print(p, z, Sn, nx)

slope2, intercept, lo_slope, up_slope = mstats.theilslopes(y)
print('slope2, intercept, lo_slope, up_slope:')
print(slope2, intercept, lo_slope, up_slope)

res_smk = sk.seakeni(y, 365)
print(res_smk)

print('Summary:')
print("mk fortran : {}, err[%] = {:.2f}".format(
    Sn * freq, 100 * (slope - Sn * freq) / slope))
print("mk scipy   : {}, err[%] = {:.2f}".format(
    slope2 * freq, 100 * (slope - slope2 * freq) / slope))
print("smk fortran: {}, err[%] = {:.2e}".format(
    res_smk[1], 100 * (slope - res_smk[1]) / slope))
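The per-sample slope returned by mstats.theilslopes can also be reported together with its confidence bounds and converted to per-year units for comparison with the injected trend. A small self-contained sketch, assuming daily sampling (freq = 365.25 samples per year) over ten years, which the original snippet does not show:

import numpy as np
from scipy.stats import mstats

freq = 365.25                          # assumed samples per year (daily data)
x = np.arange(0, 10, 1.0 / freq)       # assumed 10-year time axis, in years
y = np.sin(2 * np.pi * x) + 0.2 * x    # sinusoid plus a 0.2 / year trend

# theilslopes uses the sample index as x here, so the slope is per sample
slope, intercept, lo, up = mstats.theilslopes(y)
print(slope * freq, lo * freq, up * freq)   # per-year estimate and confidence bounds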
Exemplo n.º 21
0
def trend_CI(x_var, y_var, n_boot=1000, ci=95, trendtype="linreg", q=0.5, frac=0.6, it=3, autocorr=None, CItype="bootstrap"):
    """calculates bootstrap confidence interval and significance level for trend, ignoring autocorrelation or accounting for it
    Parameters
    ----------
    x_var : list
      independent variable
    y_var : list
      dependent variable, same length as x_var
    q : float, optional, only used if trendtype=="quantreg"
      quantile for which the regression is calculated
    n_boot : int, optional
      number of bootstrap samples
    ci : int, optional
      confidence level. Default is a 95% confidence interval
    frac : float, optional, only used if trendtype=="lowess"
      lowess parameter (fraction of the time period length used in each local regression)
    it : int, optional, only used if trendtype=="lowess"
      lowess parameter (number of iterations)
    autocorr : str, optional
      way of accounting for autocorrelation, possible values: None, "bootstrap"
    trendtype : str, optional
      method of trend derivation, possible values: lowess, linreg, quantreg, TheilSen
    CItype : str, optional
      method of CI derivation, possible values: "analytical" and "bootstrap". 
      if trendtype is "lowess", CItype will be set to None
      if CItype is "analytical": autocorrelation will be set to None
      

    Returns
    -------
    returns a dictionary with the following elements:
    slope - slope of the trend
    CI_high - upper confidence bound on the slope value
    CI_low - lower confidence bound on the slope value
    pvalue - trend's significance level
    trend - the trend line, i.e. its y values for all x_var
    trendCI_high - upper confidence bound for each value of the trend line
    trendCI_low - lower confidence bound for each value of the trend line

    Remarks
    -------
    the fit function occasionally crashes on resampled data; the workaround is to wrap the fit in a try statement
    """
    import numpy as np
    import pandas as pd
    #for linreg
    import statsmodels.api as sm
    from statsmodels.regression.linear_model import OLS
    #for arima
    import statsmodels.tsa as tsa
    #for quantreg
    import statsmodels.formula.api as smf
    from statsmodels.regression.quantile_regression import QuantReg
    #for lowess
    import statsmodels.nonparametric.api as npsm
    #other
    from statsmodels.distributions.empirical_distribution import ECDF
    from scipy.stats import mstats, mannwhitneyu, t, kendalltau
    from arch.bootstrap import StationaryBootstrap, IIDBootstrap

    #preparing data
    if CItype=="analytical" and trendtype=="TheilSen":
        CItype="bootstrap"
    x_var=np.array(x_var)
    y_var=np.ma.masked_invalid(y_var)
    n_data=len(y_var)
    ci_low=(100-ci)/2
    ci_high=100-ci_low
    
    #setting bootstrapping function
    if autocorr=="bootstrap":
        bs=StationaryBootstrap(3, np.array(range(len(y_var))))
    else:
        bs=IIDBootstrap(np.array(range(len(y_var))))
    
    if trendtype=="quantreg":
        print "Quantile regression, CI type: "+CItype+", autocorrelation adjustment: "+str(autocorr)+"\n"
        xydata=pd.DataFrame(np.column_stack([x_var, y_var]), columns=['X', 'Y'])
        model=smf.quantreg('Y ~ X', xydata)
        res=model.fit(q=q)
        intcpt=res.params.Intercept
        slope=res.params.X
        pvalue=res.pvalues[1]
        CI_low=res.conf_int()[0]['X']
        CI_high=res.conf_int()[1]['X']
        y_pred=res.predict(xydata)
        #calculating residuals
        resids=y_var-y_pred
        #calculate autocorrelation indices
        autocorr_test(x_var, resids)
            
        if CItype=="bootstrap":
            #bootstrapping
            bs_trends=np.copy(y_pred).reshape(-1,1)
            bs_slopes=[]
            bs_intcpts=[]
            for data in bs.bootstrap(n_boot):
                ind=data[0][0]
                model = smf.quantreg('Y ~ X', xydata.iloc[ind, :])
                try:
                    res = model.fit(q=q)
                    bs_slopes=bs_slopes+[res.params.X]
                    bs_intcpts=bs_intcpts+[res.params.Intercept]
                    bs_trends=np.append(bs_trends,res.predict(xydata).reshape(-1,1), 1)
                except Exception:
                    # the fit occasionally fails on resampled data; skip this sample
                    pass
    if trendtype=="linreg":
        print "Linear regression, CI type: "+CItype+", autocorrelation adjustment: "+str(autocorr)+"\n"
        x_varOLS = sm.add_constant(x_var)
        model = sm.OLS(y_var, x_varOLS, hasconst=True, missing='drop')
        res = model.fit()
        intcpt,slope=res.params
        pvalue=res.pvalues[1]
        CI_low,CI_high=res.conf_int()[1]
        y_pred=res.predict(x_varOLS)
        #calculating residuals
        resids=y_var-y_pred
        #calculate autocorrelation indices
        autocorr_test(x_var, resids)
        
        if CItype=="bootstrap":        
            #bootstrapping for confidence intervals
            bs_slopes=[]
            bs_intcpts=[]
            bs_trends=np.copy(y_pred).reshape(-1,1)
            for data in bs.bootstrap(n_boot):
                ind=data[0][0]
                model = sm.OLS(y_var[ind], x_varOLS[ind,:], hasconst=True, missing='drop')
                try:
                    res = model.fit()
                    bs_slopes=bs_slopes+[res.params[1]]
                    bs_intcpts=bs_intcpts+[res.params[0]]
                    bs_trends=np.append(bs_trends,res.predict(x_varOLS).reshape(-1,1), 1)
                except Exception:
                    # the fit occasionally fails on resampled data; skip this sample
                    pass
                    
    if trendtype=="TheilSen":
#        print "Theil-Sen slope, CI type: "+CItype+", autocorrelation adjustment: "+str(autocorr)+"\n"
        #significance of MK tau
        tau,pvalue=kendalltau(x_var, y_var)
#        print "raw MK tau:", tau, "raw MK pvalue:", pvalue
        #TS slope and confidence intervals
        slope,intercept,CI_low,CI_high=mstats.theilslopes(y_var, x_var, alpha=0.95)        
        #getting slope line's y values
        y_pred=intercept+slope*x_var
        #calculating residuals
        resids=y_var-y_pred
        #calculate autocorrelation indices
        autocorr_test(x_var, resids)
                    
        if CItype=="bootstrap":
            #bootstrapping for confidence intervals
            bs_slopes=[]
            bs_intcpts=[]
            bs_trends=np.copy(y_pred).reshape(-1,1)
            for data in bs.bootstrap(n_boot):
                ind=data[0][0]
                res=mstats.theilslopes(y_var[ind], x_var[ind], alpha=0.95)
                bs_slopes=bs_slopes+[res[0]]
                bs_intcpts=bs_intcpts+[res[1]]
                bs_trends=np.append(bs_trends, (res[1]+res[0]*x_var).reshape(-1,1), 1)

    if trendtype=="lowess":
        print "Lowess\n"
        temp=dict(npsm.lowess(y_var, x_var, frac=frac, it=it, missing="drop"))
        y_pred=np.array(list(map(temp.get, x_var))).astype("float").reshape(-1,1)
        bs_trends=np.copy(y_pred)
        
        for data in bs.bootstrap(n_boot):
            ind=data[0][0]
            try:
                temp = dict(npsm.lowess(y_var[ind], x_var[ind], frac=frac, it=it, missing="drop"))
                temp=np.array(list(map(temp.get, x_var))).astype("float").reshape(-1,1)
                pred=pd.DataFrame(temp, index=x_var)
                temp_interp=pred.interpolate().values
                bs_trends=np.append(bs_trends, temp_interp, 1)
            except Exception:
                # lowess occasionally fails on resampled data; skip this sample
                pass


    #calculating final values of CI and p-value

    #skipping when lowess
    if trendtype=="lowess":
        CI_low=np.nan
        CI_high=np.nan
        slope=np.nan
        intcpt=np.nan
        pvalue=np.nan
        confint=np.nanpercentile(bs_trends, [ci_low,ci_high], 1)
        # percentiles are along the first axis of the nanpercentile result
        trendCI_low=confint[0,:]
        trendCI_high=confint[1,:]
    else:
        if CItype=="bootstrap":
            #values for slope, intercept and trend can be obtained as medians of the bootstrap distributions, but normally the analytical parameters are used instead.
            #if the bootstrap bias (difference between the analytical values and the bootstrap median) is strong, it might be better to use the bootstrap values;
            #these three lines would need to be uncommented then
#            slope=np.median(bs_slopes)
#            intcpt=np.median(bs_intcpts)
#            trend=intcpt+slope*x_var
            #the slope CI comes from the bootstrap as well, because it accounts for autocorrelation, which is the point of this script
            CI_low,CI_high=np.percentile(bs_slopes, [ci_low, ci_high])
            ecdf=ECDF(bs_slopes)
            pvalue=ecdf(0)
            #this makes sure we are calculating p-value on the correct side of the distribution. That will be one-sided pvalue
            if pvalue>0.5:
                pvalue=1-pvalue
            confint=np.nanpercentile(bs_trends, [ci_low,ci_high], 1)
            print "bs_trends:", bs_trends.shape, confint.shape
            trendCI_low=confint[:,0]
            trendCI_high=confint[:,1]
        else:
            #this is for analytical calculation of trend confidence interval
            #it happens in the same way for each of the trend types, thus it is done here, not under the trendtype subroutines
            #making sure x are floats
            xtemp=np.array(x_var)*1.0
            #squared anomaly
            squanom=(xtemp-np.mean(xtemp))**2
            temp=((1./len(x_var))+(squanom/sum(squanom)))**0.5
            #standard error of estimation
            see=(np.nansum((np.array(y_var)-np.nanmean(y_pred))**2)/len(x_var))**0.5
            #adjusting ci
            ci_adj=1-((1-ci/100.)/2)
            #accounting for uncertainty in mean through student's t
            tcomp=t.ppf(ci_adj, len(x_var)-2)
            #confidence interval
            cint=tcomp*see*temp
            #for trend only
            trendCI_high=y_pred+cint
            trendCI_low=y_pred-cint

        print(trendtype, "slope:", slope, "pvalue (one sided):", pvalue, "conf interval:", CI_low, CI_high, "autocorrelation adjustment:", autocorr, "\n")
    output={"slope":slope, "CI_high":CI_high, "CI_low":CI_low, "pvalue":pvalue, "trend": y_pred, "trendCI_low":trendCI_low, "trendCI_high":trendCI_high}
    return output
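The core of the bootstrap branch above (resample the index array, refit the Theil-Sen slope, take percentiles of the resampled slopes, and read a one-sided p-value off their ECDF at zero) can be illustrated in isolation. A minimal standalone sketch on synthetic data; it requires the arch package used above and skips the autocorrelation handling and the autocorr_test helper:

import numpy as np
from scipy.stats import mstats
from statsmodels.distributions.empirical_distribution import ECDF
from arch.bootstrap import IIDBootstrap

rng = np.random.default_rng(42)
x = np.arange(100, dtype=float)
y = 0.05 * x + rng.normal(scale=1.0, size=x.size)    # weak trend plus noise

bs = IIDBootstrap(np.arange(len(y)))
bs_slopes = []
for data in bs.bootstrap(1000):
    ind = data[0][0]                                  # resampled index array
    bs_slopes.append(mstats.theilslopes(y[ind], x[ind])[0])

slope = mstats.theilslopes(y, x)[0]
CI_low, CI_high = np.percentile(bs_slopes, [2.5, 97.5])
pvalue = ECDF(bs_slopes)(0.0)                         # fraction of resampled slopes <= 0
pvalue = min(pvalue, 1.0 - pvalue)                    # one-sided p-value
print(slope, CI_low, CI_high, pvalue)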
Exemplo n.º 22
0
def compute_trends_current(s_monthly, periods, only_yearly=True):
    """Compute trends for station
    
    Slightly modified code from original trends interface developed by 
    A. Mortier.
    
    Main changes applied:
        
        - Keep NaNs
    """
    #sm = to_monthly_current_trends_interface(s0, MIN_DIM)
    d = dict(month=s_monthly.index.month,
             year=s_monthly.index.year,
             value=s_monthly.values)

    mobs = pd.DataFrame(d)

    mobs['season'] = mobs.apply(
        lambda row: _get_season_current(row['month'], row['year']), axis=1)

    mobs = mobs.dropna(subset=['value'])

    #trends with yearly and seasonal averages
    seasons = ['spring', 'summer', 'autumn', 'winter', 'all']
    yrs = np.unique(mobs['year'])

    data = {}

    for i, seas in enumerate(seasons):
        if only_yearly and not seas == 'all':
            continue
        #initialize seasonal object
        data[seas] = {'date': [], 'jsdate': [], 'val': []}
        #filter the months
        for yr in yrs:
            if seas != 'all':
                catch = mobs[mobs['season'].str.contains(seas + '-' + str(yr))]
            else:
                catch = mobs[mobs['season'].str.contains('-' + str(yr))]
            date = _mid_season_current(seas, yr)

            data[seas]['date'].append(date)
            epoch = datetime.datetime(1970, 1, 1)
            data[seas]['jsdate'] = [(dat - epoch).total_seconds() * 1000
                                    for dat in data[seas]['date']]
            #needs 4 seasons to compute seasonal average to avoid biases
            if (seas == 'all') & (len(np.unique(catch['season'].values)) < 4):
                data[seas]['val'].append(np.nan)
            else:
                data[seas]['val'].append(np.nanmean(catch['value']))

        #trends for this season
        data[seas]['trends'] = {}

        #filter period
        for period in periods:
            p0 = int(period[:4])
            p1 = int(period[5:])
            data[seas]['trends'][period] = {}

            #Mann-Kendall test
            x = np.array(data[seas]['jsdate'])
            y = np.array(data[seas]['val'])
            #works only on not nan values
            x = x[~np.isnan(y)]
            y = y[~np.isnan(y)]
            #filtering to the period limit
            jsp0 = (datetime.datetime(p0, 1, 1) - epoch).total_seconds() * 1000
            jsp1 = (datetime.datetime(p1, 12, 31) -
                    epoch).total_seconds() * 1000
            y = y[(x >= jsp0) & (x <= jsp1)]
            x = x[(x >= jsp0) & (x <= jsp1)]

            if len(x) > 2:
                #kendall
                [tau, pval] = kendalltau(x, y)
                data[seas]['trends'][period]['pval'] = pval

                #theil slope
                res = theilslopes(y, x, 0.9)

                reg = res[0] * np.asarray(x) + res[1] * np.ones(len(x))
                slp = res[0] * 1000 * 60 * 60 * 24 * 365 / reg[
                    0]  #slp per milliseconds to slp per year
                data[seas]['trends'][period]['slp'] = slp * 100  #in percent
                data[seas]['trends'][period]['reg0'] = reg[0]
                data[seas]['trends'][period]['t0'] = x[0]
                data[seas]['trends'][period]['n'] = len(y)
            else:
                data[seas]['trends'][period]['pval'] = None
                data[seas]['trends'][period]['slp'] = None
                data[seas]['trends'][period]['reg0'] = None
                data[seas]['trends'][period]['t0'] = None
                data[seas]['trends'][period]['n'] = len(y)

    return data
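Because the x axis above holds epoch milliseconds, the Theil-Sen slope comes out in units per millisecond; the snippet converts it to a relative trend in percent per year by multiplying by the number of milliseconds per year and dividing by the trend value at the first time point. A small worked sketch of just that conversion, on hypothetical yearly data:

import numpy as np
from scipy.stats import theilslopes

MS_PER_YEAR = 1000 * 60 * 60 * 24 * 365            # milliseconds per year, as above

# hypothetical yearly values on an epoch-millisecond time axis
years = np.arange(2000, 2011)
x = np.array([(np.datetime64(f"{yr}-06-15") - np.datetime64("1970-01-01"))
              / np.timedelta64(1, "ms") for yr in years])
y = 1.0 + 0.02 * (years - years[0])                 # 0.02 units/year on a baseline of 1.0

slope, intercept, _, _ = theilslopes(y, x, 0.9)     # slope in units per millisecond
reg0 = slope * x[0] + intercept                     # trend value at the first time point
slp = slope * MS_PER_YEAR / reg0 * 100              # relative trend in percent per year
print(slp)                                          # roughly 2 % per year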
Exemplo n.º 23
0
 def _calc_skew_angle(cls, source: OffsetSeries) -> Tuple[float, float]:
     # We use ordinary linear regression only for its R^2 value (the robust fit below does not provide one)
     _, _, rval, _, _ = scipy_stats.linregress(source.reception_times, source.offsets)
     # Apply robust linear regression - note that (x, y) is flipped in the argument list
     medslope, _, _, _ = scipy_mstats.theilslopes(source.offsets, source.reception_times)
     return medslope, rval ** 2
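This helper pairs a robust Theil-Sen slope with an R^2 taken from ordinary least squares, since theilslopes reports no goodness-of-fit measure. A standalone sketch of the same idea on plain arrays; the OffsetSeries type and the class context are dropped, and the names are illustrative:

import numpy as np
from scipy import stats as scipy_stats
from scipy.stats import mstats as scipy_mstats

def calc_skew_angle(reception_times, offsets):
    # R^2 from ordinary least squares (theilslopes does not provide one)
    _, _, rval, _, _ = scipy_stats.linregress(reception_times, offsets)
    # robust slope; note theilslopes takes (dependent, independent)
    medslope, _, _, _ = scipy_mstats.theilslopes(offsets, reception_times)
    return medslope, rval ** 2

t = np.linspace(0.0, 100.0, 200)                          # reception times
offs = 5e-6 * t + np.random.default_rng(1).normal(scale=1e-6, size=t.size)
print(calc_skew_angle(t, offs))                           # slope near 5e-6, R^2 near 1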
Exemplo n.º 24
0
def compute_trends_current(s_monthly, periods, only_yearly=True):
    """ Compute trends for station. 
    
    Previously used in the trends interface to compute the trends (as of 05.06.19).

    Slightly modified code from original trends interface developed by
    A. Mortier.

    Parameters
    ------------------------

    s_monthly : pd.DataFrame
        Dataframe containing monthly values of data.
    
    periods : list[str]
        List containing periods.

    only_yearly : bool
        If True, only the yearly ('all') season is computed.

    Returns 
    ------------------------
    data : dict
        Dictionary containing the trends.

    Main changes applied:
        - Keep NaNs

    """

    d = dict(month=s_monthly.index.month,
             year=s_monthly.index.year,
             value=s_monthly.values)

    mobs = pd.DataFrame(d)
    mobs['season'] = mobs.apply(
        lambda row: _get_season_current(row['month'], row['year']), axis=1)
    # drop rows where value = nan.
    mobs = mobs.dropna(subset=['value'])

    # trends with yearly and seasonal averages
    seasons = ['spring', 'summer', 'autumn', 'winter', 'all']
    yrs = np.unique(mobs['year'])
    #print('yrs {}'.format(yrs))
    data = {}

    # added to minimize the computation
    for i, seas in enumerate(seasons):
        if only_yearly and not seas == 'all':
            continue
        # initialize seasonal object
        data[seas] = {'date': [], 'jsdate': [], 'val': []}
        # filter the months
        for yr in yrs:
            if seas != 'all':
                catch = mobs[mobs['season'].str.contains(seas + '-' + str(yr))]
            else:
                catch = mobs[mobs['season'].str.contains('-' + str(yr))]
            date = _mid_season_current(seas, yr)

            data[seas]['date'].append(date)
            epoch = datetime.datetime(1970, 1, 1)
            data[seas]['jsdate'] = [(dat - epoch).total_seconds() * 1000
                                    for dat in data[seas]['date']]

            # needs 4 seasons to compute seasonal average to avoid biases
            if (seas == 'all') & (len(np.unique(catch['season'].values)) < 4):
                data[seas]['val'].append(np.nan)
            else:
                #print(catch['value'])
                data[seas]['val'].append(np.nanmean(catch['value']))

        # trends for this season
        data[seas]['trends'] = {}
        # filter period
        for period in periods:
            p0 = int(period[:4])
            p1 = int(period[5:])
            data[seas]['trends'][period] = {}

            # Mann-Kendall test
            x = np.array(data[seas]['jsdate'])
            y = np.array(data[seas]['val'])
            len_period = len(y)

            # works only on not nan values
            x = x[~np.isnan(y)]  # Better with np.isfinite()
            y = y[~np.isnan(y)]

            # filtering to the period limit
            jsp0 = (datetime.datetime(p0, 1, 1) - epoch).total_seconds() * 1000
            jsp1 = (datetime.datetime(p1, 12, 31) -
                    epoch).total_seconds() * 1000

            y = y[(x >= jsp0) & (x <= jsp1)]
            x = x[(x >= jsp0) & (x <= jsp1)]

            # Making sure there is at least 75% coverage in the data period
            # and that we have more than one point.
            if len(y) / len_period >= 0.75 and len(y) > 1:
                # Kendall

                # TODO: this is where you should ask Augustin how things
                # should be restricted by the Kendall tau

                [tau, pval] = kendalltau(x, y)
                #print('pval {}'.format(pval))
                data[seas]['trends'][period]['pval'] = pval

                if pval < 0.1:
                    # Theil slope
                    res = theilslopes(y, x, 0.9)
                    medslope, medintercept, lo_slope, up_slope = res
                    reg = medslope * np.asarray(x) + medintercept * np.ones(
                        len(x))
                    slp = res[0] * 1000 * 60 * 60 * 24 * 365.25 / reg[
                        0]  # slp per milliseconds to slp per year
                    data[seas]['trends'][period][
                        'slp'] = slp * 100  # in percent
                    data[seas]['trends'][period]['reg0'] = reg[0]
                    data[seas]['trends'][period]['t0'] = x[0]
                    data[seas]['trends'][period]['n'] = len(y)
                else:
                    data[seas]['trends'][period]['pval'] = None
                    data[seas]['trends'][period]['slp'] = None
                    data[seas]['trends'][period]['reg0'] = None
                    data[seas]['trends'][period]['t0'] = None
                    data[seas]['trends'][period]['n'] = len(y)
            else:
                data[seas]['trends'][period]['pval'] = None
                data[seas]['trends'][period]['slp'] = None
                data[seas]['trends'][period]['reg0'] = None
                data[seas]['trends'][period]['t0'] = None
                data[seas]['trends'][period]['n'] = len(y)
    return data
    """
    def test_unitconversion_surface_conc():
        a = 10
        temp = unitconv_sfc_conc(a, 2)
        A = unitconv_sfc_conc_bck(temp, 2)
        assert np.abs(a - A) < 0.000001
    """
    """
            matched = np.array(
                [x[idx], xr, y[idx], yr, lums[idx], results["m"]]).T
            names = [
                "x_orig", "x_match", "y_orig", "y_match", "lum_orig",
                "lum_match"
            ]

            tbl_matched = Table(data=matched, names=names)

            ###################################################################

            n = 26
            bins = np.logspace(-3, 2, n)

            mask = tbl_matched["lum_orig"] > 0.5
            f = theilslopes(tbl_matched["lum_match"][mask],
                            tbl_matched["lum_orig"][mask])[0]

            mass_orig = imf.mass_from_luminosity(tbl_matched["lum_orig"])
            mass_found = imf.mass_from_luminosity(results["m"] / f)
            mass_match = imf.mass_from_luminosity(tbl_matched["lum_match"] / f)
            mass_ratio = mass_match / mass_orig

            results.add_column(Column(data=mass_found, name="mass_found"))

            tbl_mass = Table(data=[mass_orig, mass_match],
                             names=["mass_orig", "mass_match"])
            tbl_matched = hstack(tbl_matched, tbl_mass)

            tbl_stats = imf.binned_clipped_stats(mass_orig, mass_ratio, bins)

            # mask = (tbl_stats["std"] > 0) * (tbl_stats["std"] < 99) * (bins > 1E-2)[1:]
Exemplo n.º 26
0
# tstep = len(dates)  # tstep = (edyr-styr+1)*12

for m, model in enumerate(models):
	for v, var in enumerate(vars):
		data = Dataset('%s%s/%s/%s_Amon_%s_%s' % (root, model, res[1], var, model, filename2)).variables[var][stmon:, :, :]
		mask = IO.Land_Mask(root, model)
		data_land_monthly = nanmean(nanmean(data*mask, axis=2), axis=1)
		# calculate global annual mean
		data_land_annual = vstack([sum(data_land_monthly[mon:mon+12]) for mon in range(0, len(data_land_monthly), 12)])

		# calculate moving trend
		slope = np.empty((edyr-styr-9, edyr-styr-9))
		slope.fill(np.nan)
		for st in range(0, edyr - styr - 9):
			for ed in range(st+10, edyr - styr + 1):
				slope[ed - 10, st] = mstats.theilslopes(data_land_annual[st:ed], alpha=0.95)[0]

		# Mapping
		print(model, var)
		x = np.arange(styr, edyr-9, 1.)
		y = np.arange(styr+10, edyr+1, 1.)
		X, Y = np.meshgrid(x, y)
		# create figure
		clevs = arange(limits[v][0], limits[v][1]+0.01, (limits[v][1]-limits[v][0])/100)
		cblevs = arange(limits[v][0], limits[v][1]+0.01, round((limits[v][1]-limits[v][0])/10, 2))
		fig = plt.figure(figsize=(12, 8), dpi=100, facecolor="white")
		font = {'family': 'serif', 'color': 'darkred', 'weight': 'normal', 'size': 50}
		im = plt.contourf(X, Y, slope, clevs, cmap=plt.cm.seismic)
		cb = plt.colorbar(im, ticks=cblevs)
		plt.xlabel("STARTING YEAR")
		plt.ylabel("ENDING YEAR")
Exemplo n.º 27
0
def TheilSenXY(x, y):
    # note: mstats.theilslopes treats its first argument as the dependent variable,
    # so this fits x against y; the [0][0] indexing expects array-like results
    res = mstats.theilslopes(x, y)
    return res[0][0], res[1][0]
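With recent SciPy versions, mstats.theilslopes flattens its inputs and returns scalar slope and intercept values, so the double indexing above may fail for plain 1-D arrays. A minimal sketch of an equivalent helper for 1-D data, keeping the same flipped argument order (the function name is illustrative):

import numpy as np
from scipy.stats import mstats

def theil_sen_xy(x, y):
    # same flipped argument order as above: slope of x as a function of y
    slope, intercept, _, _ = mstats.theilslopes(x, y)
    return slope, intercept

y = np.linspace(0.0, 10.0, 50)
x = 2.0 * y + 1.0 + np.random.default_rng(0).normal(scale=0.1, size=y.size)
print(theil_sen_xy(x, y))    # slope near 2.0, intercept near 1.0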