Example #1
def normalized_diff_mean_power(multarray, labels, smoother_size=(5, 5)):
    lls = np.array(labels)  # make sure this is an array
    # convert to log scale
    arr0 = np.log(multarray[lls == 0])
    arr1 = np.log(multarray[lls == 1])

    m0 = np.nanmean(arr0, axis=0)
    m1 = np.nanmean(arr1, axis=0)

    smoother = np.ones(smoother_size)
    smoother = smoother / np.sum(smoother)

    v0 = np.nanvar(arr0, axis=0, ddof=1)
    v1 = np.nanvar(arr1, axis=0, ddof=1)

    v0 = convolve2d(np.nan_to_num(v0), smoother, mode='same')
    v1 = convolve2d(np.nan_to_num(v1), smoother, mode='same')

    s0 = np.sqrt(v0)
    s1 = np.sqrt(v1)

    n0 = np.sum(lls == 0)
    n1 = np.sum(lls == 1)

    numer = m0 - m1 + 0.5 * (s0 ** 2 - s1 ** 2)
    denom = np.sqrt((s0 ** 2 / n0) + (s1 ** 2 / n1) + (s0 ** 4 / (n0 - 1)) +
        (s1 ** 4 / (n1 - 1)))

    return numer / denom
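The snippet above assumes numpy (np) and scipy.signal.convolve2d are already imported; a minimal usage sketch with synthetic data (not from the original project):

import numpy as np
from scipy.signal import convolve2d  # needed by normalized_diff_mean_power

# synthetic stack: 20 spectrogram-like arrays of shape (64, 64), half labelled 0, half 1
rng = np.random.default_rng(0)
multarray = rng.lognormal(mean=0.0, sigma=1.0, size=(20, 64, 64))
multarray[10:] *= 1.5                      # make class 1 slightly more powerful
labels = [0] * 10 + [1] * 10

z = normalized_diff_mean_power(multarray, labels, smoother_size=(5, 5))
print(z.shape)                             # (64, 64) map of normalized differences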
Example #2
    def calc_stresses(self, beamvel, beamAng):
        """
        Calculate the stresses from the difference in the beam variances.

        Reference: Stacey, Monosmith and Burau; (1999) JGR [104]
        "Measurements of Reynolds stress profiles in unstratified
        tidal flow"
        """
        fac = 4 * np.sin(self['config']['beam_angle'] * deg2rad) * \
            np.cos(self['config']['beam_angle'] * deg2rad)
        # Note: Stacey defines the beams incorrectly for Workhorse ADCPs.
        #       According to the workhorse coordinate transformation
        #       documentation, the instrument's:
        #                        x-axis points from beam 1 to 2, and
        #                        y-axis points from beam 4 to 3.
        #       Therefore:
        stress = ((np.nanvar(self.reshape(beamvel[0]), axis=-1) -
                   np.nanvar(self.reshape(beamvel[1]), axis=-1)) + 1j *
                  (np.nanvar(self.reshape(beamvel[2]), axis=-1) -
                   np.nanvar(self.reshape(beamvel[3]), axis=-1))
                  ) / fac
        if self.config.orientation == 'up':
            # This comes about because, when the ADCP is 'up', the u
            # and w velocities need to be multiplied by -1 (equivalent
            # to adding pi to the roll).  See the coordinate
            # transformation documentation for more info.
            #
            # The uw (real) component has two minus signs, but the vw (imag)
            # component only has one, therefore:
            stress.imag *= -1
        stress *= rotate.inst2earth_heading(self)
        if self.props['coord_sys'] == 'principal':
            stress *= np.exp(-1j * self.props['principal_angle'])
        return stress.real, stress.imag
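A standalone sketch of the variance-difference relation used above (Stacey et al. 1999), with synthetic along-beam velocities; the beam angle, beam numbering and sign conventions here are simplified assumptions and would need to follow the instrument documentation, as the comments in the method warn:

import numpy as np

deg2rad = np.pi / 180.0
beam_angle = 20.0                       # typical ADCP beam angle in degrees (assumed)
fac = 4 * np.sin(beam_angle * deg2rad) * np.cos(beam_angle * deg2rad)

rng = np.random.default_rng(1)
# synthetic along-beam velocities, shape (4 beams, n_bins, n_samples)
beamvel = rng.normal(0.0, 0.05, size=(4, 30, 2000))
beamvel[0] *= 1.1                       # give beam 1 slightly higher variance than beam 2

# Reynolds stress components from the difference of opposing-beam variances
uw = (np.nanvar(beamvel[0], axis=-1) - np.nanvar(beamvel[1], axis=-1)) / fac
vw = (np.nanvar(beamvel[2], axis=-1) - np.nanvar(beamvel[3], axis=-1)) / fac
print(uw.shape, vw.shape)               # (30,) profiles of stress estimates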
Example #3
def plot_profile_TKE_wind(synth):

	fig,ax=plt.subplots()
	colors=get_colors(synth)
	c=0	
	for key,value in synth.iteritems():
		for v in value:
			scase=str(key).zfill(2)
			sleg=str(v).zfill(2)
			synthfile=base_dir+'c'+scase+'/leg'+sleg+'.cdf'
			U = read_synth(synthfile,'F2U')
			V = read_synth(synthfile,'F2V')
			Z = read_synth(synthfile,'z')
			x=[]
			y=[]
			for n,z in enumerate(Z[1:15]):
				u=U[:,:,n+1]
				v=V[:,:,n+1]
				u_var=np.nanvar(u)
				v_var=np.nanvar(v)
				TKE=(u_var+v_var)/2.
				x.append(TKE)
				y.append(z)
			label='Case: '+scase+' Leg: '+sleg
			ax.plot(x,y,'-',label=label,color=colors[c])
			ax.set_ylim([0,4])
			ax.set_xlabel('TKE [m2 s^-2]')
			ax.set_ylabel('Altitude MSL [km]')
			c+=1
	plt.suptitle('Spatial TKE at P3 synth levels ')			
	plt.draw()
	plt.legend()
Example #4
def plot_profile_variance(dbz,vvel,ht, ax,case,ncases):

	dbz_variance=[]
	vvel_variance=[]
	count_gates=[]
	global ti
	global n
	global colors 

	if n==0:
		# colors=sns.color_palette('hls',ncases)
		colors=sns.color_palette('Paired',ncases)

	for i in range(len(ht)):
		dbz_variance.append(np.nanvar(dbz[i,:]))
		vvel_variance.append(np.nanvar(vvel[i,:]))
		count_gates.append(vvel[i,:].size-np.sum(np.isnan(vvel[i,:])))

	inid=datetime(*(reqdates[case]['ini']+[0,0]))
	endd=datetime(*(reqdates[case]['end']+[0,0]))
	ti.append('\nCase '+case+': '+inid.strftime('%Y-%b %dT%H:%M')+endd.strftime(' - %dT%H:%M UTC'))

	if n<7:
		marker='None'
		# marker='o'
	else:
		marker='o'

	dbzv=[0,180]
	vvelv=[0,6]
	if np.any(ax):
		ax[0].plot(dbz_variance,ht,marker=marker,color=colors[n])
		ax[1].plot(vvel_variance,ht,marker=marker,color=colors[n])
		ax[2].plot(count_gates,ht,marker=marker,color=colors[n],label='case '+case)
		n+=1
	else:
		fig,ax=plt.subplots(1,3,sharey=True,figsize=(12,8))
		ax[0].plot(dbz_variance,ht,color=colors[n])
		ax[1].plot(vvel_variance,ht,color=colors[n])
		ax[2].plot(count_gates,ht,color=colors[n], label='case '+case)
		ax[0].set_ylabel('Height MSL [km]')
		ax[0].set_xlabel('Reflectivity [dBZ^2]')
		ax[1].set_xlabel('Vertical velocity [m2 s^-2]')
		ax[2].set_xlabel('Count good gates')
		ax[0].set_xlim(dbzv)
		ax[1].set_xlim(vvelv)
		n+=1
		return ax

	if n==ncases and ncases==4:
		plt.suptitle('SPROF time variance'+''.join(ti))
		plt.subplots_adjust(top=0.85, left=0.05, right=0.95, wspace=0.05)
		ax[2].legend(loc='lower left')		
	elif n==ncases and ncases>4:
		plt.suptitle('SPROF time variance')
		plt.subplots_adjust(top=0.9, left=0.05, right=0.95, wspace=0.06)
		ax[2].legend()		

	plt.draw()
Example #5
    def test_nanvar(self):
        tgt = np.var(self.mat)
        for mat in self.integer_arrays():
            assert_equal(np.nanvar(mat), tgt)

        tgt = np.var(mat, ddof=1)
        for mat in self.integer_arrays():
            assert_equal(np.nanvar(mat, ddof=1), tgt)
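For reference, a small standalone check of the behaviour these assertions rely on: np.nanvar matches np.var when no NaNs are present and otherwise ignores them, with ddof passed through unchanged.

import numpy as np

a = np.array([[1.0, 2.0, np.nan],
              [4.0, 5.0, 6.0]])

print(np.nanvar(a))                     # variance over the 5 non-NaN values
print(np.nanvar(a, axis=0))             # column-wise; last column uses only the value 6.0
print(np.nanvar(a, ddof=1))             # sample variance (N - 1 in the denominator)
print(np.var(a[~np.isnan(a)], ddof=1))  # same result computed by masking first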
Example #6
def test_nanvar(eng):
    original = arange(24).reshape((2, 3, 4)).astype(float64)
    data = fromlist(list(original), engine=eng)
    assert allclose(data.nanvar().shape, (1, 3, 4))
    assert allclose(data.nanvar().toarray(), nanvar(original, axis=0))

    original[0, 2, 3] = nan
    original[1, 0, 2] = nan
    original[1, 2, 2] = nan
    data = fromlist(list(original), engine=eng)
    assert allclose(data.nanvar().shape, (1, 3, 4))
    assert allclose(data.nanvar().toarray(), nanvar(original, axis=0))
Example #7
def bayes_precision(x, y, distribution='normal', posterior_width=0.08, num_iters=25000, inference='sampling'):
    """ Bayes precision computation.

    :param x: sample of a treatment group
    :type  x: pd.Series or list (array-like)
    :param y: sample of a control group
    :type  y: pd.Series or list (array-like)
    :param distribution: name of the KPI distribution model, which assumes a Stan model file with the same name exists
    :type  distribution: str
    :param posterior_width: the stopping criterion, threshold of the posterior width
    :type  posterior_width: float
    :param num_iters: number of iterations of bayes sampling
    :type  num_iters: int
    :param inference: sampling or variational inference method for approximating the posterior
    :type  inference: str

    :return: results of type EarlyStoppingTestStatistics (without p-value and stat. power)
    :rtype:  EarlyStoppingTestStatistics
    """

    logger.info("Started running bayes precision with {} procedure, treatment group of size {}, "
                "control group of size {}, {} distribution.".format(len(x), len(y), distribution, inference))

    traces, n_x, n_y, mu_x, mu_y = _bayes_sampling(x, y, distribution=distribution,
                                                   num_iters=num_iters, inference=inference)
    trace_normalized_effect_size = get_trace_normalized_effect_size(distribution, traces)
    trace_absolute_effect_size = traces['delta']

    credible_mass = 0.95
    left_out      = 1.0 - credible_mass
    p1            = round(left_out/2.0, 5)
    p2            = round(1.0 - left_out/2.0, 5)

    credible_interval_delta            = HDI_from_MCMC(trace_absolute_effect_size, credible_mass)
    credible_interval_delta_normalized = HDI_from_MCMC(trace_normalized_effect_size, credible_mass)

    stop = credible_interval_delta_normalized[1] - credible_interval_delta_normalized[0] < posterior_width

    treatment_statistics = SampleStatistics(int(n_x), float(mu_x), float(np.nanvar(x)))
    control_statistics   = SampleStatistics(int(n_y), float(mu_y), float(np.nanvar(y)))
    variant_statistics   = BaseTestStatistics(control_statistics, treatment_statistics)

    logger.info("Finished running bayes precision with {} procedure, treatment group of size {}, "
                "control group of size {}, {} distribution.".format(len(x), len(y), distribution, inference))

    return EarlyStoppingTestStatistics(variant_statistics.control_statistics,
                                       variant_statistics.treatment_statistics,
                                       float(mu_x - mu_y),
                                       dict([(p * 100, v) for p, v in zip([p1, p2], credible_interval_delta)]),
                                       None, None, stop)
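HDI_from_MCMC comes from the surrounding project; a common way to compute a highest-density interval from posterior samples looks roughly like the hypothetical helper below (an assumption for illustration, not the project's implementation):

import numpy as np

def hdi_from_samples(samples, credible_mass=0.95):
    """Hypothetical HDI: narrowest interval containing `credible_mass` of the samples."""
    s = np.sort(np.asarray(samples))
    n_kept = int(np.floor(credible_mass * len(s)))
    widths = s[n_kept:] - s[:len(s) - n_kept]
    i = np.argmin(widths)
    return s[i], s[i + n_kept]

draws = np.random.default_rng(2).normal(0.1, 0.5, size=25000)
low, high = hdi_from_samples(draws, 0.95)
stop = (high - low) < 0.08              # same kind of stopping rule as posterior_width above
print(low, high, stop)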
Example #8
def test_nanvar(eng):
    arr = array([arange(8), arange(8)]).astype(float64)
    data = fromarray(arr, engine=eng)
    val = data.nanvar().toarray()
    expected = nanvar(data.toarray(), axis=0)
    assert allclose(val, expected)
    assert str(val.dtype) == 'float64'
    arr[0, 4] = nan
    arr[1, 3] = nan
    arr[1, 4] = nan
    data = fromarray(arr, engine=eng)
    val = data.nanvar().toarray()
    expected = nanvar(data.toarray(), axis=0)
    assert allclose(val, expected, equal_nan=True)
    assert str(val.dtype) == 'float64'
Example #9
    def test_GWAS(self):
        Y = np.genfromtxt(self._liverPhenos)

        # Loading npdump and first 1000 snps for speed
        K = np.load(self._liverKinshipMatrix)

        snps = np.load(self._liver1000SNPFile).T
        vars = np.nanvar(snps, axis=0) #variances across the rows ignoring NaN, used to check which SNPs were not polymorphic across the given individuals

        TS,PS = lmm.GWAS(Y,snps,K,REML=True,refit=True)

        #SNPs that are not polymorphic (in the given individuals being tested) will have variance 0, this check ensures
        #that only these SNPs have a return value of NaN
        for i in range(len(PS)):
           self.assertTrue( not math.isnan(PS[i]) or vars[i] == 0, "NaN found in results corresponding to polymorphic SNP")

        results = np.array([TS,PS])
        ansKey = np.load(self._liverTestFile)

        #these results include np.nan values, so allclose cannot be used, also the results are similar with each
        #run, but do vary, so we can only check for similarity to a precision of about 1e-06
        for i in range(results.shape[0]):
            for j in range(results.shape[1]):
                a = results[i,j]
                b = ansKey[i,j]
                self.assertTrue( (np.isnan(a) and np.isnan(b)) or abs(a - b) < 1e-06 ,
                                 "Mismatch on values: " + str(a) + " and " + str(b))
Example #10
 def c(self, P, h, bw):
     """Calculate the sill"""
     c = np.nanvar(P[:, 2])
     if h == 0:
         return c
     else:
         return c - self.semivarh(P, h, bw)
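The sill here is simply the overall variance of the attribute column P[:, 2]; a standalone sketch of the idea (empirical semivariance levelling off at the sill), independent of the class' semivarh helper:

import numpy as np

rng = np.random.default_rng(7)
z = rng.normal(0.0, 2.0, size=500)        # attribute values at 1-D locations
x = np.sort(rng.uniform(0, 100, size=500))

sill = np.nanvar(z)                       # total variance = theoretical sill

def semivariance(h, bw=1.0):
    """Mean squared difference of pairs whose separation is within bw of lag h."""
    d = np.abs(x[:, None] - x[None, :])
    i, j = np.where((d >= h - bw) & (d <= h + bw))
    return 0.5 * np.mean((z[i] - z[j]) ** 2)

print(sill, semivariance(5.0), semivariance(20.0))   # uncorrelated data: both near the sill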
Example #11
    def compute(self, today, assets, out, close):

        # get returns dataset
        returns = ((close - np.roll(close, 1, axis=0)) / np.roll(close, 1, axis=0))[1:]

        # get index of benchmark
        benchmark_index = np.where((assets == 8554) == True)[0][0]

        # get returns of benchmark
        benchmark_returns = returns[:, benchmark_index]

        # prepare X matrix (x_is - x_bar)
        X = benchmark_returns
        X_bar = np.nanmean(X)
        X_vector = X - X_bar
        X_matrix = np.tile(X_vector, (len(returns.T), 1)).T

        # prepare Y matrix (y_is - y_bar)
        Y_bar = np.nanmean(close, axis=0)
        Y_bars = np.tile(Y_bar, (len(returns), 1))
        Y_matrix = returns - Y_bars

        # prepare variance of X
        X_var = np.nanvar(X)

        # multiply X matrix and Y matrix and sum (dot product),
        # then divide by variance of X
        # this gives the MLE of Beta
        out[:] = (np.sum((X_matrix * Y_matrix), axis=0) / X_var) / (len(returns))
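The same beta estimate in isolation, for one asset against a benchmark: the cross-product of demeaned returns divided by the benchmark variance, i.e. cov(x, y) / var(x). Synthetic data, not from the pipeline above.

import numpy as np

rng = np.random.default_rng(3)
bench = rng.normal(0.0, 0.01, size=250)            # benchmark daily returns
asset = 1.3 * bench + rng.normal(0.0, 0.005, 250)  # asset with true beta ~ 1.3

x = bench - np.nanmean(bench)
y = asset - np.nanmean(asset)
beta = np.nansum(x * y) / np.nanvar(bench) / len(bench)
print(beta)                                        # close to 1.3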
Example #12
def cal_stats(in_fc, col_names):
    """Calculate stats for an array of double types, with nodata (nan, None)
    :  in the column.
    :Requires:
    :---------
    : in_fc - input featureclass or table
    : col_names - the columns... numeric (floating point, double)
    :
    :Notes:
    :------  see the args tuple for examples of nan functions
    :  np.nansum(b, axis=0)   # by column
    :  np.nansum(b, axis=1)   # by row
    :  c_nan = np.count_nonzero(~np.isnan(b), axis=0) count nan if needed
    """
    a = arcpy.da.FeatureClassToNumPyArray(in_fc, col_names)  # "*")
    b = a.view(np.float).reshape(len(a), -1)
    if len(a.shape) == 1:
        ax = 0
    else:
        ax = [1, 0][True]  # ax = [1, 0][colwise]  colwise= True
    mask = np.isnan(b)
    cnt = np.sum(~mask, axis=ax, dtype=np.intp, keepdims=False)
    n_sum = np.nansum(b, axis=0)
    n_mean = np.nanmean(b, axis=0)
    n_var = np.nanvar(b, axis=0)
    n_std = np.nanstd(b, axis=0)
    sk, kurt = skew_kurt(b, avg=n_mean, var_x=n_var, std_x=n_std,
                         col=True, mom='both')
    args = (col_names, cnt, n_sum, np.nanmin(b, axis=0), np.nanmax(b, axis=0),
            np.nanmedian(b, axis=0), n_mean, n_std, n_var, sk, kurt)
    return col_names, args
Example #13
    def _fit_model(self, fcol, dis):

        """Determine the best fit for one feature column given distribution name

        Parameters
        ----------
        fcol: feature column, array
        dis: distribution name, String


        Returns
        ----------
        function: fit model with feature as argument

        """
        if dis == 'ratio':
            itfreq = itemfreq(fcol)
            uniqueVars = itfreq[:,0]
            freq = itfreq[:,1]
            rat = freq/sum(freq)
            rat = dict(zip(uniqueVars, rat.T))
            func = lambda x: self. funcs[dis](x, rat)
        if dis == 'poisson':
            lamb = np.nanmean(fcol, axis = 0)
            func = lambda x: self.funcs[dis](x, lamb)
        if dis == 'norm':
            sigma = np.nanvar(fcol, axis=0)
            theta = np.nanmean(fcol, axis = 0)
            func = lambda x: self.funcs[dis](x, sigma, theta)
        return np.vectorize(func)
Example #14
def cal_stats(a):
    """Calculate stats for an array of double types, with nodata (nan, None)
    in the column.

    Notes
    -----
    see the args tuple for examples of nan functions::

        >>> np.nansum(b, axis=0)   # by column
        >>> np.nansum(b, axis=1)   # by row
        >>> c_nan = np.count_nonzero(~np.isnan(b), axis=0) count nan if needed
    """
    if len(a.shape) == 1:
        ax = 0
    else:
        ax = [1, 0][True]  # ax = [1, 0][colwise]  colwise= True
    mask = np.isnan(a)
    n = len(a)
    cnt = np.sum(~mask, axis=ax, dtype=np.intp, keepdims=False)
    n_sum = np.nansum(a, axis=0)
    n_min = np.nanmin(a, axis=0)
    n_max = np.nanmax(a, axis=0)
    n_mean = np.nanmean(a, axis=0)
    n_med = np.nanmedian(a, axis=0)
    n_std = np.nanstd(a, axis=0)
    n_var = np.nanvar(a, axis=0)
    col_names = ['N', 'n', 'sum', 'min', 'max', 'mean', 'median',
                 'std', 'var', 'skew', 'kurt']
    sk, kurt = skew_kurt(a, avg=n_mean, var_x=n_var, std_x=n_std,
                         col=True, mom='both')
    args = [n, cnt, n_sum, n_min, n_max, n_mean, n_med, n_std, n_var, sk, kurt]
    z = list(zip(col_names, args))
    s = "".join(["\n{:<6} {}".format(*i) for i in z])
    return s
Example #15
def std(values, errors):
    """takes two numpy arrays: values and errors in these values
    estimate the grand standard deviation as error value
    not accurate for multiple rounds of averaging"""
    var1 = np.nanvar(values, ddof=1)    # variance in the values
    var2 = np.nanmean(np.square(errors)) # mean-square of input errors
    return np.sqrt(var1 + var2)
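A quick usage sketch for the helper above, assuming it is called on repeated measurements with their per-measurement uncertainties (illustrative numbers only):

import numpy as np

values = np.array([10.1, 9.8, 10.4, np.nan, 10.0])   # repeated measurements
errors = np.array([0.2, 0.3, 0.2, 0.3, 0.25])        # their individual uncertainties

grand_std = std(values, errors)
print(grand_std)   # combines the scatter of the values with the mean-square input error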
Example #16
    def _compute_zr2011_dataframe(self):
        """ 
        Get the dataframe needed for the mid-range temperatures, 
        and add a mass bin for 6000 K using the gyrochronology relation.
        """
        # Read in the dataframe from disk.
        df = pd.read_csv('data/velocity_pdfs.csv', header=1)

        # Compute equatorial velocities for a 6000 K star at this age.
        teff = np.ones_like(self.age) * 6000.0
        v_eq = self._gyro_velocities(teff, self.age).to(u.km/u.s).value 
        v_eq[v_eq > 500] = np.nan  # Remove unphysical vsini values.

        # Calculate approximate maxwellian parameters from the velocities.
        alpha = np.sqrt(np.nanvar(v_eq) * np.pi / (3*np.pi - 8))
        l = np.nanmedian(v_eq) - 2*alpha*np.sqrt(2/np.pi)
          
        # Add a row to the dataframe with this information
        df.loc[df.index.max()+1] = [1.0, 1.24, 0, 25, 100, alpha*np.sqrt(2), l]

        # Calculate a few more columns for the dataframe
        df['mid_mass'] = (df.mass_high + df.mass_low) / 2.0
        df['slow_alpha'] = df.slow_mu / np.sqrt(2)
        df['fast_alpha'] = df.fast_mu / np.sqrt(2)
        df['slow_frac'] /= 100.0
        df['fast_frac'] /= 100.0

        # Sort so that interpolation works
        df = df.sort_values(by='mid_mass').reset_index()

        return df
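The alpha recovered above is the Maxwellian scale parameter implied by the sample variance, using Var = a^2 (3*pi - 8) / pi; a quick numerical check of that relation, assuming scipy is available:

import numpy as np
from scipy import stats

a_true = 8.0                                           # Maxwellian scale parameter
v = stats.maxwell.rvs(scale=a_true, size=200_000, random_state=8)

a_est = np.sqrt(np.nanvar(v) * np.pi / (3 * np.pi - 8))
print(a_true, a_est)                                   # the two should agree closely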
Example #17
def autocorrelation_hourly(data):
    from matplotlib.pyplot import plot, xlabel, ylabel, show
    from numpy import nanmean, nanvar, mean, multiply, arange
    # We choose 7 days and plus-minus 6 hours as the possible periodicity
    # in traffic.
    START_PERIOD = 7*24 - 6
    END_PERIOD = 7*24 + 6
    V = replace_placeholder(data, value = nanmean(data))
    # We don't take the variance of entries that we replaced with nanmean.
    sigma2 = nanvar(data)
    autocorr_dict = {period:0 for period in range(START_PERIOD,END_PERIOD+1)}
    Deviations = V - nanmean(V, axis=0)
    for period in range(START_PERIOD, END_PERIOD+1):
        autocorr = nanmean([multiply(Deviations[t],Deviations[t+period])
                            for t in range(len(V)-period)])/sigma2
        autocorr_dict[period] = autocorr
        print(period)

    # Peaks in plot correspond to high autocorrelation i.e. high
    # periodicity trend.
    plot(arange(START_PERIOD, END_PERIOD+1),
         [autocorr_dict[period] for period in range(START_PERIOD, END_PERIOD+1)],
         'o-')

    ylabel('Average autocorrelation over full links')
    xlabel('Assumed period of data (in hours)')
    show()
    #legend(bbox_to_anchor=(1.35, 0.95))
    return None
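A self-contained version of the same idea on a single 1-D series: autocorrelation at a candidate lag, normalized by np.nanvar, with NaN gaps handled by nanmean. The hourly traffic data and the replace_placeholder helper from the snippet are not reproduced here.

import numpy as np

rng = np.random.default_rng(4)
t = np.arange(24 * 7 * 8)                               # eight weeks of hourly samples
series = np.sin(2 * np.pi * t / (24 * 7)) + rng.normal(0, 0.3, t.size)
series[rng.choice(t.size, 50, replace=False)] = np.nan  # missing observations

sigma2 = np.nanvar(series)
dev = series - np.nanmean(series)

def autocorr(lag):
    return np.nanmean(dev[:-lag] * dev[lag:]) / sigma2

print(autocorr(24 * 7))        # close to 1: weekly periodicity
print(autocorr(24 * 7 // 2))   # negative: half a week out of phase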
Example #18
def test_var():
    out = df.i32.reshape((2, 2, 5)).var(axis=2).T
    eq(c.points(df, 'x', 'y', ds.var('i32')), out)
    eq(c.points(df, 'x', 'y', ds.var('i64')), out)
    out = np.nanvar(df.f64.reshape((2, 2, 5)), axis=2).T
    eq(c.points(df, 'x', 'y', ds.var('f32')), out)
    eq(c.points(df, 'x', 'y', ds.var('f64')), out)
Example #19
def get_FR_stats(hdf, save=False, return_=False, plot=True):
    eps = 10**-12
    sc = hdf.root.task[:]['spike_counts']
    mn = np.nanmean(sc[:, :, 0], axis=0)
    vr = np.nanvar(sc[:, :, 0], axis=0)
    ff = vr/(mn+eps)

    if plot:
        f, ax = plt.subplots()
        ax.hist(np.mean(sc[:, :, 0], axis=0))
        ax.set_title('Hist. of Mean FR.')
        ax.set_xlabel('FR')
        ax.set_ylabel('Counts')

        f2, ax2 = plt.subplots()
        try:
            ax2.hist(ff)
        except:
            print 'error FF: ', ff
        ax2.set_title('Hist. of Fano Factor')
        ax2.set_xlabel('Fano Factor')
        ax2.set_ylabel('Counts')

    if save:
        f.savefig(hdf.filename[:-4]+'_mnFR.png', format='png')
        f2.savefig(hdf.filename[:-4]+'_FF.png', format='png')
    if return_:
        return np.mean(sc[:,:,0], axis=0), ff 
Example #20
def ExponentialTransformErrVarShapingFactor(data, comparedata,G=10):
    """
    This function uses the variance of the error terms between observed and simulated data as a basis to calculate
    the likelihood.

    .. math::

            p=-G\\cdot Var(E(x))

    The factor `G` comes from the DREAMPar model. So this factor can be changed according to the used model.

    For more details see also: http://onlinelibrary.wiley.com/doi/10.1029/95WR03723/epdf.

    `Usage:` Maximizing the likelihood value guides to the best model.

    :param data: observed measurements as a numerical list
    :type data: list
    :param comparedata: simulated data from a model which should fit the original data somehow
    :type comparedata: list
    :param G: DREAMPar model parameter `G`
    :type G: float
    :return: the p value as a likelihood
    :rtype: float
    """
    __standartChecksBeforeStart(data, comparedata)

    errArr = np.array(__calcSimpleDeviation(data, comparedata))

    return -G*np.nanvar(errArr)
Example #21
def computeFisherScore(data, class_ass, nb_classes):
	'''
	The Fisher Score assigns a rank to each of the features, with the goal of finding the subset of features of the data
	such that, in the data space spanned by the selected features, the distances between data points in different classes are
	as large as possible and the distances between data points in the same class are as small as possible.

	Input
		- data: matrix of inputs, size N x M, where N is the number of trials and M is the number of features
		- class_ass: array of class assignments, size 1 x N, where N is the number of trials
		- nb_classes: number of classes
	Output
		- Fscores: array of scores, size 1 x M, for each of the features
	'''
	num_trials, num_features = data.shape
	within_class_mean = np.zeros([nb_classes,num_features]) 	# mean for each feature within each class
	within_class_var = np.zeros([nb_classes,num_features]) 		# variance for each feature within each class
	num_points_within_class = np.zeros([1,nb_classes])  			# number of points within each class 
	
	for i in range(nb_classes):
		in_class = np.ravel(np.nonzero(class_ass == i))
		num_points_within_class[0,i] = len(in_class)
		class_data = data[in_class,:]  	# extract trails classified as belonging to this class
		within_class_mean[i,:] = np.nanmean(class_data, axis=0)  # length of mean vector should be equal to M, the number of features
		within_class_var[i,:] = np.nanvar(class_data,axis=0)

	between_class_mean = np.asmatrix(np.mean(within_class_mean,axis=0))
	between_class_mean = np.dot(np.ones([nb_classes,1]), between_class_mean)

	Fscores = np.dot(num_points_within_class,np.square(within_class_mean - between_class_mean))/np.dot(num_points_within_class,within_class_var)

	return Fscores
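A small synthetic check of the function above: two Gaussian classes that differ only in the first feature, so the first Fisher score should dominate (illustrative data only):

import numpy as np

rng = np.random.default_rng(5)
n_per_class, n_features = 100, 3
class0 = rng.normal(0.0, 1.0, size=(n_per_class, n_features))
class1 = rng.normal(0.0, 1.0, size=(n_per_class, n_features))
class1[:, 0] += 3.0                      # only feature 0 separates the classes

data = np.vstack([class0, class1])
class_ass = np.array([0] * n_per_class + [1] * n_per_class)

print(computeFisherScore(data, class_ass, nb_classes=2))
# feature 0 clearly dominates (score around 2.3 vs near 0 for the others)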
Example #22
def get_stats(a):
    """Computes mean, D_T or D_R, and standard error for a list.
    """
    a = np.asarray(a)
    n = a.shape[-1]
    keepdims = a.ndim > 1
    M = np.nanmean(a, -1, keepdims=keepdims)
    # c = a - M
    # variance = np.einsum('...j,...j->...', c, c)/n
    variance = np.nanvar(a, -1, keepdims=keepdims, ddof=1)
    SE = np.sqrt(variance)/sqrt(n - 1)
    SK = skew(a, -1, nan_policy='omit')
    KU = kurtosis(a, -1, nan_policy='omit')
    SK_t = skewtest(a, -1, nan_policy='omit')
    KU_t = kurtosistest(a, -1, nan_policy='omit')
    if keepdims:
        SK = SK[..., None]
        KU = KU[..., None]
    else:
        SK = float(SK)
        KU = float(KU)
    stat = {'mean': M, 'var': variance, 'std': SE,
            'skew': SK, 'skew_test': float(SK_t.statistic),
            'kurt': KU, 'kurt_test': float(KU_t.statistic)}
    print '\n'.join(['{:>10}: {: .4f}'.format(k, v) for k, v in stat.items()])
    return stat
Example #23
    def fit_cols(self, attributes, x, n_vals):
        """
        Return `EuclideanColumnsModel` with stored means and variances
        for normalization and imputation.
        """
        def nowarn(msg, cat, *args, **kwargs):
            if cat is RuntimeWarning and (
                    msg == "Mean of empty slice"
                    or msg == "Degrees of freedom <= 0 for slice"):
                if self.normalize:
                    raise ValueError("some columns have no defined values")
            else:
                orig_warn(msg, cat, *args, **kwargs)

        self.check_no_discrete(n_vals)
        # catch_warnings resets the registry for "once", while avoiding this
        # warning would be annoying and slow, hence patching
        orig_warn = warnings.warn
        with patch("warnings.warn", new=nowarn):
            means = np.nanmean(x, axis=0)
            vars = np.nanvar(x, axis=0)
        if self.normalize and not vars.all():
            raise ValueError("some columns are constant")
        return EuclideanColumnsModel(
            attributes, self.impute, self.normalize, means, vars)
Example #24
def plot_profile_variance_dbz(synth):

	fig,ax=plt.subplots()
	colors=get_colors(synth)
	c=0	
	for key,value in synth.iteritems():
		for v in value:
			scase=str(key).zfill(2)
			sleg=str(v).zfill(2)
			synthfile=base_dir+'c'+scase+'/leg'+sleg+'.cdf'
			DBZ = read_synth(synthfile,'MAXDZ')
			Z = read_synth(synthfile,'z')
			x=[]
			y=[]
			for n,z in enumerate(Z[1:15]):
				dbz=DBZ[:,:,n+1]
				# dbz[dbz<15]=np.nan
				x.append(np.nanvar(dbz))
				# zz=10**(dbz/10.)
				# x.append(np.nanvar(zz))  # similar to dbz but in linear scale
				y.append(z)

			label='Case: '+scase+' Leg: '+sleg
			ax.plot(x,y,'-',label=label,color=colors[c])
			ax.set_ylim([0,4])
			ax.set_xlim([0,70])
			ax.set_xlabel('Reflectivity variance [dBZ^2]')
			ax.set_ylabel('Altitude MSL [km]')
			c+=1
	plt.suptitle('Spatial variance at P3 synth levels ')			
	plt.draw()
	plt.legend()
Example #25
def plot_profile_variance_wind(synth):

	fig,ax=plt.subplots()
	colors=get_colors(synth)
	c=0	
	for key,value in synth.iteritems():
		for v in value:
			scase=str(key).zfill(2)
			sleg=str(v).zfill(2)
			synthfile=base_dir+'c'+scase+'/leg'+sleg+'.cdf'
			U = read_synth(synthfile,'F2U')
			V = read_synth(synthfile,'F2V')
			Z = read_synth(synthfile,'z')
			x=[]
			y=[]
			for n,z in enumerate(Z[1:15]):
				u=U[:,:,n+1]
				v=V[:,:,n+1]
				wdir = (np.arctan2(u,v)*180/np.pi)+180.
				x.append(np.nanvar(wdir))
				y.append(z)
			label='Case: '+scase+' Leg: '+sleg
			ax.plot(x,y,'-',label=label,color=colors[c])
			ax.set_ylim([0,4])
			ax.set_xlim([0,700])
			ax.set_xlabel('Wind direction variance [deg^2]')
			ax.set_ylabel('Altitude MSL [km]')
			c+=1
	plt.suptitle('Spatial variance at P3 synth levels ')			
	plt.draw()
	plt.legend()
Example #26
 def nanvar(self):
     """
     Compute the variance across images ignoring the NaNs
     """
     if self.mode == 'spark':
         return self._constructor(self.values.nanvar(axis=0, keepdims=True))
     else:
         return self._constructor(expand_dims(nanvar(self.values, axis=0), axis=0))
Example #27
 def nanvar(self):
     """
     Compute the variance across records
     """
     if self.mode == 'spark':
         return self._constructor(self.values.nanvar(axis=self.baseaxes, keepdims=True))
     else:
         return self._constructor(expand_dims(nanvar(self.values, axis=self.baseaxes), axis=self.baseaxes[0]))
Example #28
 def test_nanvar_with_ddof(self):
     x = np.random.uniform(0, 10, (20, 100))
     np.fill_diagonal(x, np.nan)
     for axis in [None, 0, 1]:
         np.testing.assert_almost_equal(
             np.nanvar(x, axis=axis, ddof=10),
             nanvar(csr_matrix(x), axis=axis, ddof=10),
         )
Example #29
def NashSutcliffeEfficiencyShapingFactor(data, comparedata,G=10):
    """
    This function uses one minus the ratio of the variance of the error terms between observed and simulated data
    to the variance of the observed data as a basis to calculate the
    likelihood, and transforms the values with the logarithm.

    .. math::

            p=G\\cdot\\log(1-\\frac{Var(E(x))}{Var(Y)})

    The factor `G` comes from the DREAMPar model. So this factor can be changed according to the used model.

    For more details see also: http://onlinelibrary.wiley.com/doi/10.1029/95WR03723/epdf.

    `Usage:` Maximizing the likelihood value guides to the best model. If the function returns NAN, then you cannot use this
    calculation method, or the `comparedata` is too far away from `data`.

    :param data: observed measurements as a numerical list
    :type data: list
    :param comparedata: simulated data from a model which should fit the original data somehow
    :type comparedata: list
    :param G: DREAMPar model parameter `G`
    :type G: float
    :return: the p value as a likelihood
    :rtype: float
    """

    __standartChecksBeforeStart(data, comparedata)

    errArr = np.array(__calcSimpleDeviation(data, comparedata))

    if np.nanvar(data) == 0.0:
        warnings.warn("[NashSutcliffeEfficiencyShapingFactor] reaslized that the variance of the data is zero. Thereforee is no likelihood calculation possible")
        return np.NAN
    else:
        ratio = np.nanvar(errArr)/np.nanvar(data)

        if ratio > 1:
            warnings.warn("[NashSutcliffeEfficiencyShapingFactor]: The ratio between residual variation and observation "
                          "variation is bigger then one and therefore"
                          "we can not calculate the liklihood. Please use another function which fits to this data and / or "
                          "model")
            return np.NAN
        else:
            return G*np.log(1-ratio)
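The private helpers __standartChecksBeforeStart and __calcSimpleDeviation belong to the surrounding module; the formula itself can be reproduced standalone as below, a sketch under the assumption that the deviation is simply simulated minus observed:

import numpy as np

G = 10
data = np.array([1.2, 1.5, 1.7, 2.0, 1.8, 1.6])          # observed
comparedata = np.array([1.1, 1.6, 1.6, 2.1, 1.7, 1.5])   # simulated

err = comparedata - data                                  # assumed deviation definition
ratio = np.nanvar(err) / np.nanvar(data)
p = G * np.log(1 - ratio) if ratio < 1 else np.nan        # likelihood shaping factor
print(ratio, p)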
Example #30
def test_var():
    out = xr.DataArray(df.i32.values.reshape((2, 2, 5)).var(axis=2, dtype='f8').T,
                       coords=coords, dims=dims)
    assert_eq(c.points(ddf, 'x', 'y', ds.var('i32')), out)
    assert_eq(c.points(ddf, 'x', 'y', ds.var('i64')), out)
    out = xr.DataArray(np.nanvar(df.f64.values.reshape((2, 2, 5)), axis=2).T,
                       coords=coords, dims=dims)
    assert_eq(c.points(ddf, 'x', 'y', ds.var('f32')), out)
    assert_eq(c.points(ddf, 'x', 'y', ds.var('f64')), out)
Example #31
def causal_snp(n=10):
    f = '../data/arabi.bim'
    d = pd.read_csv(f, sep='\t', header=None)
    for j in range(n):
        r = np.random.choice(d[1], 10)
        with open('causal_{}.snplist'.format(j), 'w+') as f:
            for i in r:
                f.write(i + '\n')

    print('into gene_expr')
    bxy = args['bxy']
    bzx = args['bzx']
    rzx2 = args['rzx2']
    rxy2 = args['rxy2']
    c = args['c']
    rc2 = args['rc2']
    if causal_lst:
        m = 1200
        #		n = np.random.randint(30, 100)
        n = 600
        r1 = np.random.choice(tmp['SNP'], n, replace=False)
        p = np.array(tmp[tmp['SNP'].isin(r1)]['MAF'])
        w = binomial(1, p)
        r2 = np.random.choice(tmp['SNP'], m - n, replace=False)
        p2 = np.array(tmp[tmp['SNP'].isin(r2)]['MAF'])
        w2 = binomial(1, p2)

        with open('tmp/w_{}'.format(ind), 'w+') as f:
            for i in w:
                f.write(str(i) + '\n')
        with open('tmp/w2_{}'.format(ind), 'w+') as f:
            for i in w2:
                f.write(str(i) + '\n')
        with open('tmp/p_{}'.format(ind), 'w+') as f:
            #				f.write(' '.join(str(i))+'\n')
            for i in p:
                f.write(str(i) + '\n')
        with open('tmp/r1_{}'.format(ind), 'w+') as f:
            for i in r1:
                f.write(str(i) + '\n')
        with open('tmp/r2_{}'.format(ind), 'w+') as f:
            for i in r2:
                f.write(str(i) + '\n')
    if causal_files:
        r1 = np.array([i.strip() for i in open(causal_files[0]).readlines()])
        r2 = np.array([i.strip() for i in open(causal_files[1]).readlines()])
        p = np.array(
            [float(i.strip()) for i in open(causal_files[2]).readlines()])
        w = np.array(
            [int(i.strip()) for i in open(causal_files[3]).readlines()])
        w2 = np.array(
            [int(i.strip()) for i in open(causal_files[4]).readlines()])
        n = len(r1)
        m = len(r2)
        print(r1, r2, p, w, w2)
#	print(w.shape)
#	print(p.shape)
    z = (w - 2 * p) / (np.sqrt(2 * p * (1 - p)))
    z_bzx = z * bzx
    w_bzx = w * bzx
    var_z_bzx = np.nanvar(z_bzx)
    var_w_bzx = np.nanvar(w_bzx)
    if not rc2:
        rc2 = uniform(0, 0.5)
    sigmac_2 = var_z_bzx * rc2 / rzx2
    #	c = normal(0, sigmac_2, n)
    if not c:
        c = normal(0, sigmac_2)
#	ezx = normal(0, var_w_bzx*(1/rzx2 -1)-sigmac_2, n)
    ezx = normal(0, var_w_bzx * (1 / rzx2 - 1) - sigmac_2)
    #	print(z_bzx.shape)
    #	print(c.shape)
    #	print(ezx.shape)
    x = z_bzx + c + ezx
    x = sum(x)
    lst = list(r1) + list(r2)
    geno_012 = list(w) + list(w2)
    print(len(geno_012))
    print(len(lst))
    #	causal = dict(zip(lst, geno_012))
    causal = pd.DataFrame()
    causal['012'] = geno_012
    causal['SNP'] = lst
    ref = 'geno_ref.txt'
    ref = pd.read_csv(ref)
    geno_GCTA = pd.merge(causal, ref, left_on='SNP', right_on='SNP')
    #	print(geno_GCTA.head())
    pool = Pool(30)
    r = []
    r2 = []
    for i in range(geno_GCTA.shape[0]):
        r.append(pool.apply_async(
            gene_expr_worker,
            [geno_GCTA.iloc[i, :]],
        ))
    for i in r:
        r2.append(i.get())
    geno_GCTA = pd.concat(r2, axis=1).T
    print(geno_GCTA.head())
    #	geno_GCTA = geno_GCTA.apply(gene_expr_worker, axis=1)
    geno_GCTA = geno_GCTA.sort_values(by='SNP')
    print(geno_GCTA.head())
    out = ''.join(geno_GCTA['new'])
    print(x, out[:10])
    return (x, out, sigmac_2, c)
Example #32
        visual_util.plot_base_prediction(
            base_pred=base_pred_dict,
            model_names=ensemble_model_names,
            X_valid=X_valid,
            y_valid=y_valid_mean,
            title_size=16,
            legend_size=12,
            y_range=[-2.5, 2.5],
            save_addr=os.path.join(
                _SAVE_ADDR_PREFIX,
                "{}/ensemble_base_model_fit_no_data.png".format(family_name)))
    """ 3.5.2. visualize: ensemble posterior predictive mean """

    posterior_mean_mu = np.nanmean(ensemble_mean_val, axis=0)
    posterior_mean_cov = np.nanvar(ensemble_mean_val, axis=0)
    posterior_resid_cov = np.nanvar(ensemble_resid_valid_sample, axis=0)

    posterior_dist_cov = np.nanvar(ensemble_sample_val,
                                   axis=0) - posterior_resid_cov

    posterior_mean_median = np.nanmedian(ensemble_mean_val, axis=0)
    posterior_mean_quantiles = [
        np.percentile(ensemble_mean_val, [100 - (100 - q) / 2, (100 - q) / 2],
                      axis=0) for q in [68, 95, 99]
    ]

    visual_util.gpr_1d_visual(
        posterior_mean_mu,
        pred_cov=posterior_mean_cov,
        X_train=X_test,
Example #33
def getStatistics(windows):

	mean = np.nanmean(windows, axis=0)
	var = np.nanvar(windows, axis=0)
	return mean, var
Example #34
DT = Te - Tp
PacT = T2 - T1

# ---------------------------------------------- #
# AVERAGE AND VARIANCE T FIELD
# ---------------------------------------------- #

Tav = T[0]
for i in range(0, len(T[0])):
    for j in range(0, len(T[0, 0])):
        Tav[i, j] = np.nanmean(T[:, i, j])

Tvar = T[1]
for i in range(0, len(T[0])):
    for j in range(0, len(T[0, 0])):
        Tvar[i, j] = np.nanvar(T[:, i, j])

# ---------------------------------------------- #
# REMOVE SEASONALITY
# ---------------------------------------------- #

Dtau = []
DTmeans = []
Temppart = []
Monthsing = np.linspace(1, 12, 12)
for i in range(0, 12):
    meantje = np.nanmean(tauvec[np.arange(i, 36, 12)])
    Dtau.append(meantje)
    meantje = np.nanmean(Te[np.arange(i, 36, 12)] - Tp[np.arange(i, 36, 12)])
    DTmeans.append(meantje)
    meantje = np.nanmean(T2[np.arange(i, 36, 12)] - T1[np.arange(i, 36, 12)])
Example #35
fracy2 = 390
fracx1 = 180
fracx2 = 250

a[fracy1:fracy2, fracx1:fracx2] = h[0, fracy1:fracy2, fracx1:fracx2]
print a.shape, type(a)

# Filter by elevation(band)
za = np.ma.masked_outside(altitude, 1000, 1500)
a[za.mask == True] = np.nan

T0 = 273.15
# Do statistics
print "Mean: {0}".format(np.nanmean(a))
print "Standard deviation: {0}".format(np.nanstd(a, dtype=np.float64))
print "Variance: {0}".format(np.nanvar(a))
print "Average: {0}".format(np.average(a))
print "Min: {0}".format(np.nanmin(a))
print "Max: {0}".format(np.nanmax(a))

'''
Can use 'altitude' to filter out alpine regions or elevation bands
'''

# View map and data
fig = plt.figure(figsize=(10, 12))
ax = fig.add_subplot(111)

xyext = [x[0], x[-1], y[0], y[-1]]
plt.imshow(bkgmap, zorder=0, origin='lower', cmap='pink', extent=xyext)
plt.imshow(a, alpha=0.8, zorder=1, origin='lower', cmap='seismic', extent=xyext)
Example #36
    # Create bounding box limits
    xmin, xmax, ymin, ymax = (xp.min() - 50. * dx), (xp.max() + 50. * dx), \
                               (yp.min() - 50. * dy), (yp.max() + 50. * dy)

# Construct the grid
Xi, Yi = make_grid(xmin, xmax, ymin, ymax, dx, dy)

# Flatten prediction grid
xi = Xi.ravel()
yi = Yi.ravel()

# Markov-model parameter
a = 0.9132 * alpha

# Signal variance of entire field
c0 = np.nanvar(zp)

# Compute noise variance
crms = sigma * sigma

# Output vectors
zi = np.ones(len(xi)) * np.nan
ei = np.ones(len(xi)) * np.nan
ni = np.ones(len(xi)) * np.nan

# Determine nobs for tree
if mode == 'rand':
    n_quad = 16
else:
    n_quad = 8
Example #37
def stats(X, weights=None, compute_variance=False):
    """
    Compute min, max, #nans, mean and variance.

    Result is a tuple (min, max, mean, variance, #nans, #non-nans) or an
    array of shape (len(X), 6).

    The mean and the number of nans and non-nans are weighted.

    Computation of variance requires an additional pass and is not enabled
    by default. Zeros are filled in instead of variance.

    Parameters
    ----------
    X : array_like, 1 or 2 dimensions
        Input array.
    weights : array_like, optional
        Weights, array of the same length as `x`.
    compute_variance : bool, optional
        If set to True, the function also computes variance.

    Returns
    -------
    out : a 6-element tuple or an array of shape (len(x), 6)
        Computed (min, max, mean, variance or 0, #nans, #non-nans)

    Raises
    ------
    ValueError
        If the length of the weight vector does not match the length of the
        array
    """
    is_numeric = np.issubdtype(X.dtype, np.number)
    is_sparse = sp.issparse(X)
    weighted = weights is not None and X.dtype != object

    def weighted_mean():
        if is_sparse:
            w_X = X.multiply(sp.csr_matrix(np.c_[weights] / sum(weights)))
            return np.asarray(w_X.sum(axis=0)).ravel()
        else:
            return np.nansum(X * np.c_[weights] / sum(weights), axis=0)

    if X.size and is_numeric and not is_sparse:
        nans = np.isnan(X).sum(axis=0)
        return np.column_stack(
            (np.nanmin(X, axis=0), np.nanmax(X, axis=0),
             np.nanmean(X, axis=0) if not weighted else weighted_mean(),
             np.nanvar(X, axis=0) if compute_variance else np.zeros(
                 X.shape[1]), nans, X.shape[0] - nans))
    elif is_sparse and X.size:
        if compute_variance:
            raise NotImplementedError

        non_zero = np.bincount(X.nonzero()[1], minlength=X.shape[1])
        X = X.tocsc()
        return np.column_stack((
            nanmin(X, axis=0),
            nanmax(X, axis=0),
            nanmean(X, axis=0) if not weighted else weighted_mean(),
            np.zeros(X.shape[1]),  # variance not supported
            X.shape[0] - non_zero,
            non_zero))
    else:
        nans = (~X.astype(bool)).sum(
            axis=0) if X.size else np.zeros(X.shape[1])
        return np.column_stack(
            (np.tile(np.inf,
                     X.shape[1]), np.tile(-np.inf,
                                          X.shape[1]), np.zeros(X.shape[1]),
             np.zeros(X.shape[1]), nans, X.shape[0] - nans))
Example #38
    def run(self):
        """
        Run method of the module. Fake positive companions are injected for a range of separations
        and angles. The magnitude of the contrast is changed stepwise and lowered by a factor 2 if
        needed. Once the fractional accuracy of the false positive fraction threshold is met, a
        linear interpolation is used to determine the final contrast. Note that the sigma level
        is fixed therefore the false positive fraction changes with separation, following the
        Student's t-distribution (Mawet et al. 2014).

        :return: None
        """

        if self.m_angle[0] < 0. or self.m_angle[0] > 360. or self.m_angle[1] < 0. or \
           self.m_angle[1] > 360. or self.m_angle[2] < 0. or self.m_angle[2] > 360.:
            raise ValueError(
                "The angular positions of the fake planets should lie between "
                "0 deg and 360 deg.")

        images = self.m_image_in_port.get_all()
        psf = self.m_psf_in_port.get_all()

        pixscale = self.m_image_in_port.get_attribute("PIXSCALE")

        self.m_aperture /= pixscale

        if psf.ndim == 3 and psf.shape[0] != images.shape[0]:
            warnings.warn(
                'The number of frames in psf_in_tag does not match with the number of '
                'frames in image_in_tag. Using the mean of psf_in_tag as PSF template.'
            )

        center = np.array([images.shape[2] / 2., images.shape[1] / 2.])

        pos_r = np.arange(self.m_separation[0] / pixscale,
                          self.m_separation[1] / pixscale,
                          self.m_separation[2] / pixscale)

        pos_t = np.arange(self.m_angle[0] + self.m_extra_rot,
                          self.m_angle[1] + self.m_extra_rot, self.m_angle[2])

        if self.m_cent_size is None:
            index_del = np.argwhere(pos_r - self.m_aperture <= 0.)
        else:
            index_del = np.argwhere(
                pos_r - self.m_aperture <= self.m_cent_size / pixscale)

        pos_r = np.delete(pos_r, index_del)

        if self.m_edge_size is None or self.m_edge_size / pixscale > images.shape[
                1] / 2.:
            index_del = np.argwhere(
                pos_r + self.m_aperture >= images.shape[1] / 2.)
        else:
            index_del = np.argwhere(
                pos_r + self.m_aperture >= self.m_edge_size / pixscale)

        pos_r = np.delete(pos_r, index_del)

        fake_mag = np.zeros((len(pos_r), len(pos_t)))
        fake_fpf = np.zeros((len(pos_r)))

        count = 1

        sys.stdout.write("Running ContrastCurveModule...\n")
        sys.stdout.flush()

        for m, sep in enumerate(pos_r):
            fpf_threshold = student_fpf(self.m_sigma, sep, self.m_aperture,
                                        self.m_ignore)
            fake_fpf[m] = fpf_threshold

            for n, ang in enumerate(pos_t):
                sys.stdout.write("Processing position " + str(count) + " out of " + \
                      str(np.size(fake_mag)))
                sys.stdout.flush()

                x_fake = center[0] + sep * math.cos(
                    np.radians(ang + 90. - self.m_extra_rot))
                y_fake = center[1] + sep * math.sin(
                    np.radians(ang + 90. - self.m_extra_rot))

                num_mag = np.size(fake_mag[m, 0:n])
                num_nan = np.size(np.where(np.isnan(fake_mag[m, 0:n])))

                if n == 0 or num_mag - num_nan == 0:
                    list_mag = [self.m_magnitude[0]]
                    mag_step = self.m_magnitude[1]

                else:
                    list_mag = [np.nanmean(fake_mag[m, 0:n])]
                    mag_step = 0.1

                list_fpf = []

                iteration = 1

                while True:
                    sys.stdout.write('.')
                    sys.stdout.flush()

                    mag = list_mag[-1]

                    fake_planet = FakePlanetModule(
                        position=(sep * pixscale, ang),
                        magnitude=mag,
                        psf_scaling=self.m_psf_scaling,
                        interpolation="spline",
                        name_in="fake_planet",
                        image_in_tag=self.m_image_in_tag,
                        psf_in_tag=self.m_psf_in_tag,
                        image_out_tag="contrast_fake",
                        verbose=False)

                    fake_planet.connect_database(self._m_data_base)
                    fake_planet.run()

                    prep = PSFpreparationModule(name_in="prep",
                                                image_in_tag="contrast_fake",
                                                image_out_tag="contrast_prep",
                                                image_mask_out_tag=None,
                                                mask_out_tag=None,
                                                norm=self.m_norm,
                                                resize=None,
                                                cent_size=self.m_cent_size,
                                                edge_size=self.m_edge_size,
                                                verbose=False)

                    prep.connect_database(self._m_data_base)
                    prep.run()

                    psf_sub = PcaPsfSubtractionModule(
                        name_in="pca_contrast",
                        pca_numbers=self.m_pca_number,
                        images_in_tag="contrast_prep",
                        reference_in_tag="contrast_prep",
                        res_mean_tag="contrast_res_mean",
                        res_median_tag=None,
                        res_arr_out_tag=None,
                        res_rot_mean_clip_tag=None,
                        extra_rot=self.m_extra_rot,
                        verbose=False)

                    psf_sub.connect_database(self._m_data_base)
                    psf_sub.run()

                    res_input_port = self.add_input_port("contrast_res_mean")
                    im_res = res_input_port.get_all()

                    if len(im_res.shape) == 3:
                        if im_res.shape[0] == 1:
                            im_res = np.squeeze(im_res, axis=0)
                        else:
                            raise ValueError(
                                "Multiple residual images found, expecting only one."
                            )

                    if self.m_pca_out_port is not None:
                        if count == 1 and iteration == 1:
                            self.m_pca_out_port.set_all(im_res, data_dim=3)
                        else:
                            self.m_pca_out_port.append(im_res, data_dim=3)

                    _, _, fpf = false_alarm(im_res, x_fake, y_fake,
                                            self.m_aperture, self.m_ignore)

                    list_fpf.append(fpf)

                    if abs(fpf_threshold -
                           list_fpf[-1]) < self.m_accuracy * fpf_threshold:
                        if len(list_fpf) == 1:
                            fake_mag[m, n] = list_mag[0]

                            sys.stdout.write("\n")
                            sys.stdout.flush()
                            break

                        else:
                            if (fpf_threshold > list_fpf[-2] and fpf_threshold < list_fpf[-1]) or \
                               (fpf_threshold < list_fpf[-2] and fpf_threshold > list_fpf[-1]):

                                fpf_interp = interp1d(list_fpf[-2:],
                                                      list_mag[-2:], 'linear')
                                fake_mag[m, n] = fpf_interp(fpf_threshold)

                                sys.stdout.write("\n")
                                sys.stdout.flush()
                                break

                            else:
                                pass

                    if list_fpf[-1] < fpf_threshold:
                        if list_mag[-1] + mag_step in list_mag:
                            mag_step /= 2.

                        list_mag.append(list_mag[-1] + mag_step)

                    else:
                        if np.size(list_fpf) > 2 and \
                           list_mag[-1] < list_mag[-2] and list_mag[-2] < list_mag[-3] and \
                           list_fpf[-1] > list_fpf[-2] and list_fpf[-2] < list_fpf[-3]:

                            warnings.warn(
                                "Magnitude decreases but false positive fraction "
                                "increases. Adjusting magnitude to %s and step size "
                                "to %s" % (list_mag[-3], mag_step / 2.))

                            list_fpf = []
                            list_mag = [list_mag[-3]]
                            mag_step /= 2.

                        else:
                            if list_mag[-1] - mag_step in list_mag:
                                mag_step /= 2.

                            list_mag.append(list_mag[-1] - mag_step)

                    if list_mag[-1] <= 0.:
                        warnings.warn(
                            "The relative magnitude has become smaller or equal to "
                            "zero. Adjusting magnitude to 7.5 and step size to 0.1."
                        )

                        list_mag[-1] = 7.5
                        mag_step = 0.1

                    iteration += 1

                    if iteration == 50:
                        warnings.warn(
                            "ContrastModule could not converge at the position of "
                            "%s arcsec and %s deg." % (sep * pixscale, ang))

                        fake_mag[m, n] = np.nan

                        sys.stdout.write("\n")
                        sys.stdout.flush()

                        break

                count += 1

        result = np.column_stack((pos_r * pixscale, np.nanmean(fake_mag,
                                                               axis=1),
                                  np.nanvar(fake_mag, axis=1), fake_fpf))

        self.m_contrast_out_port.set_all(result, data_dim=2)

        sys.stdout.write("Running ContrastCurveModule... [DONE]\n")
        sys.stdout.flush()

        if self.m_pca_out_port is not None:
            self.m_pca_out_port.add_history_information(
                "Contrast limits",
                str(self.m_sigma) + " sigma")

            self.m_pca_out_port.copy_attributes_from_input_port(
                self.m_image_in_port)

        self.m_contrast_out_port.add_history_information(
            "Contrast limits",
            str(self.m_sigma) + " sigma")

        self.m_contrast_out_port.copy_attributes_from_input_port(
            self.m_image_in_port)

        self.m_contrast_out_port.close_port()
Example #39
def fit(X, y):
    idx = simplify_labels(y)
    mean_map = np.nanmean(X * idx, axis=0)
    var_map = np.nanvar(X * idx, axis=0)

    return np.array([mean_map, var_map])
Example #40
    def get_r_hat(self, parameter_array):
        """
        Based on some fancy MATLAB code, it returns an array [R_stat, MR_stat]
        :param parameter_array: 3 dim array of parameter estimation sets
        :type parameter_array: list
        :return: [R_stat, MR_stat]
        :rtype: list
        """
        n, d, N = parameter_array.shape

        # Use only the last 50% of each chain (Vrugt 2009), i.e. only half of "d", because "d" is the count
        # of repetitions and we use the d/2 to d range of those values which are already not NAN
        whereIsNoNAN = np.logical_not(np.isnan(parameter_array))

        alreadyToNum = np.sum(whereIsNoNAN[0, :, 0])

        if alreadyToNum > 3:
            parameter_array = parameter_array[:,
                                              int(np.floor(alreadyToNum /
                                                           2)):alreadyToNum, :]
        else:
            # the later functions need some data to work right, so we use in this case 100% of NON NAN values
            parameter_array = parameter_array[:, 0:alreadyToNum, :]

        # I got confused between d, n and N; I figured it out by tests

        if n > 3:

            mean_chains = np.zeros((n, N))
            for i in range(n):
                for j in range(N):
                    mean_chains[i, j] = np.nanmean(parameter_array[i, :, j])

            B_uni = np.zeros(N)
            for i in range(N):
                B_uni[i] = d * np.nanvar(
                    mean_chains[:, i], ddof=1
                )  # make numpy MATLAB-like: https://stackoverflow.com/a/27600240/5885054

            var_chains = np.zeros((n, N))
            for i in range(n):
                for j in range(N):
                    var_chains[i, j] = np.nanvar(parameter_array[i, :, j],
                                                 ddof=1)

            W_uni = np.zeros(N)
            for i in range(N):
                W_uni[i] = np.mean(var_chains[:, i])

            sigma2 = ((d - 1) / d) * W_uni + (1 / d) * B_uni

            whichW_UNIIsNull = W_uni == 0.0
            W_uni[whichW_UNIIsNull] = np.random.uniform(0.1, 1, 1)

            R_stat = np.sqrt((n + 1) / n * (np.divide(sigma2, W_uni)) -
                             (d - 1) / (n * d))

            #            W_mult = 0
            #            for ii in range(n):
            #                W_mult = W_mult + np.cov(np.nan_to_num(np.transpose(parameter_array[ii, :, :])), ddof=1)
            #
            #            W_mult = W_mult / n + 2e-52 * np.eye(N)
            #
            #            # Note that numpy.cov() considers its input data matrix to have observations in each column,
            #            # and variables in each row, so to get numpy.cov() to return what other packages do,
            #            # you have to pass the transpose of the data matrix to numpy.cov().
            #            # https://stats.stackexchange.com/a/263508/168054
            #
            #            B_mult = np.cov(np.nan_to_num(np.transpose(mean_chains))) + 2e-52 * np.eye(N)  # 2e-52 avoids problems with eig if var = 0
            #            M = np.linalg.lstsq(W_mult, B_mult)
            #            R = np.max(np.abs(np.linalg.eigvals(M[0])))
            #            MR_stat = np.sqrt((n + 1) / n * R + (d - 1) / d)
            return R_stat  #[R_stat, MR_stat]
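A compact standalone version of the univariate R_stat computed above, for chains without NaNs; this is an illustration of the Gelman-Rubin diagnostic, not the class method itself:

import numpy as np

def r_hat(chains):
    """chains: array of shape (n_chains, n_draws, n_params), no NaNs."""
    n, d, N = chains.shape
    chain_means = chains.mean(axis=1)                 # (n, N)
    B = d * np.var(chain_means, axis=0, ddof=1)       # between-chain variance
    W = np.var(chains, axis=1, ddof=1).mean(axis=0)   # mean within-chain variance
    sigma2 = (d - 1) / d * W + B / d
    return np.sqrt((n + 1) / n * sigma2 / W - (d - 1) / (n * d))

rng = np.random.default_rng(6)
chains = rng.normal(size=(4, 500, 3))                 # 4 well-mixed chains, 3 parameters
print(r_hat(chains))                                  # values close to 1 indicate convergence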
Example #41
def main():

    os.chdir(config.DIRPATH)
    masterdf = pd.read_csv("./!Data/Cleaner/RIAIMaster.csv")
    piindexlist = []
    for i in range(0, int((len(masterdf.columns) - 3) / 3)):
        piindexlist.append((i * 3) + 3)

    #Create csv for Statsum by Pi
    with open('./!Data/SumStat/RIAISumStatDay.csv', 'w') as csvFile:
        writer = csv.writer(
            csvFile,
            lineterminator='\n',
        )
        writer.writerow([
            'Datetime', 'DLMean', 'DLMedian', 'DLMax', 'DLStdDev', 'DLVar',
            'DLSkew', 'ULMean', 'ULMedian', 'ULMax', 'ULStdDev', 'ULVar',
            'ULSkew', 'RTTMean', 'RTTMedian', 'RTTMax', 'RTTStdDev', 'RTTVar',
            'RTTSkew'
        ])

        for rowindex in range(0, len(masterdf.index)):

            datetimevar = masterdf['Datetime'].loc[rowindex]

            #rtt stats
            rttarray = []
            for i in range(0, len(piindexlist)):
                rttarray.append(
                    masterdf[list(masterdf)[piindexlist[i]]].loc[rowindex])

            rttmean = np.nanmean(rttarray)
            rttmedian = np.nanmedian(rttarray)
            rttmax = np.nanmax(rttarray)
            rttstd = np.nanstd(rttarray)
            rttvar = np.nanvar(rttarray)
            nprttarray = array(rttarray)
            rttarraynan = nprttarray[~np.isnan(nprttarray)]
            rttskew = skew(rttarraynan)

            #dl stats
            dlarray = []
            for i in range(0, len(piindexlist)):
                dlarray.append(masterdf[list(masterdf)[piindexlist[i] +
                                                       1]].loc[rowindex])

            dlmean = np.nanmean(dlarray)
            dlmedian = np.nanmedian(dlarray)
            dlmax = np.nanmax(dlarray)
            dlstd = np.nanstd(dlarray)
            dlvar = np.nanvar(dlarray)
            npdlarray = array(dlarray)
            dlarraynan = npdlarray[~np.isnan(npdlarray)]
            dlskew = skew(dlarraynan)

            #ul stats
            ularray = []
            for i in range(0, len(piindexlist)):
                ularray.append(masterdf[list(masterdf)[piindexlist[i] +
                                                       2]].loc[rowindex])

            ulmean = np.nanmean(ularray)
            ulmedian = np.nanmedian(ularray)
            ulmax = np.nanmax(ularray)
            ulstd = np.nanstd(ularray)
            ulvar = np.nanvar(ularray)
            npularray = array(ularray)
            ularraynan = npularray[~np.isnan(npularray)]
            ulskew = skew(ularraynan)

            #write row
            newrow = [
                datetimevar, dlmean, dlmedian, dlmax, dlstd, dlvar, dlskew,
                ulmean, ulmedian, ulmax, ulstd, ulvar, ulskew, rttmean,
                rttmedian, rttmax, rttstd, rttvar, rttskew
            ]
            writer.writerow(newrow)
            print('Completed statsum for - ', datetimevar)

    open('./log.txt', "a").write(
        str(datetime.now()) +
        '  -  SumStat/RIAISumStatDay.csv successfully created \n')
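A minimal vectorized sketch (illustrative only; row_stats, cols and prefix are assumed names, not part of the script above) of the same per-row, nan-aware statistics:

import numpy as np
import pandas as pd
from scipy.stats import skew

def row_stats(df, cols, prefix):
    # nan-aware statistics across the selected columns, one value per row
    block = df[cols].to_numpy(dtype=float)
    return pd.DataFrame({
        prefix + 'Mean': np.nanmean(block, axis=1),
        prefix + 'Median': np.nanmedian(block, axis=1),
        prefix + 'Max': np.nanmax(block, axis=1),
        prefix + 'StdDev': np.nanstd(block, axis=1),
        prefix + 'Var': np.nanvar(block, axis=1),
        prefix + 'Skew': skew(block, axis=1, nan_policy='omit'),
    })

# e.g. row_stats(masterdf, [list(masterdf)[i] for i in piindexlist], 'RTT')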
Пример #42
0
"""
@author: adrien

Classification data PET Daily Gillot Airport
"""
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tasgrid as tg
import utm

#%%    Plot evo temporelle ETP

plt.close('all')

dataRaw = pd.read_csv('GillotDaily.csv', sep=';')
dataRaw.DATE = pd.DatetimeIndex(dataRaw.DATE)
dataRawbis = dataRaw.set_index('DATE')

plt.figure()
plt.plot(dataRawbis.ETPMON, 'b*')

Avg = np.nanmean(dataRawbis.ETPMON)
Var = np.nanvar(dataRawbis.ETPMON)
Std = np.nanstd(dataRawbis.ETPMON)

Rapport = Std / Avg * 100
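# Rapport is the coefficient of variation (Std / Avg), expressed as a percentage.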
Пример #43
0
 tp.columns = ['uid', i + '_max']
 if gn.empty:
     gn = tp
 else:
     gn = pd.merge(gn, tp, on='uid', how='left')
 # compute the minimum of the historical data
 tp = pd.DataFrame(
     df.groupby('uid').apply(lambda df: np.nanmin(df[i])).reset_index())
 tp.columns = ['uid', i + '_min']
 if gn.empty:
     gn = tp
 else:
     gn = pd.merge(gn, tp, on='uid', how='left')
 # compute the variance of the historical data
 tp = pd.DataFrame(
     df.groupby('uid').apply(lambda df: np.nanvar(df[i])).reset_index())
 tp.columns = ['uid', i + '_var']
 if gn.empty:
     gn = tp
 else:
     gn = pd.merge(gn, tp, on='uid', how='left')
 # compute the range (max - min) of the historical data
 tp = pd.DataFrame(
     df.groupby('uid').apply(
         lambda df: np.nanmax(df[i]) - np.nanmin(df[i])).reset_index())
 tp.columns = ['uid', i + '_ran']
 if gn.empty:
     gn = tp
 else:
     gn = pd.merge(gn, tp, on='uid', how='left')
 # compute the coefficient of variation of the historical data; to avoid
 # division by zero, the denominator is smoothed with 0.01
Пример #44
0
                  [100 - (100 - q) / 2, (100 - q) / 2], axis=0)
    for q in [68, 95, 99]
]

# compute statistics, uncalibrated predictive distribution
posterior_dist_median_orig = np.nanmedian(ensemble_sample_val_orig, axis=0)

posterior_dist_quantiles_orig = [
    np.percentile(ensemble_sample_val_orig,
                  [100 - (100 - q) / 2, (100 - q) / 2], axis=0)
    for q in [68, 95, 99]
]

# compute statistics, calibrated predictive distribution
posterior_mean_median = np.nanmedian(ensemble_mean_val.T, axis=0)
posterior_mean_cov = np.nanvar(ensemble_mean_val.T, axis=0)

posterior_mean_adj_median = np.nanmedian(ensemble_mean_corrected_val.T, axis=0)
posterior_mean_adj_cov = np.nanvar(ensemble_mean_corrected_val.T, axis=0)

posterior_dist_mu = np.nanmean(ensemble_sample_calib_val.T, axis=0)
posterior_dist_median = np.nanmedian(ensemble_sample_calib_val.T, axis=0)
posterior_dist_cov = np.nanvar(ensemble_sample_calib_val.T, axis=0)

posterior_dist_quantiles = [
    np.percentile(ensemble_sample_calib_val.T,
                  [100 - (100 - q) / 2, (100 - q) / 2], axis=0)
    for q in [68, 95, 99]
]

# compute statistics, additional variance due to G
Пример #45
0
    def run(self) -> None:
        """
        Run method of the module. An artificial planet is injected (based on the noise level) at a
        given separation and position angle. The amount of self-subtraction is then determined and
        the contrast limit is calculated for a given sigma level or false positive fraction. A
        correction for small sample statistics is applied for both cases. Note that if the sigma
        level is fixed, the false positive fraction changes with separation, following the
        Student's t-distribution (see Mawet et al. 2014 for details).

        Returns
        -------
        NoneType
            None
        """

        images = self.m_image_in_port.get_all()
        psf = self.m_psf_in_port.get_all()

        if psf.shape[0] != 1 and psf.shape[0] != images.shape[0]:
            raise ValueError(f'The number of frames in psf_in_tag {psf.shape} does not match with '
                             f'the number of frames in image_in_tag {images.shape}. The '
                             f'DerotateAndStackModule can be used to average the PSF frames '
                             f'(without derotating) before applying the ContrastCurveModule.')

        cpu = self._m_config_port.get_attribute('CPU')
        working_place = self._m_config_port.get_attribute('WORKING_PLACE')

        parang = self.m_image_in_port.get_attribute('PARANG')
        pixscale = self.m_image_in_port.get_attribute('PIXSCALE')

        self.m_image_in_port.close_port()
        self.m_psf_in_port.close_port()

        if self.m_cent_size is not None:
            self.m_cent_size /= pixscale

        if self.m_edge_size is not None:
            self.m_edge_size /= pixscale

        self.m_aperture /= pixscale

        pos_r = np.arange(self.m_separation[0]/pixscale,
                          self.m_separation[1]/pixscale,
                          self.m_separation[2]/pixscale)

        pos_t = np.arange(self.m_angle[0]+self.m_extra_rot,
                          self.m_angle[1]+self.m_extra_rot,
                          self.m_angle[2])

        if self.m_cent_size is None:
            index_del = np.argwhere(pos_r-self.m_aperture <= 0.)
        else:
            index_del = np.argwhere(pos_r-self.m_aperture <= self.m_cent_size)

        pos_r = np.delete(pos_r, index_del)

        if self.m_edge_size is None or self.m_edge_size > images.shape[1]/2.:
            index_del = np.argwhere(pos_r+self.m_aperture >= images.shape[1]/2.)
        else:
            index_del = np.argwhere(pos_r+self.m_aperture >= self.m_edge_size)

        pos_r = np.delete(pos_r, index_del)

        positions = []
        for sep in pos_r:
            for ang in pos_t:
                positions.append((sep, ang))

        result = []
        async_results = []

        # Create temporary files
        tmp_im_str = os.path.join(working_place, 'tmp_images.npy')
        tmp_psf_str = os.path.join(working_place, 'tmp_psf.npy')

        np.save(tmp_im_str, images)
        np.save(tmp_psf_str, psf)

        mask = create_mask(images.shape[-2:], (self.m_cent_size, self.m_edge_size))

        _, im_res = pca_psf_subtraction(images=images*mask,
                                        angles=-1.*parang+self.m_extra_rot,
                                        pca_number=self.m_pca_number)

        noise = combine_residuals(method=self.m_residuals, res_rot=im_res)

        pool = mp.Pool(cpu)

        for pos in positions:
            async_results.append(pool.apply_async(contrast_limit,
                                                  args=(tmp_im_str,
                                                        tmp_psf_str,
                                                        noise,
                                                        mask,
                                                        parang,
                                                        self.m_psf_scaling,
                                                        self.m_extra_rot,
                                                        self.m_pca_number,
                                                        self.m_threshold,
                                                        self.m_aperture,
                                                        self.m_residuals,
                                                        self.m_snr_inject,
                                                        pos)))

        pool.close()

        start_time = time.time()

        # wait for all processes to finish
        while mp.active_children():
            # number of finished processes
            nfinished = sum([i.ready() for i in async_results])

            progress(nfinished, len(positions), 'Calculating detection limits...', start_time)

            # check if new processes have finished every 5 seconds
            time.sleep(5)

        if nfinished != len(positions):
            sys.stdout.write('\r                                                      ')
            sys.stdout.write('\rCalculating detection limits... [DONE]\n')
            sys.stdout.flush()

        # get the results for every async_result object
        for item in async_results:
            result.append(item.get())

        pool.terminate()

        os.remove(tmp_im_str)
        os.remove(tmp_psf_str)

        result = np.asarray(result)

        # Sort the results first by separation and then by angle
        indices = np.lexsort((result[:, 1], result[:, 0]))
        result = result[indices]

        result = result.reshape((pos_r.size, pos_t.size, 4))

        mag_mean = np.nanmean(result, axis=1)[:, 2]
        mag_var = np.nanvar(result, axis=1)[:, 2]
        res_fpf = result[:, 0, 3]

        limits = np.column_stack((pos_r*pixscale, mag_mean, mag_var, res_fpf))

        self.m_image_in_port._check_status_and_activate()
        self.m_contrast_out_port._check_status_and_activate()

        self.m_contrast_out_port.set_all(limits, data_dim=2)

        history = f'{self.m_threshold[0]} = {self.m_threshold[1]}'
        self.m_contrast_out_port.add_history('ContrastCurveModule', history)
        self.m_contrast_out_port.copy_attributes(self.m_image_in_port)
        self.m_contrast_out_port.close_port()
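The docstring above refers to the small-sample correction of Mawet et al. (2014); a hedged, illustrative sketch (not the module's actual implementation; fpf_from_snr, separation and fwhm are assumed names) of how a measured S/N converts into a false positive fraction when only a few resolution elements exist at a given separation:

import math
from scipy import stats

def fpf_from_snr(snr, separation, fwhm):
    # snr = (signal - mean of reference apertures) / std of reference apertures
    # number of independent resolution elements on the annulus at this separation
    n_ap = int(math.floor(2.0 * math.pi * separation / fwhm))
    # one aperture holds the signal, the remaining n_ap - 1 estimate the noise,
    # so the test statistic follows a Student's t with n_ap - 2 degrees of freedom
    tau = snr / math.sqrt(1.0 + 1.0 / (n_ap - 1))
    return 1.0 - stats.t.cdf(tau, df=n_ap - 2)

At small separations n_ap is small, so a fixed sigma level corresponds to a much larger false positive fraction than the Gaussian value, which is why the threshold can be given either as a sigma level or as an FPF.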
Пример #46
0
def variance(F, B, c, f, b):
    return np.nanvar(f)
Пример #47
0
def stack_var(arrs, nodata=None):
    """see stack_stats"""
    a = check_stack(arrs)
    if nodata is not None:
        a = mask_stack(a, nodata=nodata)
    return np.nanvar(a, axis=0)
Пример #48
0
def reweight(MSh, mode):

    # get data per antenna
    var_antenna = {}
    med_antenna = {}
    for ant_id, ant_name, ms_ant in MSh.iter_antenna():

        with Timer('Get data'):
            data = ms_ant.getcol('GDATA')  # axes: time, ant, freq, pol
            flags = ms_ant.getcol('GFLAG')  # axes: time, ant, freq, pol

            # put flagged data to NaNs
            data[flags] = np.nan

            # if completely flagged, set variance to None and continue
            if np.all(flags):
                var_antenna[ant_id] = None
                med_antenna[ant_id] = None
                continue

        with Timer('Prepare data'):

            # data column is updated subtracting adjacent channels
            if mode == 'subchan':
                data_shifted_l = np.roll(data, -1, axis=2)
                data_shifted_r = np.roll(data, +1, axis=2)
                # if there are only 2 channels it's already ok, subtracting one from the other
                if data.shape[2] > 2:
                    data_shifted_l[:, :,
                                   -1, :] = data_shifted_l[:, :,
                                                           -3, :]  # last chan uses the one but last
                    data_shifted_r[:, :,
                                   0, :] = data_shifted_r[:, :,
                                                          2, :]  # first chan uses third
                # get the "best" shift, either on the right or left. This is to avoid propagating bad channels (e.g. with RFI)
                ratio_l = np.nanvar(data_shifted_l, axis=(
                    0, 1, 3)) / np.nanmean(data_shifted_l, axis=(0, 1, 3))
                ratio_l[np.isnan(ratio_l)] = np.inf
                ratio_r = np.nanvar(data_shifted_r, axis=(
                    0, 1, 3)) / np.nanmean(data_shifted_r, axis=(0, 1, 3))
                ratio_r[np.isnan(ratio_r)] = np.inf
                data = np.where((ratio_l < ratio_r)[np.newaxis, np.newaxis, :,
                                                    np.newaxis],
                                data - data_shifted_l, data - data_shifted_r)

            # data column is updated subtracting adjacent times
            if mode == 'subtime':
                data_shifted_l = np.roll(data, -1, axis=0)
                data_shifted_r = np.roll(data, +1, axis=0)
                # if there are only 2 timeslots it's already ok, subtracting one from the other
                if data.shape[0] > 2:
                    data_shifted_l[-1, :, :, :] = data_shifted_l[
                        -3, :, :, :]  # last timeslot uses the one but last
                    data_shifted_r[0, :, :, :] = data_shifted_r[
                        2, :, :, :]  # first timeslot uses third
                # get the "best" shift, either on the right or left. This is to avoid propagating bad channels (e.g. with RFI)
                ratio_l = np.nanvar(data_shifted_l, axis=(
                    1, 2, 3)) / np.nanmean(data_shifted_l, axis=(1, 2, 3))
                ratio_l[np.isnan(ratio_l)] = np.inf
                ratio_r = np.nanvar(data_shifted_r, axis=(
                    1, 2, 3)) / np.nanmean(data_shifted_r, axis=(1, 2, 3))
                ratio_r[np.isnan(ratio_r)] = np.inf
                data = np.where((ratio_l < ratio_r)[:, np.newaxis, np.newaxis,
                                                    np.newaxis],
                                data - data_shifted_l, data - data_shifted_r)

            # use residual data, nothing to do here
            elif mode == 'residual':
                pass

        with Timer('Calc variances'):
            # find mean/variance per time/freq for each antenna

            med_freqs = np.abs(np.nanmean(data, axis=(1, 2))**2)  # time x pol
            med_times = np.abs(np.nanmean(data, axis=(0, 1))**2)  # freq x pol
            med_antenna[
                ant_id] = med_freqs[:, np.
                                    newaxis] + med_times  # sum of the time/freq mean - axes: time,freq,pol

            var_freqs = np.nanvar(data, axis=(1, 2))  # time x pol
            var_times = np.nanvar(data, axis=(0, 1))  # freq x pol
            var_antenna[
                ant_id] = var_freqs[:, np.
                                    newaxis] + var_times  # sum of the time/freq variances - axes: time,freq,pol

    # reconstruct BL weights from antenna variance
    for ms_bl in MSh.ms.iter(["ANTENNA1", "ANTENNA2"]):
        ant_id1 = ms_bl.getcol('ANTENNA1')[0]
        ant_id2 = ms_bl.getcol('ANTENNA2')[0]

        if var_antenna[ant_id1] is None or var_antenna[ant_id2] is None:
            continue

        #        print '### BL: %i - %i' % (ant_id1, ant_id2)
        #        print var_antenna[ant_id1]*med_antenna[ant_id2]
        #        print ''
        #        print var_antenna[ant_id2]*med_antenna[ant_id1]
        #        print ''
        #        print var_antenna[ant_id1]*var_antenna[ant_id2]
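        # For independent antenna signals, Var(X*Y) = Var(X)*E[Y]^2 + Var(Y)*E[X]^2
        # + Var(X)*Var(Y); med_antenna holds the squared mean signal, so the weight
        # below is (presumably) the inverse of the expected visibility variance.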
        w = 1./( var_antenna[ant_id1]*med_antenna[ant_id2] + var_antenna[ant_id2]*med_antenna[ant_id1] \
               + var_antenna[ant_id1]*var_antenna[ant_id2] )

        w -= np.nanmedian(w)  # TEST: REMOVE MEDIAN?

        f = ms_bl.getcol('FLAG')
        # find how many unflagged weights are nans
        ntoflag = np.count_nonzero(np.isnan(w[~f]))
        logging.debug('BL: %i - %i: created %i new flags (%f%%)' %
                      (ant_id1, ant_id2, ntoflag,
                       (100. * ntoflag) / np.size(w)))
        ms_bl.putcol(MSh.wcolname, w)
        ms_bl.flush()
        # flag weights that are nans
        taql(
            'update $ms_bl set FLAG[isnan(WEIGHT_SPECTRUM)]=True, WEIGHT_SPECTRUM[isnan(WEIGHT_SPECTRUM)]=0'
        )
        ms_bl.flush()
Пример #49
0
            index = pop + POP_NB * Replicate
            HsSelMPop.append(HsSel[index][gen])
            HobsSelMPop.append(HobsSel[index][gen])
            FisSelMPop.append(FisSel[index][gen])
            ExtMPop.append(ExtSel[index][gen])
        if np.nansum(HobsSelMPop) == 0 and np.nansum(FisSelMPop) == 0:
            HsSelBarMeanRep.append(np.nan)
            HsSelBarVarRep.append(np.nan)
            HobsSelBarMeanRep.append(np.nan)
            HobsSelBarVarRep.append(np.nan)
            FisSelBarMeanRep.append(np.nan)
            FisSelBarVarRep.append(np.nan)
            ExtSelTotRep.append(sum(ExtMPop))
        else:
            HsSelBarMeanRep.append(np.nanmean(HsSelMPop))
            HsSelBarVarRep.append(np.nanvar(HsSelMPop))
            HobsSelBarMeanRep.append(np.nanmean(HobsSelMPop))
            HobsSelBarVarRep.append(np.nanvar(HobsSelMPop))
            FisSelBarMeanRep.append(np.nanmean(FisSelMPop))
            FisSelBarVarRep.append(np.nanvar(FisSelMPop))
            ExtSelTotRep.append(sum(ExtMPop))

    HsSelBarMean.append(HsSelBarMeanRep)
    HsSelBarVar.append(HsSelBarVarRep)
    HobsSelBarMean.append(HobsSelBarMeanRep)
    HobsSelBarVar.append(HobsSelBarVarRep)
    FisSelBarMean.append(FisSelBarMeanRep)
    FisSelBarVar.append(FisSelBarVarRep)
    ExtSelTot.append(ExtSelTotRep)

# We mean HtLocSel and HtLocNSel for the markers
Пример #50
0
def calculate_posterior_mc_frac(mc_da,
                                cov_da,
                                var_dim=None,
                                normalize_per_cell=True,
                                clip_norm_value=10):
    # so we can do post_frac only in a very small set of gene to prevent memory issue
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # here we expected to see a true_divide warning due to cov=0
        raw_frac = mc_da / cov_da

    if isinstance(raw_frac, np.ndarray):
        # np.ndarray
        ndarray = True
    else:
        ndarray = False

    if ndarray:
        cell_rate_mean = np.nanmean(raw_frac, axis=1)
        cell_rate_var = np.nanvar(raw_frac, axis=1)
    else:
        # assume xr.DataArray
        if var_dim is None:
            cell_rate_mean = raw_frac.mean(axis=1)  # this skips NaNs
            cell_rate_var = raw_frac.var(axis=1)  # this skips NaNs
        else:
            cell_rate_mean = raw_frac.mean(dim=var_dim)  # this skips NaNs
            cell_rate_var = raw_frac.var(dim=var_dim)  # this skips NaNs

    # based on beta distribution mean, var
    # a / (a + b) = cell_rate_mean
    # a * b / ((a + b) ^ 2 * (a + b + 1)) = cell_rate_var
    # calculate alpha beta value for each cell
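    # Solving those two moment equations for (a, b) gives the closed form below:
    #   a = (1 - mean) * mean**2 / var - mean,   b = a * (1 / mean - 1)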
    cell_a = (1 - cell_rate_mean) * (cell_rate_mean**
                                     2) / cell_rate_var - cell_rate_mean
    cell_b = cell_a * (1 / cell_rate_mean - 1)

    # cell specific posterior rate
    post_frac: Union[np.ndarray, xr.DataArray]
    if ndarray:
        post_frac = (mc_da + cell_a[:, None]) / (cov_da + cell_a[:, None] +
                                                 cell_b[:, None])
    else:
        post_frac = (mc_da + cell_a) / (cov_da + cell_a + cell_b)

    if normalize_per_cell:
        # there are two ways of normalizing per cell, by posterior or prior mean:
        # prior_mean = cell_a / (cell_a + cell_b)
        # posterior_mean = post_rate.mean(dim=var_dim)

        # Here I choose to use prior_mean to normalize cell,
        # therefore all cov == 0 features will have normalized rate == 1 in all cells.
        # i.e. 0 cov feature will provide no info
        prior_mean = cell_a / (cell_a + cell_b)
        if ndarray:
            post_frac = post_frac / prior_mean[:, None]
        else:
            post_frac = post_frac / prior_mean
        if clip_norm_value is not None:
            if isinstance(post_frac, np.ndarray):
                # np.ndarray
                post_frac[post_frac > clip_norm_value] = clip_norm_value
            else:
                # xarray.DataArray
                post_frac = post_frac.where(post_frac < clip_norm_value,
                                            clip_norm_value)
    return post_frac
Пример #51
0
    def compute(self, raster_sources, sample_prob=None):
        """Compute the mean and stds over all the raster_sources.

        This ignores NODATA values.

        If sample_prob is set, then a subset of each scene is used to compute stats which
        speeds up the computation. Roughly speaking, if sample_prob=0.5, then half the
        pixels in the scene will be used. More precisely, the number of chips is equal to
        sample_prob * (width * height / 300^2), or 1, whichever is greater. Each chip is
        uniformly sampled from the scene with replacement. Otherwise, it uses a sliding
        window over the entire scene to compute stats.

        Args:
            raster_sources: list of RasterSource
            sample_prob: (float or None) between 0 and 1
        """
        stride = chip_sz
        nb_channels = raster_sources[0].num_channels

        def get_chip(raster_source, window):
            """Return chip or None if all values are NODATA."""
            chip = raster_source.get_raw_chip(window).astype(np.float32)
            # Convert shape from [h,w,c] to [c,h*w]
            chip = np.reshape(np.transpose(chip, [2, 0, 1]), (nb_channels, -1))

            # Ignore NODATA values.
            chip[chip == 0.0] = np.nan
            if np.any(~np.isnan(chip)):
                return chip
            return None

        def sliding_chip_stream():
            """Get stream of chips using a sliding window of size 300."""
            for raster_source in raster_sources:
                with raster_source.activate():
                    windows = raster_source.get_extent().get_windows(
                        chip_sz, stride)
                    for window in windows:
                        chip = get_chip(raster_source, window)
                        if chip is not None:
                            yield chip

        def random_chip_stream():
            """Get random stream of chips."""
            for raster_source in raster_sources:
                with raster_source.activate():
                    extent = raster_source.get_extent()
                    num_pixels = extent.get_width() * extent.get_height()
                    num_chips = round(
                        sample_prob * (num_pixels / (chip_sz**2)))
                    num_chips = max(1, num_chips)
                    for _ in range(num_chips):
                        window = raster_source.get_extent().make_random_square(
                            chip_sz)
                        chip = get_chip(raster_source, window)
                        if chip is not None:
                            yield chip

        # For each chip, compute the mean and var of that chip and then update the
        # running mean and var.
        count = 0
        mean = np.zeros((nb_channels, ))
        var = np.zeros((nb_channels, ))
        chip_stream = (sliding_chip_stream()
                       if sample_prob is None else random_chip_stream())

        for c in chip_stream:
            chip_means = np.nanmean(c, axis=1)
            chip_vars = np.nanvar(c, axis=1)
            chip_count = np.sum(~np.isnan(c[0]))  # count the non-NaN pixels

            var = parallel_variance(chip_means, chip_count, chip_vars, mean,
                                    count, var)
            mean = parallel_mean(chip_means, chip_count, mean, count)
            count += chip_count

        self.means = mean
        self.stds = np.sqrt(var)
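parallel_mean and parallel_variance are helpers that are not shown in this example; a minimal sketch of what they might look like, matching the call signatures above and using the standard pooled mean/variance combination for disjoint samples:

def parallel_mean(mean_a, count_a, mean_b, count_b):
    # count-weighted mean of two disjoint samples
    return (count_a * mean_a + count_b * mean_b) / (count_a + count_b)

def parallel_variance(mean_a, count_a, var_a, mean_b, count_b, var_b):
    # combine the (population) variances of two disjoint samples
    delta = mean_b - mean_a
    m2 = (var_a * count_a + var_b * count_b
          + delta ** 2 * count_a * count_b / (count_a + count_b))
    return m2 / (count_a + count_b)

With count and var starting at zero, as in the loop above, the first chip's statistics pass through unchanged.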
Пример #52
0
    def __init__(self,
                 filename,
                 world_type="TimeSeries",
                 name="",
                 owner="",
                 engine=None,
                 uid=None,
                 version=1,
                 config={}):
        World.__init__(self,
                       filename,
                       world_type=world_type,
                       name=name,
                       owner=owner,
                       uid=uid,
                       version=version,
                       config=config)

        self.data['assets'] = self.assets

        filename = config.get('time_series_data_file', "timeseries.npz")
        if os.path.isabs(filename):
            path = filename
        else:
            path = os.path.join(cfg['micropsi2']['data_directory'], filename)
        self.logger.info("loading timeseries from %s for world %s" %
                         (path, uid))

        self.realtime_per_entry = int(config['realtime_per_entry'])
        self.last_realtime_step = datetime.utcnow().timestamp() * 1000

        try:
            with np.load(path) as f:
                self.timeseries = f['data']
                self.ids = f['ids']
                self.timestamps = f['timestamps']
        except IOError as error:
            self.logger.error("Could not load data file %s, error was: %s" %
                              (path, str(error)))
            self.ids = [0]
            self.timeseries = [[0, 0, 0]]
            self.timestamps = [0]
            self.len_ts = 1
            return

        # todo use the new configurable world options.
        dummydata = config['dummy_data'] == "True"
        z_transform = config['z_transform'] == "True"
        clip_and_scale = config['clip_and_scale'] == "True"
        sigmoid = config['sigmoid'] == "True"
        self.shuffle = config['shuffle'] == "True"

        if clip_and_scale and sigmoid:
            self.logger.warn(
                "clip_and_scale and sigmoid cannot both be configured, choosing sigmoid"
            )
            clip_and_scale = False

        def sigm(X):
            """ sigmoid that avoids float overflows for very small inputs.
                expects a numpy float array.
            """
            cutoff = np.log(np.finfo(X.dtype).max) - 1
            X[np.nan_to_num(X) <= -cutoff] = -cutoff
            return 1. / (1. + np.exp(-X))

        if (z_transform or clip_and_scale or sigmoid) and not dummydata:
            data_z = np.empty_like(self.timeseries)
            data_z[:] = np.nan
            pstds = []
            for i, row in enumerate(self.timeseries):
                if not np.all(np.isnan(row)):
                    std = np.sqrt(np.nanvar(row))
                    if std > 0:
                        if not clip_and_scale:
                            row_z = (row - np.nanmean(row)) / std
                        if clip_and_scale:
                            row_z = row - np.nanmean(row)
                            pstd = std * 4
                            row_z[np.nan_to_num(row_z) > pstd] = pstd
                            row_z[np.nan_to_num(row_z) < -pstd] = -pstd
                            row_z = ((row_z / pstd) + 1) * 0.5
                        data_z[i, :] = row_z
            self.timeseries = data_z if not sigmoid else sigm(data_z)

        if dummydata:
            self.logger.warn("! Using dummy data")
            n_ids = self.timeseries.shape[0]
            self.timeseries = np.tile(np.random.rand(n_ids, 1), (1, 10))

        self.len_ts = self.timeseries.shape[1]
Пример #53
0
def plot_chip_variation(plot_var=True):
    ## load data
    dataFile_list = []
    base_name = "sandwichJJ_102318"
    codename_list = ["pbj", "blt", "sub", "loaf"]
    color_list = ['red', 'purple', 'cyan', 'orange']
    quarterIndex_list = [3, 4]
    for q in quarterIndex_list:
        for codename in codename_list:
            next_file = base_name + f"_{codename}_q{q}.dat"
            dataFile_list.append(next_file)
    ##END loop through codenames

    ## load each file and convert to resistance
    for i, dataFile in enumerate(dataFile_list):
        # setup formating
        data_color = color_list[i % 4]
        codename = codename_list[i % 4]
        mark_forQuarter = ['*', 'o'][(i // 4) % 2]  # Q3 and Q4 respectively
        # for PBJ, BLT, SUB, LOAF
        evap_angle = [
            30 / 180 * np.pi, 30 / 180 * np.pi, 45 / 180 * np.pi,
            45 / 180 * np.pi
        ][i % 4]
        thickness = [1E-9, 1.5E-9, 1.5E-9, 1E-9][i % 4]
        hatch = ['//', '\\', 'x', '|'][i % 4]
        if mark_forQuarter == '*':
            continue  # skip Q3 for now, too high variance from opens

        ## based on chip number, is exterior the first or last column?
        # on Q3 increasing column number corresponds to moving towards interior
        # on Q4 '  ' moving towards exterior
        # Q1 ~ Q3 and Q2~Q4 but we're not using either of those (bad liftoff)
        is_radial_ordering = [False,
                              True][(i // 4) % 2]  # Q3 and Q4 respectively

        # load file
        with open(dataFile) as open_file:
            read = open_file.readlines()
            open_file.close()

        ## use regular expressions to extract 2 numbers
        re_template = r"((\d+\.?\d*)\suV)"  # decimal number, then space then 'uV'
        num_blocks = 5
        voltage_list = parse_input(read,
                                   re_template,
                                   num_blocks=num_blocks,
                                   num_cols=4)
        res, current = voltage_to_resistance_critCurr(voltage_list, bias_r=1E6)

        ## get single junction devices ordered by distance from center of chip
        # ie reverse Q3
        if is_radial_ordering:
            iter_res = iter(res)
        else:
            iter_res = reversed(res)

        ## plot single squids as a function of chip position
        for i, single_column in enumerate(iter_res):
            singleJJ_res = get_singleJunc_devices(single_column)
            singleJJ_res /= 1E3
            avg = np.nanmean(singleJJ_res)
            var = np.nanvar(singleJJ_res)
            if plot_var:
                plt.plot(i,
                         var,
                         marker=mark_forQuarter,
                         ms=9,
                         color=data_color)
            else:
                for res in singleJJ_res:
                    plt.plot(i,
                             res,
                             marker=mark_forQuarter,
                             ms=5,
                             color=data_color)

        plt.xlabel("Radial position [from center]")
        if plot_var:
            plt.ylabel("Variance in Res. of Single SQuID [kOhms^2]")
        else:
            plt.ylabel("Res. of Single SQuID [kOhms]")
        plt.xticks(np.arange(i + 1, dtype='int'))
        add_bands(np.mean(res), evap_angle, thickness, data_color, hatch=hatch)
    ##END loop through data files

    ## make a custom legend
    label_list = [
        "PBJ Q4\n($30^\circ$ evap, Std $O_2$)",
        "BLT Q4\n($30^\circ$ evap, 0.5 nm Al$O_x$)",
        "SUB Q4\n($45^\circ$ evap, 0.5 nm Al$O_x$)",
        "LOAF Q4\n($45^\circ$ evap, Std $O_2$)"
    ]
    box_outline = dict(facecolor='white', alpha=0.8, boxstyle='round')
    x_pos, y_pos = 0.5, 0.9  ## axis coordinates
    gap = 0.105
    ax = plt.gca()
    for i, label in enumerate(label_list):
        c = color_list[i % 4]
        plt.text(x_pos,
                 y_pos - i * gap,
                 label,
                 color=c,
                 bbox=box_outline,
                 transform=ax.transAxes)

    plt.show()
Пример #54
0
    def _process_eeg(self, samples, timestamp):
        """Process EEG.

        Process EEG. Includes buffering, filtering, windowing and pipeline.

        Args:
            samples (numpy.ndarray): new EEG samples to process
            timestamp (float): timestamp

        Returns:
            output (scalar): output of the pipeline
        """

        # Re-map
        if self.eeg_ch_remap:
            samples = samples[:, self.eeg_ch_remap]

        self.eeg_buffer.update(samples)
        self._send_outputs(samples, timestamp, 'raw_eeg')

        # Apply filters
        filt_samples = samples

        if config['filter']:
            filt_samples, self.bandpass_filt['zi'] = signal.lfilter(
                self.bandpass_filt['b'],
                self.bandpass_filt['a'],
                samples,
                axis=0,
                zi=self.bandpass_filt['zi'])
            # self._send_filtered_eeg(filt_samples, timestamp)
        self.filt_eeg_buffer.update(filt_samples)

        if config['hpfilter']:
            filt_samples, self.hp_filt['zi'] = signal.lfilter(
                self.hp_filt['b'],
                self.hp_filt['a'],
                filt_samples,
                axis=0,
                zi=self.hp_filt['zi'])
        self.hpfilt_eeg_buffer.update(filt_samples)

        if config['lpfilter']:
            smooth_eeg_samples, self.lp_filt['zi'] = signal.lfilter(
                self.lp_filt['b'],
                self.lp_filt['a'],
                filt_samples,
                axis=0,
                zi=self.lp_filt['zi'])
            if self.debug_outputs:
                self._send_output_vec(smooth_eeg_samples, timestamp,
                                      'smooth_eeg')
        else:
            smooth_eeg_samples = filt_samples
        self.smooth_eeg_buffer.update(smooth_eeg_samples)

        if config['filter_bank']:
            filter_bank_samples = {}
            for name, filt_dict in self.filter_bank.items():
                filter_bank_samples[name], self.filter_bank[name]['zi'] = \
                    signal.lfilter(filt_dict['b'], filt_dict['a'],
                                   filt_samples, axis=0,
                                   zi=self.filter_bank[name]['zi'])
            low_freq_chs = filter_bank_samples['delta'][0, [
                0, 2
            ]]  #+ filter_bank_samples['theta'][0, [0, 1]]

        window = self.smooth_eeg_buffer.extract(self.window_len)

        eegEarWindow = window[:, 3]  #data from right ear Channel
        #eye movement computed from the difference between two frontal channels
        eyewindow = self.smooth_eeg_buffer.extract(200)
        eegFLWindow = eyewindow[:, 1]
        eegFRWindow = eyewindow[:, 2]
        #        norm_diff_eyes = eegFLWindow[-1] - eegFRWindow[-1]*np.nanstd(eegFLWindow, axis=0)/np.nanstd(eegFRWindow, axis=0)
        #        eyeH = np.reshape([np.square(norm_diff_eyes)], (1, 1))

        # find blinks in the eegEarWindow
        blinkVal = ut.blink_template_match(eegEarWindow)
        if (blinkVal > 100000 and self.blink == 0):
            self.blink = 50
            self.blinkwait = 350
        else:
            if (self.blinkwait > 0):
                self.blinkwait -= 1
            if (self.blink > 0):
                self.blink -= 1

        # LONGER-TERM CALM SCORE based on Saccadic Eye Movement
        eye_mov_percent = np.reshape(
            np.percentile(eegFLWindow - eegFRWindow, 90), (1, 1))
        self.eye_mov_percent_buffer.update(eye_mov_percent)
        remap_eye_mov_percent = ut.sigmoid(
            self.eye_mov_percent_buffer.extract().mean(), 0.5, -10, 0)

        max_value = 1
        incr_decr = remap_eye_mov_percent < 0.2
        inc = self.increments_buffer.extract().mean()
        dpoints_per_second = 0.0005

        if incr_decr:
            self.slow_calm_score += dpoints_per_second * inc  # 1/max([max_value - self.slow_calm_score, 1])
        else:
            self.slow_calm_score -= dpoints_per_second * inc * 4  #0.7 # (self.slow_calm_score)/1280

        self.increments_buffer.update(np.reshape(incr_decr, (1, 1)))

        if self.slow_calm_score > max_value:
            self.slow_calm_score = max_value
        elif self.slow_calm_score < 0:
            self.slow_calm_score = 0

        self.slow_calm_score_buffer.update(
            np.reshape(self.slow_calm_score, (1, 1)))

        # Send outputs at a reduced sampling rate
        if self.smooth_eeg_buffer.pts % 3 == 0:
            self._send_output_vec(smooth_eeg_samples, timestamp, 'muse/eeg')
            if (self.blink > 0):
                self._send_output(np.array([[1]]), timestamp, 'blink')
            else:
                self._send_output(np.array([[0]]), timestamp, 'blink')
            self._send_output(blinkVal / 300000, timestamp, 'blinkVal')
            self._send_output(remap_eye_mov_percent, timestamp, 'saccad')

            self._send_output(
                np.reshape(self.slow_calm_score_buffer.extract().mean(),
                           (1, 1)), timestamp, 'calm')  # slow_calm_score
            self._send_output(low_freq_chs / self.low_freq_chs_std + 0.5,
                              timestamp, 'low_freq_chs')

        # process and send output at every step.   usually about every 1/10s
        if self.eeg_buffer.pts > self.step:
            self.eeg_buffer.pts = 0

            # Get filtered EEG window
            if config['lpfilter']:
                window = self.smooth_eeg_buffer.extract(self.window_len)
            else:
                window = self.eeg_buffer.extract(self.window_len)
            psd_raw_buffer = self.eeg_buffer.extract(self.window_len)

            # Get average PSD
            psd, f = ut.fft_continuous(psd_raw_buffer,
                                       n=int(self.fs),
                                       psd=True,
                                       log='psd',
                                       fs=self.fs,
                                       window='hamming')
            self.psd_buffer.update(np.expand_dims(psd, axis=0))
            mean_psd = np.nanmean(self.psd_buffer.extract(), axis=0)

            # find variance of eegWindow for bad signal detection
            eegVar = np.nanvar(window, axis=0)
            self._send_output_vec(eegVar.reshape(1, self.n_channels),
                                  timestamp, 'hsi')

            if self.sparseOutput is not None:
                # send channel variance for signal quality indication at the source Raspberry Pi
                # send(Address('10.0.0.14','1234'), "/hsi", eegVar[0],eegVar[1],eegVar[2],eegVar[3])
                self._send_sparseOutput_vec(eegVar.reshape(1, self.n_channels),
                                            timestamp, 'hsi')

            # Get band powers and ratios

            bandPowers, bandNames = ut.compute_band_powers(mean_psd,
                                                           f,
                                                           relative=False)
            ratioPowers, ratioNames = ut.compute_band_ratios(bandPowers)

            if (self.firstWindowProc):
                self.band_powers = bandPowers
                self.band_names = bandNames
                self.ratio_powers = ratioPowers
                self.ratio_names = ratioNames
                self.scores = np.zeros((len(self.band_names), self.n_channels))
                self.firstWindowProc = False

            if (eegVar.mean() < 300
                    and self.blinkwait == 0):  #threshold for good data
                for i, (name, hist) in enumerate(self.hists.items()):
                    self.band_powers = bandPowers
                    self.ratio_powers = ratioPowers
                    #send good data indicator based on mean eegWindow variance and blinkwait
                    self._send_output(np.array([[1]]), timestamp,
                                      'goodData')  #good data
            else:
                self._send_output(np.array([[0]]), timestamp,
                                  'goodData')  #good data

            self._send_outputs(self.band_powers, timestamp, 'bands')
            self._send_outputs(self.ratio_powers, timestamp, 'ratios')

            mask = ((f >= 30) & (f < 50))

            self.low_freq_chs_buffer.update(np.reshape(low_freq_chs, (1, -1)))
            self.low_freq_chs_std = self.low_freq_chs_buffer.extract().std(
                axis=0)

            emg_power = np.mean(mean_psd[mask, 0],
                                axis=0)  #HF power of right ear
            self._send_output(np.array([np.sqrt(emg_power) / 2]), timestamp,
                              'emg')
Пример #55
0
def delta(x,
          y,
          x_denominators=1,
          y_denominators=1,
          assume_normal=True,
          alpha=0.05,
          min_observations=20,
          nruns=10000,
          relative=False):
    """ Calculates the difference of means between the samples in a statistical sense.
    Computation is done in form of treatment minus control, i.e. x-y.
    Note that NaNs are treated as if they do not exist in the data. 
    
    :param x: sample of the treatment group
    :type  x: pd.Series or array-like
    :param y: sample of the control group
    :type  y: pd.Series or array-like
    :param x_denominators: denominators for the treatment group sample
    :type  x_denominators: pd.Series or array-like
    :param y_denominators: denominators for the control group sample
    :type  y_denominators: pd.Series or array-like
    :param assume_normal: specifies whether normal distribution assumptions can be made
    :type  assume_normal: boolean
    :param alpha: significance level (alpha)
    :type  alpha: float
    :param min_observations: minimum number of observations needed
    :type  min_observations: int
    :param nruns: only used if assume normal is false
    :type  nruns: int
    :param relative: if relative==True, then the values will be returned
            as distances below and above the mean, respectively, rather than the
            absolute values. In this case, the interval is mean-ret_val[0] to
            mean+ret_val[1]. This is more useful in many situations because it
            corresponds with the sem() and std() functions.
    :type: relative: boolean
    
    :return: results of type SimpleTestStatistics
    :rtype: SimpleTestStatistics
    """
    # Check if data was provided and it has correct format
    if x is None or y is None:
        raise ValueError('Please provide two non-None samples.')
    if not isinstance(x, pd.Series) and not isinstance(
            x, np.ndarray) and not isinstance(x, list):
        raise TypeError('Please provide samples of type Series or list.')
    if type(x) != type(y):
        raise TypeError('Please provide samples of the same type.')

    # check x and y are 'array-like'
    assert hasattr(x, '__len__')
    assert hasattr(y, '__len__')

    # If either denominator is a scalar, convert it to a
    # list of identical entries:
    if not hasattr(x_denominators, '__len__'):
        x_denominators = [x_denominators] * len(x)
    if not hasattr(y_denominators, '__len__'):
        y_denominators = [y_denominators] * len(y)

    # lengths should match
    assert len(x) == len(x_denominators)
    assert len(y) == len(y_denominators)

    # Must be numpy arrays of floats (otherwise .isnan won't work)
    x = np.array(x, dtype=float)
    y = np.array(y, dtype=float)
    x_denominators = np.array(x_denominators, dtype=float)
    y_denominators = np.array(y_denominators, dtype=float)

    # Add a NaN to the numerator for
    # each zero or NaN in the denominator:
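    # (division by a 0 or NaN denominator gives inf/NaN, and multiplying back
    #  by that same denominator turns the entry into NaN in the numerator)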
    x = x / x_denominators * x_denominators
    y = y / y_denominators * y_denominators

    # Next, any NaNs in the numerator must be 'copied' to the denominator.
    x_denominators = x_denominators + (x * 0.0)
    y_denominators = y_denominators + (y * 0.0)

    # confirm the numerators have the same 'nan-ness' as their denominators
    assert (np.isnan(x) == np.isnan(x_denominators)).all()
    assert (np.isnan(y) == np.isnan(y_denominators)).all()

    percentiles = [alpha * 100 / 2, 100 - alpha * 100 / 2]

    _x = x
    _y = y
    _x_denominators = x_denominators
    _y_denominators = y_denominators
    _x_ratio = _x / _x_denominators
    _y_ratio = _y / _y_denominators
    _x_strange = _x / np.nanmean(_x_denominators)
    _y_strange = _y / np.nanmean(_y_denominators)

    # Four variables no longer used in this function, let's delete them for simplicity
    del x
    del y
    del x_denominators
    del y_denominators

    x_nan = np.isnan(_x_ratio).sum()
    y_nan = np.isnan(_y_ratio).sum()
    if x_nan > 0:
        warnings.warn('Discarding ' + str(x_nan) + ' NaN(s) in the x array!')
        logger.warning('Discarding ' + str(x_nan) + ' NaN(s) in the x array!')
    if y_nan > 0:
        warnings.warn('Discarding ' + str(y_nan) + ' NaN(s) in the y array!')
        logger.warning('Discarding ' + str(y_nan) + ' NaN(s) in the y array!')

    ss_x = sample_size(_x_ratio)
    ss_y = sample_size(_y_ratio)

    # Checking if enough observations are left after dropping NaNs
    partial_simple_test_stats = None
    if min(ss_x, ss_y) < min_observations:
        # Set mean to nan
        mu = np.nan
        # Create nan dictionary
        c_i = dict(list(zip(percentiles, np.empty(len(percentiles)) * np.nan)))
    else:
        # Computing the mean
        mu = _delta_mean(_x, _y)
        # Computing the confidence intervals
        if assume_normal:
            logger.info(
                "The distribution of two samples is assumed normal. "
                "Performing the sample difference distribution calculation.")
            partial_simple_test_stats = normal_sample_weighted_difference(
                x_numerators=_x,
                y_numerators=_y,
                x_denominators=_x_denominators,
                y_denominators=_y_denominators,
                percentiles=percentiles,
                relative=relative)
            c_i = partial_simple_test_stats['c_i']
            mu = partial_simple_test_stats[
                'mean1'] - partial_simple_test_stats['mean2']
        else:
            logger.info(
                "The distribution of two samples is not normal. Performing the bootstrap."
            )
            c_i, _ = bootstrap(x=_x_strange,
                               y=_y_strange,
                               percentiles=percentiles,
                               nruns=nruns,
                               relative=relative)

    if partial_simple_test_stats is not None:  # correct the last few lines!!
        treatment_statistics = SampleStatistics(
            ss_x, partial_simple_test_stats['mean1'],
            partial_simple_test_stats['var1'])
        control_statistics = SampleStatistics(
            ss_y, partial_simple_test_stats['mean2'],
            partial_simple_test_stats['var2'])
    else:
        # actually, this is a bit rubbish, only applies to bootstrap and min_observations:
        treatment_statistics = SampleStatistics(ss_x,
                                                float(np.nanmean(_x_strange)),
                                                float(np.nanvar(_x_strange)))
        control_statistics = SampleStatistics(ss_y,
                                              float(np.nanmean(_y_strange)),
                                              float(np.nanvar(_y_strange)))

    variant_statistics = BaseTestStatistics(control_statistics,
                                            treatment_statistics)
    if partial_simple_test_stats is not None:
        p_value = partial_simple_test_stats['p_value']
    else:
        p_value = compute_p_value_from_samples(_x_strange, _y_strange)
    statistical_power = compute_statistical_power_from_samples(
        _x_strange, _y_strange, alpha)  # TODO: wrong

    logger.info("Delta calculation finished!")
    return SimpleTestStatistics(variant_statistics.control_statistics,
                                variant_statistics.treatment_statistics,
                                float(mu), c_i, p_value, statistical_power)
Пример #56
0
def get_statistics_per_category(databaseFolder, processedDataFolder=None):

    if processedDataFolder is None:
        processedDataFolder = "datasets/acousticic"

    classes = sorted([f for f in os.listdir(databaseFolder)
                      if os.path.isdir(os.path.join(databaseFolder, f)) and not f.startswith('.')],
                     key=lambda f: f.lower())
    stats_names = ['max', 'min', 'mean', 'median', 'std', 'var', 'kurt', 'skew', 'percentile25', 'percentile50',
                   'percentile75']

    categoryDictionary = {"voice": ["f0", "vuv"],
                          "glottal_flow": ["naq", "qoq", "h1h2", "psp", "mdq", "peakslope", "rd", "creak"],
                          "mcep": ["mcep_"],
                          "hmpdm": ["hmpdm_"],
                          "hmpdd": ["hmpdd_"],
                          }
    for category in categoryDictionary.keys():
        startFlag = True
        analyzedFiles = []
        for className in classes:
            files = sorted([f for f in os.listdir(os.path.join(databaseFolder, className))
                          if os.path.isfile(os.path.join(databaseFolder, className, f)) and not f.startswith('.')
                            and f[-4:].lower() == ".csv"], key=lambda f: f.lower())
            analyzedFiles += ["%s,%s" % (file, className) for file in files]
            for feat_file in files:
                mm_feats = []
                mm_names = []
                df = pandas.read_csv(os.path.join(databaseFolder, className, feat_file), header='infer')
                feature_names = df.columns.values
                for feat in feature_names:
                    reference = categoryDictionary.get(category)
                    for string in reference:
                        if feat.strip().lower().startswith(string) \
                                or feat.strip().lower().endswith(string):
                            # Feature vector
                            vals = df[feat].values
                            # Run statistics
                            maximum = np.nanmax(vals)
                            minimum = np.nanmin(vals)
                            mean = np.nanmean(vals)
                            median = np.nanmedian(vals)
                            std = np.nanstd(vals)
                            var = np.nanvar(vals)
                            kurt = scipy.stats.kurtosis(vals)
                            skew = scipy.stats.skew(vals)
                            percentile25 = np.nanpercentile(vals, 25)
                            percentile50 = np.nanpercentile(vals, 50)
                            percentile75 = np.nanpercentile(vals, 75)
                            names = [feat.strip() + "_" + stat for stat in stats_names]
                            feats = [maximum, minimum, mean, median, std, var, kurt, skew, percentile25, percentile50, percentile75]
                            if startFlag:
                                for n in names:
                                    mm_names.append(n)
                            for f in feats:
                                if np.isinf(f):
                                    mm_feats.append(np.sign(f))
                                elif np.isnan(f):
                                    mm_feats.append(0)
                                else:
                                    mm_feats.append(f)
                            break
                if startFlag:
                    matrix = [mm_names + ["Class"]]
                    startFlag = False
                matrix.append(mm_feats + [className])
        am.create_arff(matrix,classes,processedDataFolder,category,category)
        print("Analysis of %s acquired." % (category))
        with open(os.path.join(processedDataFolder, "%s.txt"%(category)), "w+") as files:
            files.write("\n".join(analyzedFiles))
Пример #57
0
    Rs_GT = la.norm(x_GT - N, axis=1)
    RD_GT = (-Rs_GT[mp[:, 1]] + Rs_GT[mp[:, 0]])

    Rs_corr = np.array([NR_corr[i - 1][3] for i in Nids])
    Rs = np.array(row.iat[9]) * 1e-9 * C0 + Rs_corr
    RD = (-Rs[mp[:, 1]] + Rs[mp[:, 0]])

    diff = RD_GT - RD

    while sum((~np.isnan(diff)).astype(int)) > 1:
        score = np.zeros(M)
        med = np.zeros(M)
        var = np.zeros(M)
        for i in range(M):
            x = np.abs(diff[(mp == i).any(axis=1)])
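            # note: despite the name, var[i] stores the standard deviation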
            var[i] = np.nanvar(x)**0.5
            med[i] = np.nanmedian(x)

        if np.nanmin(var / med) > 1e-1:
            break

        rem = np.nanargmin(var / med)

        diffidx = np.where((mp == rem).any(axis=1) & (~np.isnan(diff)))[0][-1]

        cond = mp[diffidx, 0] == rem
        flip = 1 if cond else -1

        d = np.sign(diff[diffidx]) * flip * med[rem]

        if abs(d) < 5e4:
Пример #58
0
def get_statistics_covarep(databaseFolder, processedDataFolder=None, outputFileName=None, relationName=None):

    if processedDataFolder is None:
        processedDataFolder = "datasets/acousticic"
    if outputFileName is None:
        outputFileName = "all"
    if relationName is None:
        relationName = "all_acousticical"

    classes = sorted([f for f in os.listdir(databaseFolder)
                      if os.path.isdir(os.path.join(databaseFolder, f)) and not f.startswith('.')],
                     key=lambda f: f.lower())
    stats_names = ['max', 'min', 'mean', 'median', 'std', 'var', 'kurt', 'skew', 'percentile25', 'percentile50',
                   'percentile75']

    startFlag = True
    analyzedFiles = []
    for className in classes:
        files = sorted([f for f in os.listdir(os.path.join(databaseFolder, className))
                      if os.path.isfile(os.path.join(databaseFolder, className, f)) and not f.startswith('.')
                        and f[-4:].lower() == ".csv"], key=lambda f: f.lower())
        analyzedFiles += ["%s,%s" % (file, className) for file in files]
        for feat_file in files:
            mm_feats = []
            mm_names = []
            df = pandas.read_csv(os.path.join(databaseFolder, className, feat_file), header='infer')
            feature_names = df.columns.values
            for feat in feature_names:
                # Feature vector
                vals = df[feat].values
                # Run statistics
                maximum = np.nanmax(vals)
                minimum = np.nanmin(vals)
                mean = np.nanmean(vals)
                median = np.nanmedian(vals)
                std = np.nanstd(vals)
                var = np.nanvar(vals)
                kurt = scipy.stats.kurtosis(vals)
                skew = scipy.stats.skew(vals)
                percentile25 = np.nanpercentile(vals, 25)
                percentile50 = np.nanpercentile(vals, 50)
                percentile75 = np.nanpercentile(vals, 75)
                names = [feat.strip() + "_" + stat for stat in stats_names]
                feats = [maximum, minimum, mean, median, std, var, kurt, skew, percentile25, percentile50, percentile75]
                if startFlag:
                    for n in names:
                        mm_names.append(n)
                for f in feats:
                    if np.isinf(f):
                        mm_feats.append(np.sign(f))
                    elif np.isnan(f):
                        mm_feats.append(0)
                    else:
                        mm_feats.append(f)
            if startFlag:
                matrix = [mm_names + ["Class"]]
                startFlag = False
            matrix.append(mm_feats + [className])
    am.create_arff(matrix, classes, processedDataFolder, outputFileName, relationName)
    print("Analysis of all COVAREP features acquired.")
    with open(os.path.join(processedDataFolder, outputFileName + ".txt"), "w+") as files:
        files.write("\n".join(analyzedFiles))
Пример #59
0
def de_mean(u, v):
    up = u - np.nanmean(u)
    vp = v - np.nanmean(v)
    return up, vp


# find angle of principal axes
up, vp = de_mean(ubar, vbar)
# we mask out high speeds because they introduced a funny direction
spd_lim = 0.5
spd = np.sqrt(up**2 + vp**2)
up[spd > spd_lim] = np.nan
vp[spd > spd_lim] = np.nan
up, vp = de_mean(up, vp)
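# theta below is the principal-axis orientation of the demeaned (up, vp) scatter,
# i.e. 0.5 * atan2(2*cov(up, vp), var(up) - var(vp))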
theta = 0.5 * np.arctan2(2 * np.nanmean(up * vp),
                         (np.nanvar(up) - np.nanvar(vp)))

# and rotate
ubar_r, vbar_r = rot_vec(ubar, vbar, theta)
u_r, v_r = rot_vec(u, v, theta)

# plotting
plt.close('all')
fig = plt.figure(figsize=(18, 10))

# map
ax = fig.add_subplot(3, 4, 4)
ax.plot(lon, lat, '*r')
pad = .1
ax.axis([lon - pad, lon + pad, lat - pad, lat + pad])
pfun.add_coast(ax)
Пример #60
0
 def __QS_move__(self, idt, **kwargs):
     if self._iDT == idt: return 0
     CurInd = self._AllDTs.index(idt)
     if CurInd <= self.EventPreWindow + self.EstWindow: return 0
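     # Output dict keys (kept as in the source): "事件记录" = event log,
     # "预期收益率" = expected return, "异常收益率" = abnormal return,
     # "异常方差" = abnormal variance.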
     self._Output["事件记录"][:, 2] += 1
     IDs = self._FactorTable.getFilteredID(idt=idt,
                                           id_filter_str=self.EventFilter)
     nID = len(IDs)
     if nID > 0:
         self._Output["事件记录"] = np.r_[self._Output["事件记录"],
                                      np.c_[IDs, [idt] * nID,
                                            np.zeros(shape=(nID, 1))]]
         Temp = np.full(shape=(nID, self.EventPreWindow + 1 +
                               self.EventPostWindow),
                        fill_value=np.nan)
         self._Output["预期收益率"] = np.r_[self._Output["预期收益率"], Temp]
         self._Output["异常收益率"] = np.r_[self._Output["异常收益率"], Temp]
         self._Output["异常方差"] = np.r_[self._Output["异常方差"], Temp]
         EstStartInd = CurInd - self.EventPreWindow - self.EstWindow - 1
         Price = self._FactorTable.readData(
             dts=self._AllDTs[EstStartInd:CurInd + 1],
             ids=IDs,
             factor_names=[self.PriceFactor]).iloc[0, :, :]
         Return = _calcReturn(Price.values, return_type=self.ReturnType)
         BPrice = self._BenchmarkFT.readData(
             factor_names=[self.BenchmarkPrice],
             ids=[self.BenchmarkID],
             dts=self._AllDTs[EstStartInd:CurInd + 1]).iloc[0, :, :]
         ExpectedReturn = _calcReturn(BPrice.values,
                                      return_type=self.ReturnType).repeat(
                                          nID, axis=1)
         self._Output["预期收益率"][-nID:, :self.EventPreWindow +
                               1] = ExpectedReturn[self.EstWindow:].T
         self._Output["异常收益率"][-nID:, :self.EventPreWindow +
                               1] = (Return[self.EstWindow:] -
                                     ExpectedReturn[self.EstWindow:]).T
         self._Output["异常方差"][-nID:, :] = np.nanvar(
             Return[:self.EstWindow] - ExpectedReturn[:self.EstWindow],
             axis=0,
             ddof=1).reshape(
                 (nID,
                  1)).repeat(self.EventPreWindow + 1 + self.EventPostWindow,
                             axis=1)
     Mask = (self._Output["事件记录"][:, 2] <= self.EventPostWindow)
     IDs = self._Output["事件记录"][:, 0][Mask]
     RowPos, ColPos = np.arange(
         self._Output["异常收益率"].shape[0])[Mask].tolist(), (
             self._Output["事件记录"][Mask, 2] + self.EventPreWindow).astype(
                 int)
     BPrice = self._BenchmarkFT.readData(
         factor_names=[self.BenchmarkPrice],
         ids=[self.BenchmarkID],
         dts=[self._AllDTs[CurInd - 1], idt]).iloc[0, :, 0]
     ExpectedReturn = _calcReturn(BPrice.values,
                                  return_type=self.ReturnType).repeat(
                                      len(IDs), axis=0)
     self._Output["预期收益率"][RowPos, ColPos] = ExpectedReturn
     Price = self._FactorTable.readData(
         dts=[self._AllDTs[CurInd - 1], idt],
         ids=sorted(set(IDs)),
         factor_names=[self.PriceFactor]).iloc[0, :, :].loc[:, IDs]
     self._Output["异常收益率"][RowPos, ColPos] = (
         _calcReturn(Price.values, return_type=self.ReturnType)[0] -
         ExpectedReturn)
     return 0