import numpy as np

def compute_righthind(forecast, hindcast, observation):

    """
    Esta função faz a correção do hincast usando
    a regressão linear (correção de bias). Mesmo
    método utilizado pelo Júnior.
    """

    # Compute the mean and standard deviation at every grid point of the model climatology
    clim_mean = np.nanmean(hindcast, axis=0)
    clim_std = np.nanstd(hindcast, axis=0)

    # Compute the mean and standard deviation at every grid point of the
    # observed data climatology
    obs_mean = np.nanmean(observation, axis=0)
    obs_std = np.nanstd(observation, axis=0)

    # Compute the standardized anomalies of the model and the observations
    clim_pad = (hindcast - clim_mean)/clim_std
    # obs_pad = (observation - obs_mean)/obs_std
    fcst_pad = (forecast - clim_mean)/clim_std

    # newhind is the bias-corrected hindcast
    newhind = clim_pad * obs_std + obs_mean

    newfcst = fcst_pad * obs_std + obs_mean

    return newhind, newfcst
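A minimal usage sketch (not from the original source; it assumes the arrays share a (time, lat, lon) layout and uses synthetic data for illustration only):

rng = np.random.default_rng(0)
hindcast = rng.normal(26.0, 2.0, size=(30, 4, 4))      # model climatology sample
observation = rng.normal(25.0, 1.5, size=(30, 4, 4))   # observed climatology sample
forecast = rng.normal(26.0, 2.0, size=(1, 4, 4))       # a new forecast to correct

newhind, newfcst = compute_righthind(forecast, hindcast, observation)
# After correction the hindcast climatology matches the observed one:
assert np.allclose(np.nanmean(newhind, axis=0), np.nanmean(observation, axis=0))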
Example No. 2
def make_NRCS_image( nobj, bandname, fn='', dir='.', max=np.nan, min=np.nan,
        **kwargs):
    if not fn:
        if 'reduced' in bandname:
            fn = bandname[:9]+'.png'
        else:
            fn = bandname+'.png'
    resize(nobj)
    try:
        s0 = 10.0*np.log10(nobj[bandname])
    except:
        nobj.undo()  # fixed: 'n_obj' was a typo for the 'nobj' argument
        raise
    s0[np.where(np.isinf(s0))]=np.nan
    #if nobj.fileName[-2:]=='nc':
    #    s0 = flipdim(nobj,s0)

    caption='dB'
    if np.isnan(min):
        min = np.nanmedian(s0,axis=None)-2.0*np.nanstd(s0,axis=None)
    if np.isnan(max):
        max = np.nanmedian(s0,axis=None)+2.0*np.nanstd(s0,axis=None)
    nansatFigure(s0, min, max, dir, fn)
    nobj.undo()
    return fn
Example No. 3
import warnings

import numpy
from numpy.lib.stride_tricks import as_strided

def sigma_clip(image, sigma_lo=3, sigma_hi=3, max_iter=5, axis=0):
    """Reference implementation in numpy"""
    image = image.copy()
    mask = numpy.logical_not(numpy.isfinite(image))
    dummies = mask.sum()
    image[mask] = numpy.NaN
    mean = numpy.nanmean(image, axis=axis, dtype="float64")
    std = numpy.nanstd(image, axis=axis, dtype="float64")
    for _ in range(max_iter):
        if axis == 0:
            mean2d = as_strided(mean, image.shape, (0, mean.strides[0]))
            std2d = as_strided(std, image.shape, (0, std.strides[0]))
        else:
            mean2d = as_strided(mean, image.shape, (mean.strides[0], 0))
            std2d = as_strided(std, image.shape, (std.strides[0], 0))
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            delta = (image - mean2d) / std2d
            mask = numpy.logical_or(delta > sigma_hi,
                                    delta < -sigma_lo)
        dummies = mask.sum()
        if dummies == 0:
            break
        image[mask] = numpy.NaN
        mean = numpy.nanmean(image, axis=axis, dtype="float64")
        std = numpy.nanstd(image, axis=axis, dtype="float64")
    return mean, std
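A quick sanity check of the clipping loop (illustrative data, assuming the imports above):

rng = numpy.random.default_rng(1)
img = rng.normal(100.0, 5.0, size=(64, 32))
img[0, :] = 1e4                      # inject one row of outliers
mean, std = sigma_clip(img, axis=0)  # per-column statistics
print(mean.mean(), std.mean())       # close to 100 and 5 once the outliers are clipped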
Example No. 4
 def predict(self, data=None):
     if(self.use_period):
         # decomfreq = freq
         res = sm.tsa.seasonal_decompose(self.data.tolist(), period=self.freq, model=self.model)  # newer statsmodels use 'period=' instead of 'freq='
         #     res.plot()
         median_trend = Series(self.data).rolling(window=self.freq, center=True, min_periods=1).median()  # pd.rolling_median was removed; use the rolling API
         resid = res.observed - res.seasonal - median_trend
     else:
         resid = self.data
     random = Series(resid)
     mean_nan = 0
     std_nan = 0
     # random = res.resid
     if (self.mode == 'average'):
         mean_nan = np.nanmean(random)
         std_nan = np.nanstd(random)
     elif (self.mode == 'median'):
         rolling_median = random.rolling(3, center=True, min_periods=1).median()  # pd.rolling_median was removed; use the rolling API
         mean_nan = np.nanmean(rolling_median)
         std_nan = np.nanstd(rolling_median)
     min_val = mean_nan - 4 * std_nan
     # max_val = mean(random, na.rm = T) + 4*sd(random, na.rm = T)
     max_val = mean_nan + 4 * std_nan
     position = Series(resid.tolist(), index=np.arange(resid.shape[0]))
     anomaly = position[(position > max_val) | (position < min_val)]
     # anomalyL = position[(position<min_val)]
     # anomaly = anomalyH.append(anomalyL).drop_duplicates()
     point_anomaly_idx = anomaly.index
     self.anomaly_idx = point_anomaly_idx
     points_anomaly = self.data[point_anomaly_idx]
     self.anomalies = points_anomaly
     return points_anomaly
def get_night_shifts(offset):
    """Returns the mean APASS-based shifts for all the offsets in the same night."""
    shifts = {}
    shifts_std = {}
    for band in ['r', 'i']:
        offset_shifts = []
        for sibling in red_groups[offset]:
            try:
                offset_shifts.append(SHIFTS[sibling][band + 'shift'])
            except KeyError:
                pass
        shifts[band] = np.nanmean(offset_shifts)
        shifts_std[band] = np.nanstd(offset_shifts)
    if offset not in blue_groups:
        for band in ['u', 'g', 'r2']:
            shifts[band] = np.nan
            shifts_std[band] = np.nan  # keep the two dicts' keys consistent
    else:
        for band in ['u', 'g', 'r2']:
            offset_shifts = []
            for sibling in blue_groups[offset]:
                try:
                    offset_shifts.append(SHIFTS[sibling][band + 'shift'])
                except KeyError:
                    pass
            shifts[band] = np.nanmean(offset_shifts)
            shifts_std[band] = np.nanstd(offset_shifts)
    return shifts, shifts_std
Example No. 6
import numpy as np

def attenSig(comp, min_var_warn=0, min_var_fail=0, sd=False):
    # comp: list of values to run the test on; the time interval is determined by the user
    # min_var_warn: minimum range of data variation below which a warning is tripped
    # min_var_fail: minimum range of data variation below which a fail is tripped
    # sd: if True, the stdev of the comp data is tested against stdev thresholds set by the user
    if min_var_warn == 0 and min_var_fail == 0:
        raise ValueError("Please indicate warn and fail deviance threshold values.")
    n = len(comp)

    if sd:
        test = np.nanstd(comp)
    else:
        test = abs(max(comp) - min(comp))

    # Check the more severe (fail) threshold first; the original tested the warn
    # threshold first, so the fail flag could never trip when min_var_fail < min_var_warn.
    if test <= min_var_fail:
        flag = 4
    elif test <= min_var_warn:
        flag = 3
    else:
        flag = 1

    flag_list = [flag] * n

    return flag_list
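A quick check of the corrected threshold ordering (values are illustrative):

vals = [10.0, 10.1, 9.9, 10.0]                             # nearly flat signal
print(attenSig(vals, min_var_warn=0.5, min_var_fail=0.1))  # [3, 3, 3, 3]: a range of 0.2 trips the warn flag only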
Example No. 7
def get_variance_map2(a_plus_b, a_minus_b, bias_mask2, pix_mask, gain):
    #variance0 = a_minus_b
    #a_minus_b = a-b
    msk = bias_mask2 | pix_mask | ~np.isfinite(a_minus_b)

    from destriper import destriper
    variance0 = destriper.get_destriped(a_minus_b,
                                        msk,
                                        pattern=64,
                                        remove_vertical=False,
                                        hori=False)
    #variance0 = a_minus_b

    # stsci_median cannot be used because it raises a "too many arrays" error.
    #ss = stsci_median([m1 for m1 in variance0],)
    dd1 = np.ma.array(variance0, mask=msk)
    ss = np.ma.median(dd1, axis=0)

    variance_ = variance0.copy()
    variance_[msk] = np.nan

    st = np.nanstd(variance_)
    st = np.nanstd(variance_[np.abs(variance_) < 3*st])

    variance_[np.abs(variance_-ss) > 3*st] = np.nan

    import scipy.ndimage as ni
    x_std = ni.median_filter(np.nanstd(variance_, axis=0), 11)

    variance_map0 = np.zeros_like(variance_) + x_std**2

    variance_map = variance_map0 + np.abs(a_plus_b)/gain # add Poisson noise in ADU
    return variance_map
Example No. 8
def main():
    os.system('modprobe w1-gpio')
    os.system('modprobe w1-therm')
    print(len(sys.argv))
    if len(sys.argv) == 1:
        number_of_meas = 7
    else:
        print(sys.argv[1])
        number_of_meas = int(sys.argv[1])
    print("number_of_measurements = " + str(number_of_meas))

    print("getting device files and serials...")
    THEDICT = _get_w1_tree_and_serials()

    print("reading sensors " + str(number_of_meas) + " times ...")
    for step in range(int(number_of_meas)):
        for sensor_id in THEDICT:
            if sensor_id[0:2] == '28' or sensor_id[0:2] == '10':
                temp = read_sensor_ds18b20(sensor_id, THEDICT[sensor_id]["path"])
                volt = "n.a."
                THEDICT[sensor_id]["temp"].append(temp)
                THEDICT[sensor_id]["volt"].append(0.)
            if sensor_id[0:2] == '26':
                temp, volt = read_sensor_ds2438(sensor_id, THEDICT[sensor_id]["path"])
                THEDICT[sensor_id]["temp"].append(temp)
                THEDICT[sensor_id]["volt"].append(volt)
            print("step " + str(step) + " " + sensor_id + " " + str(temp) + " " + str(volt))

    print("calculating individual and total means:")
    MEAN_IND = {}
    for sensor_id in THEDICT:
        MEAN_IND[sensor_id] = [
                                np.nanmean(np.array(THEDICT[sensor_id]["temp"])),
                                np.nanmean(np.array(THEDICT[sensor_id]["volt"]))
                              ]
    total_temp = []
    total_volt = []
    for sensor_id in MEAN_IND:
        if sensor_id[0:2] == '28' or sensor_id[0:2] == '10':
            total_temp.append(MEAN_IND[sensor_id][0])
        if sensor_id[0:2] == '26':
            total_volt.append(MEAN_IND[sensor_id][1])
    mean_temp = np.nanmean(np.array(total_temp))
    mean_volt = np.nanmean(np.array(total_volt))

    print("temp mean: " + str(mean_temp) + " +/- " + str(np.nanstd(np.array(total_temp))))
    print("volt mean: " + str(mean_volt) + " +/- " + str(np.nanstd(np.array(total_volt))))  # fixed: the original printed the temperature spread here

    print("calculating offsets...")
    OFFSETS = {}
    for sensor_id in MEAN_IND:
        OFFSETS[sensor_id] = [
                               MEAN_IND[sensor_id][0] - mean_temp,
                               MEAN_IND[sensor_id][1] - mean_volt
                             ]
    print(OFFSETS)

    print("writing offsets...")
    write_offset(OFFSETS)
Example No. 9
def main():
	fname = 'cleaned_data/core_male_cleaned.csv'
	# print(filelength(fname))
	# fname = 'cleaned_data/core_patients_cleaned.csv'
	# code1 = URETHRAL_INJURY_CODES[0]
	PENILE_FRACTURE_CODE = '95913'
	if len(sys.argv) < 2:  # 'if not sys.argv[1]:' raised IndexError when no argument was given
		print('Please enter an argument')
		sys.exit(0)
	code1 = str(sys.argv[1])
	code2 = '95913'
	# try:
	# 	data_mat = np.loadtxt('cleaned_data/pxpywt.txt')
	# 	print('Done loading file! Starting analysis.')
	# except:
	print('cannot load data, going to try generating...')
	data_mat = binary_arrays(fname,code1,code2,1)
	true_stat = mi(data_mat)
	print(true_stat)
	# print(leaders(DXLIST))
	print('beginning surrogate_stats')
	surrogate_stats = surrogate_mi(data_mat)
	print('beginning bootstrap_stats')
	bootstrap_stats = bootstrap_mi(data_mat)
	np.savetxt('cleaned_data/{0}surrogate_stats.txt'.format(str(sys.argv[2]),),surrogate_stats,fmt='%f')
	np.savetxt('cleaned_data/{0}bootstrap_stats.txt'.format(str(sys.argv[2]),),bootstrap_stats,fmt='%f')
	# plt.hist(bootstrap_stats,50)
	# plt.show()
	print(wald_test(true_stat, np.nanstd(bootstrap_stats), np.nanmean(surrogate_stats), np.nanstd(surrogate_stats)))
Example No. 10
def ddiff_var_wb(lh_ctx_file, rh_ctx_file, sc_file):
    import numpy as np
    from generic_pipelines.moco_eval import wb_to_tss
    tss = wb_to_tss(lh_ctx_file, rh_ctx_file, sc_file)
    out_fname = 'ddiff.npz'
    np.savez(out_fname,diffvar=np.nanstd(tss,0), ddiffvar=np.nanstd(np.diff(tss,1,0),0))
    return out_fname
Example No. 11
def radial_contrast_flr(image, xc, yc, seps, zw, coron_thrupt, klip_thrupt=None):
    rad_flr_ctc = np.empty((len(seps)))
    assert(len(seps) == len(coron_thrupt))
    if klip_thrupt is not None:
        assert(len(seps) == len(klip_thrupt))
        rad_flr_ctc_ktc = np.empty((len(seps)))
    else:
        rad_flr_ctc_ktc = None

    imh = image.shape[0]
    imw = image.shape[1]

    xs = np.arange(imw) - xc
    ys = np.arange(imh) - yc
    XXs, YYs = np.meshgrid(xs, ys)
    RRs = np.sqrt(XXs**2 + YYs**2)

    for si, sep in enumerate(seps):
        r_in = np.max([seps[0], sep-zw/2.])
        r_out = np.min([seps[-1], sep+zw/2.])
        meas_ann_mask = np.logical_and(np.greater_equal(RRs, r_in),
                                          np.less_equal(RRs, r_out))
        meas_ann_ind = np.nonzero(np.logical_and(np.greater_equal(RRs, r_in).ravel(),
                                                    np.less_equal(RRs, r_out).ravel()))[0]
        meas_ann = np.ravel(image)[meas_ann_ind]
        rad_flr_ctc[si] = np.nanstd(meas_ann)/coron_thrupt[si]
        if rad_flr_ctc_ktc is not None:
            rad_flr_ctc_ktc[si] = np.nanstd(meas_ann)/coron_thrupt[si]/klip_thrupt[si]

    #pdb.set_trace()
    return rad_flr_ctc, rad_flr_ctc_ktc
Example No. 12
    def plotQE(self, save=False):
        '''
        Plot the measured and theoretical QE
        '''
        fig, ax = plt.subplots()

        QE_median = np.array([np.nanmedian(QE.flatten()) for QE in self.QE])
        QE_upper = np.array([QE_median[ind] + np.nanstd(QE.flatten())
                             for ind, QE in enumerate(self.QE)])
        QE_lower = np.array([QE_median[ind] - np.nanstd(QE.flatten())
                             for ind, QE in enumerate(self.QE)])

        ax.plot(self.wavelengths, QE_median * 100, linewidth=3, color='black',
                label=r'Measured')
        ax.fill_between(self.wavelengths, QE_lower * 100, QE_upper * 100,
                        where=QE_upper >= QE_lower, color='green', facecolor='green',
                        interpolate=True, alpha=0.1)  # interpolate expects a bool, not the string 'True'
        ax.plot(self.wvl_theory, 100 * self.QE_theory, linestyle='-.', linewidth=2,
                color='black', label=r'Theoretical')

        ax.set_xlabel(r'Wavelength (nm)')
        ax.set_ylabel(r'QE (%)')
        ax.legend()
        ax.set_xlim([min(self.wavelengths), max(self.wavelengths)])
        ax.set_ylim([0, max(QE_upper) * 100 * 1.2])

        if save:
            file_name = os.path.join(self.out_directory, self.log_file + '.pdf')
            plt.savefig(file_name, format='pdf')
            plt.close(fig)
        else:
            plt.show(block=False)
Example No. 13
 def _get_x_0_stats(self):
     x_diff = np.diff(self.x_arr_0, axis=1)
     mu_mm = np.nanmean(x_diff)
     std_mm = np.nanstd(x_diff)
     mu_px_mm = np.nanmean(x_diff / self.aramis_info.n_px_facet_step_x)
     std_px_mm = np.nanstd(x_diff / self.aramis_info.n_px_facet_step_x)
     return mu_mm, std_mm, mu_px_mm, std_px_mm
def bin_fit(x, y, buckets=3):
     
    assert buckets in [3,25]

    xstd=np.nanstd(x)
    
    if buckets==3:
        binlimits=[np.nanmin(x), -xstd/2.0,xstd/2.0 , np.nanmax(x)]
    elif buckets==25:
    
        steps=xstd/4.0
        binlimits=np.arange(-xstd*3.0, xstd*3.0, steps)
    
        binlimits=[np.nanmin(x)]+list(binlimits)+[np.nanmax(x)]
    
    fit_y=[]
    err_y=[]
    x_values_to_plot=[]
    for binidx in range(1, len(binlimits)):
        lower_bin_x=binlimits[binidx-1]
        upper_bin_x=binlimits[binidx]

        x_values_to_plot.append(np.mean([lower_bin_x, upper_bin_x]))

        y_in_bin=[y[idx] for idx in range(len(y)) if x[idx]>=lower_bin_x and x[idx]<upper_bin_x]

        fit_y.append(np.nanmedian(y_in_bin))
        err_y.append(np.nanstd(y_in_bin))

    ## no zeros
    

    return (binlimits, x_values_to_plot, fit_y, err_y)
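A small usage sketch for bin_fit (synthetic data; assumes numpy is imported as np):

rng = np.random.default_rng(2)
x = rng.normal(0.0, 1.0, 500)
y = 2.0 * x + rng.normal(0.0, 0.5, 500)
binlimits, bin_x, fit_y, err_y = bin_fit(x, y, buckets=3)
print(bin_x)   # bin midpoints
print(fit_y)   # median y per bin
print(err_y)   # per-bin scatter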
def checkAbundanceScatterClusters():
    # First read the cluster data
    cldata= read_clusterdata.read_caldata()
    # Read the allStar data to match
    # For each of the calibration open clusters, calculate the offset from the 
    # mean in our FEHTAG and AFETAG
    clusters= ['M71','N2158','N2420','N188','M67','N7789','N6819',
               'N6791']
    fehoffset= []
    afeoffset= []
    for cluster in clusters:
        tdata= cldata[cldata['CLUSTER'] == cluster.upper()]
        tdata= tdata[(tdata['TEFF'] < _TEFFMAX)\
                         *(tdata['TEFF'] > _TEFFMIN)\
                         *(tdata['LOGG'] < 3.5)]
        # Compute the average feh and afe and save the offsets
        medianfeh= numpy.median(tdata['FE_H'])
        medianafe= numpy.median(tdata[define_rcsample._AFETAG])
        fehoffset.extend(tdata['FE_H']-medianfeh)
        afeoffset.extend(tdata[define_rcsample._AFETAG]-medianafe)
        if cluster == 'M67': print(medianfeh, medianafe, len(tdata))
    fehoffset= numpy.array(fehoffset)
    afeoffset= numpy.array(afeoffset)
    print('FE_H scatter %g' % (numpy.nanstd(fehoffset[numpy.fabs(fehoffset) < 0.3])))
    print('A_FE scatter %g' % (numpy.nanstd(afeoffset[numpy.fabs(afeoffset) < 0.3])))
    gindx= (numpy.fabs(fehoffset) < 0.3)*(numpy.fabs(afeoffset) < 0.3)
    print('FE_H/A_FE correlation %g' % (numpy.mean(afeoffset[gindx]*fehoffset[gindx])/numpy.nanstd(fehoffset[numpy.fabs(fehoffset) < 0.3])/numpy.nanstd(afeoffset[numpy.fabs(afeoffset) < 0.3])))
    print('FE_H robust scatter %g' % (1.4826*numpy.median(numpy.fabs(fehoffset))))
    print('A_FE robust scatter %g' % (1.4826*numpy.median(numpy.fabs(afeoffset))))
    bovy_plot.bovy_print()
    bovy_plot.bovy_hist(fehoffset,range=[-0.3,0.3],bins=31,histtype='step')
    bovy_plot.bovy_hist(afeoffset,range=[-0.3,0.3],bins=31,histtype='step',
                        overplot=True)
    bovy_plot.bovy_end_print('test.png')
    return None
def compare_iter(arr_len, n_iter):
    """
    Use bubble, quick and merge sorts of random arrays of a set length,
    for n_iter times. Then return mean and standard deviations of results
    The arrays are limited to values less than 1000.
    """
    bubble_comps = []
    quick_comps  = []
    merge_comps  = []

    # Perform sorting the required number of times:
    for ind in range(n_iter):
        rand_arr = np.random.randint(1000, size = arr_len)
        bubble_comps.append(bubble_sort(rand_arr, 0))
        quick_comps.append(quick_sort(rand_arr, 0))
        merge_comps.append(merge_sort(rand_arr, 0))

    # Extract the number of comparisons:
    bub_no = np.array([x[0] for x in bubble_comps])
    qck_no = np.array([x[0] for x in quick_comps])
    mrg_no = np.array([x[0] for x in merge_comps])

    # Calculate mean and standard deviations:
    bub_mean   = np.nanmean(bub_no)
    qck_mean   = np.nanmean(qck_no)
    mrg_mean   = np.nanmean(mrg_no)
    bub_stddev = np.nanstd(bub_no)
    qck_stddev = np.nanstd(qck_no)
    mrg_stddev = np.nanstd(mrg_no)

    # Return the means and standard deviations
    return bub_mean, bub_stddev, qck_mean, qck_stddev, mrg_mean, mrg_stddev
Example No. 17
def score_signal_to_noise(a, b, axis=0):
    mean_a = np.nanmean(a, axis=axis)
    mean_b = np.nanmean(b, axis=axis)

    std_a = np.nanstd(a, axis=axis, ddof=1)
    std_b = np.nanstd(b, axis=axis, ddof=1)

    return (mean_a - mean_b) / (std_a + std_b)
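A minimal check (assuming numpy is imported as np): two populations separated by one unit with unit spread give a score near 0.5 in every column.

rng = np.random.default_rng(3)
a = rng.normal(1.0, 1.0, size=(100, 5))
b = rng.normal(0.0, 1.0, size=(100, 5))
print(score_signal_to_noise(a, b))  # roughly 0.5 per column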
Example No. 18
def test_zernikes_rms(nterms=10, size=500):
    """Verify RMS(Zernike[n,m]) == 1."""
    assert np.nanstd(zernike.zernike1(1)) == 0.0, "Zernike(j=1) has nonzero RMS"
    for j in range(2, nterms):
        n, m = zernike.noll_indices(j)
        Z = zernike.zernike(n, m, npix=size)
        rms = np.nanstd(Z)  # exclude masked pixels
        assert 1.0 - rms < 0.001, "Zernike(j={}) has RMS value of {}".format(j, rms)
Example No. 19
def pickVMIN(dataIn, devs):
    try:
        if np.nanmean(dataIn) - devs * np.nanstd(dataIn) < np.nanmin(dataIn):
            return np.nanmin(dataIn)
        else:
            return np.nanmean(dataIn) - devs * np.nanstd(dataIn)
    except ValueError:
        return 1
Example No. 20
def ddiff_var_moco(in_file):
    import numpy as np
    import h5py
    ts = h5py.File(in_file,'r')
    tss = np.asarray(ts['FMRI/DATA']).T
    out_fname = 'ddiff.npz'
    np.savez(out_fname,diffvar=np.nanstd(tss,0), ddiffvar=np.nanstd(np.diff(tss,1,0),0))
    return out_fname
Example No. 21
 def estimate_f(self, i, samples = 200):
     
     #minibatch = np.random.choice(X,batch_size,replace=False)
     
     LL_X = np.array([self.f['LL'](i) for s in range(samples if samples is not None else self.samples)])
     LL_Y = np.array([self.f['LLY'](i) for s in range(samples if samples is not None else self.samples)])
     
     return np.nanmean(LL_X, 0)+np.nanmean(LL_Y, 0), np.nanstd(LL_X, 0)+np.nanstd(LL_Y, 0)
Example No. 22
def pickVMAX(dataIn, devs):
    try:
        if np.nanmean(dataIn) + devs * np.nanstd(dataIn) > np.nanmax(dataIn):
            return np.nanmax(dataIn)
        else:
            return np.nanmean(dataIn) + devs * np.nanstd(dataIn)
    except ValueError:
        return 1
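pickVMIN (Example No. 19) and pickVMAX clamp colour-scale limits to the observed data range; a quick illustration (assuming numpy is imported as np and both helpers are defined):

data = np.array([1.0, 2.0, 3.0, np.nan, 100.0])
print(pickVMIN(data, 2), pickVMAX(data, 2))  # 1.0 100.0: both limits are clamped to the data range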
Example No. 23
 def calc_perf_stats(self):
     """Calculate mean performance based on trimmed time series."""
     self.mean_tsr, self.std_tsr = nanmean(self.tsr), nanstd(self.tsr)
     self.mean_cp, self.std_cp = nanmean(self.cp), nanstd(self.cp)
     self.mean_cd, self.std_cd = nanmean(self.cd), nanstd(self.cd)
     self.mean_ct, self.std_ct = nanmean(self.ct), nanstd(self.ct)
     self.mean_u_enc = nanmean(self.tow_speed)
     self.std_u_enc = nanstd(self.tow_speed)
Example No. 24
    def test_nanstd(self):
        tgt = np.std(self.mat)
        for mat in self.integer_arrays():
            assert_equal(np.nanstd(mat), tgt)

        tgt = np.std(self.mat, ddof=1)
        for mat in self.integer_arrays():
            assert_equal(np.nanstd(mat, ddof=1), tgt)
Example No. 25
def normalize(m1, m2, m3, m4):

	norm1 = (m1 - np.nanmean(m1))/np.nanstd(m1)
	norm2 = (m2 - np.nanmean(m2))/np.nanstd(m2)
	norm3 = (m3 - np.nanmean(m3))/np.nanstd(m3)
	norm4 = (m4 - np.nanmean(m4))/np.nanstd(m4)

	return norm1, norm2, norm3, norm4
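A one-line check that each output is a z-score (assuming numpy is imported as np):

m = np.array([1.0, 2.0, np.nan, 4.0])
n1, n2, n3, n4 = normalize(m, m * 2, m + 1, m - 1)
print(np.nanmean(n1), np.nanstd(n1))  # ~0.0 and 1.0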
def pretty_gradual_plot(data, concentrations, strain_name_map, drug_name, blank_line=200):

    def inner_scatter_plot(mean, std, relative, limiter=4):
        series = np.zeros(mean.shape)
        cell_type = np.zeros(mean.shape)
        for i, name in enumerate(names):
            series[i, :] = np.arange(i, c.shape[0]*(len(names)+40)+i, len(names)+40)
            cell_type[i, :] = i
            plt.scatter(series[i, :], mean[i, :], c=cm(i/float(len(names))), s=35, label=name)
        plt.errorbar(series.flatten(), mean.flatten(), yerr=std.flatten(), fmt='none', capsize=0)  # fmt=None was deprecated in matplotlib
        plt.xticks(np.mean(series, axis=0), c)
        plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=max(1, len(names) // limiter), mode="expand", borderaxespad=0., prop={'size': 6})
        if not relative:
            plt.axhline(y=blank_line)
        plt.show()

    filter = np.all(np.logical_not(np.isnan(data)), axis=(1, 2))
    names = [strain_name_map[i] for i in filter.nonzero()[0].tolist()]
    c = concentrations[filter, :][0, :]
    mean = np.nanmean(data[filter, :, :], axis=-1)
    std = np.nanstd(data[filter, :, :], axis=-1)
    cm = plt.cm.get_cmap('Spectral')  # 'spectral' was renamed 'Spectral' in matplotlib 2.x

    refmean = mean[:, 0].reshape((mean.shape[0], 1))
    refstd = std[:, 0].reshape((mean.shape[0], 1))
    rel_mean, rel_std = (mean/refmean, np.sqrt(np.power(refstd, 2)+np.power(std, 2))/mean)

    inner_scatter_plot(mean, std, False)
    inner_scatter_plot(rel_mean, rel_std, True)

    mean_mean = np.nanmean(mean, axis=0)
    std_mean = np.nanstd(mean, axis=0)
    mean_std = np.nanmean(std, axis=0)
    total_std = np.sqrt(np.power(std_mean, 2) + np.power(mean_std, 2))
    confusables = np.sum(mean - std < blank_line, axis=0) / float(len(names))

    rel_mean_mean = np.nanmean(rel_mean, axis=0)
    rel_std_mean = np.nanstd(rel_mean, axis=0)
    rel_mean_std = np.nanmean(rel_std, axis=0)
    rel_total_std = np.sqrt(np.power(rel_std_mean, 2) + np.power(rel_mean_std, 2))

    plt.subplot(212)
    plt.plot(mean_mean, c=cm(0.00), label='mean of mean')
    plt.plot(mean_std, c=cm(.25), label='mean of std')
    plt.plot(std_mean, c=cm(.50), label='std of mean')
    plt.plot(total_std, c=cm(0.75), label='total std')
    # plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, mode="expand", borderaxespad=0.,prop={'size':8})
    plt.axhline(y=blank_line)

    plt.subplot(211)
    plt.plot(rel_mean_mean, c=cm(0.00), label='mean of mean')
    plt.plot(rel_mean_std, c=cm(.25), label='mean of std')
    plt.plot(rel_std_mean, c=cm(.50), label='std of mean')
    plt.plot(rel_total_std, c=cm(0.75), label='total std')
    plt.plot(confusables, c=cm(0.9), label='confusable with null')
    plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, mode="expand", borderaxespad=0.,prop={'size':8})

    plt.show()
Example No. 27
def match_fits(fitsfile1, fitsfile2, header=None, sigma_cut=False,
               use_mad_std=True,
               return_header=False, **kwargs):
    """
    Project one FITS file into another's coordinates
    If sigma_cut is used, will try to find only regions that are significant
    in both images using the standard deviation

    Parameters
    ----------
    fitsfile1: str
        Reference fits file name
    fitsfile2: str
        Offset fits file name
    header: pyfits.Header
        Optional - can pass a header to project both images to
    sigma_cut: bool or int
        Perform a sigma-cut on the returned images at this level
    use_mad_std : bool
        Use mad_std instead of std dev for stddev estimation

    Returns
    -------
    image1,image2,[header] : Two images projected into the same space, and
    optionally the header used to project them
    """

    if header is None:
        header = load_header(fitsfile1)
        image1 = load_data(fitsfile1)
    else: # project image 1 to input header coordinates
        image1 = project_to_header(fitsfile1, header)

    # project image 2 to image 1 coordinates
    image2_projected = project_to_header(fitsfile2, header)

    if image1.shape != image2_projected.shape:
        raise ValueError("Failed to reproject images to same shape.")

    if sigma_cut:
        std1 = stats.mad_std(image1, ignore_nan=True) if use_mad_std else np.nanstd(image1)
        std2 = stats.mad_std(image2_projected, ignore_nan=True) if use_mad_std else np.nanstd(image2_projected)
        corr_image1 = image1*(image1 > std1*sigma_cut)
        corr_image2 = image2_projected*(image2_projected > std2*sigma_cut)
        OK = np.isfinite(corr_image1) & np.isfinite(corr_image2)
        if (corr_image1[OK]*corr_image2[OK]).sum() == 0:
            print("Could not use sigma_cut of %f because it excluded all valid data" % sigma_cut)
            corr_image1 = image1
            corr_image2 = image2_projected
    else:
        corr_image1 = image1
        corr_image2 = image2_projected

    returns = corr_image1, corr_image2
    if return_header:
        returns = returns + (header,)
    return returns
Example No. 28
 def calc_sn(self, w1=4700., w2=6000.):
     idx = np.logical_and(self.w[self.goodpixels] >=w1,
                          self.w[self.goodpixels] <=w2)
     self.res = self.galaxy[self.goodpixels] - self.bestfit[self.goodpixels]
     # Noise estimated from the standard deviation of the fit residuals
     self.noise = np.nanstd(self.res[idx])
     self.signal = np.nanstd(self.galaxy[self.goodpixels][idx])
     self.sn = self.signal / self.noise
     return
Example No. 29
def normalize_raster(src_raster, axis=None):
    ''' This assumes that multi-band rasters are dimesioned (bands, rows, cols) '''
    fail = False
    alt_msg = ''  # avoids a NameError when an invalid string axis is passed
    message = {'a':"Normalization will occur using the entire raster",
               'b':"Normalization will occur by band",
               'p':"Normalization will occur across pixels",
               'r':"Normalization will occur across rows",
               'c':"Normalization will occur across columns"}
    rast_dims = src_raster.shape
    num_dims = len(rast_dims)
    if num_dims > 3:
        print("Warning:function can only normalize two and three dimension rasters")
        return False
    else:
        if axis is None:
            axis = 'a'
            print(message[axis])
        elif isinstance(axis, int):  # 'axis is int()' compared identity with 0 and failed for axis=1 or 2
            if axis >= num_dims:
                alt_msg = " (axis value exceeds the raster's dimensions)"
                fail = True
            elif axis < 0:
                alt_msg = ''
                fail = True
            elif num_dims == 2:
                axis = {0:'r', 1:'c'}[axis]
            else:
                axis = {0:'p', 1:'r', 2:'c'}[axis]
        if fail or axis not in message:
            print("Warning:invalid axis value%s." % alt_msg)
            print("\tChoose a value from the following list:")
            for k, v in message.items(): print("\t\t'%s' for %s" % (k, v.replace('will occur ', '').lower()))  # .iteritems() is Python 2 only
            return False
        axis = axis.lower()[0]
        if num_dims == 3:
            axes= {'a':None, 'p':0, 'c':1, 'r':2}
            shape = {'a':rast_dims, 'p':rast_dims[0], 'c':rast_dims[1], 'r':rast_dims[2]}
        else:
            axes= {'a':None, 'b':None, 'p':None, 'c':0, 'r':1}
            shape = {'a':rast_dims, 'p':rast_dims, 'c':rast_dims[0], 'r':rast_dims[1]}
            #d, nRows, nCols = (0,) + rast_dims
            if axis == 'p':
                print("%s %s" %
                      ("Warning:two-dimensional rasters can not be normalized at the pixel-level.", message['a']))
    print("Normalizing %s" % message[axis][25:])
    if (num_dims == 3) & (axis == 'b'):
        mean_ = np.array([np.tile(np.nanmean(src_raster[b]), rast_dims[1:]) for b in range(rast_dims[0])])
        std_ = np.array([np.tile(np.nanstd(src_raster[b]), rast_dims[1:]) for b in range(rast_dims[0])])
    else:
        mean_ = np.tile(np.nanmean(src_raster, axis=axes[axis]), shape[axis]).reshape(rast_dims)
        std_ = np.tile(np.nanstd(src_raster, axis=axes[axis]), shape[axis]).reshape(rast_dims)
    if (std_ == 0).any():
        print("Warning:Divide by zero error (there is a zero value in the standard deviation)")
        return False
    else:
        norm_img = (src_raster - mean_)/std_
    return norm_img
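A minimal call (assuming numpy is imported as np); with no axis the whole raster is z-scored:

rast = np.arange(12, dtype=float).reshape(3, 4)
norm = normalize_raster(rast)
print(norm.mean(), norm.std())  # ~0.0 and 1.0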
Example No. 30
def init_data_avg(data_regs, flmts, avg='a'):
    """Average region FWHM data that has already been retrieved

    Currently reporting standard error of mean as errors
    but, for small sample sizes, we must use Student's t-distribution.
    For 70% confidence interval, multiply by 1.963 for dof=1 (n=2)
    For 90% confidence interval, multiply by 6.3 for dof=1 (n=2) (!!!)

    Input:
        data_all (dict): keyed by region number, w/ kevs/data/eps/inds (use init_data)
        flmts (dict): filament number paired w/ constituent region numbers
        avg (str): 'a' or 'g' for arithmetic, geometric averages & errors
    Output:
        new dict, keyed by filament numbers, w/ averaged data
        also includes n_pts (number of points averaged in each band)
    """
    data_avgd = {}

    for n_flmt, n_regs in flmts.items():
        kevs = []
        data_all = []
        for m in n_regs:
            kevs_r, data_r, eps_r, inds_r = data_regs[m]
            if len(kevs) > 0 and not np.array_equal(kevs, kevs_r):
                raise Exception('Discrepancy in stored kevs')
            kevs = kevs_r
            data_all.append(data_r)

        data_all = np.array(data_all)
        n_pts = np.sum(~np.isnan(data_all),axis=0)  # Number of usable FWHMs in each energy band
        inds = np.where(n_pts)[0]

        # Lots of unsafe computation -- but, if n=0 we will throw it out anyways
        if avg == 'a':
            data = np.nanmean(data_all, axis=0)
            std = np.nanstd(data_all, axis=0, ddof=1)
            std[np.where(np.isnan(std))] = np.nanmax(std)  # Catch n=1 cases (stdev ill-defined)
            eps = std / np.sqrt(n_pts)  # Standard error of the mean
        elif avg == 'g':
            # Compute errors in log space first, before setting NaNs in data to 1
            std_log = np.nanstd(np.log(data_all), axis=0, ddof=1)
            std_log[np.where(np.isnan(std_log))] = np.nanmax(std_log)  # Catch n=1 cases (stdev ill-defined)
            eps_log = std_log / np.sqrt(n_pts)  # Convert to standard err in log space

            data_all[np.isnan(data_all)] = 1  # Identity for multiplying
            data = np.power(np.prod(data_all, axis=0), 1./n_pts)

            # Convert back to original parameter space
            eps_upper = np.exp(np.log(data) + eps_log) - data
            eps_lower = data - np.exp(np.log(data) - eps_log)
            # We need to provide a symmetric error for fitting
            eps = np.maximum(eps_upper, eps_lower)

        data_avgd[n_flmt] = kevs, data, eps, inds, n_pts

    return data_avgd
def thing(retstrip):
    avg=np.nanmean(retstrip)
    std=np.nanstd(retstrip)
    length=len([x for x in retstrip if not np.isnan(x)])
    return (avg, std, length, std/(length**.5))
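A quick check (assuming numpy is imported as np); the returned tuple is (mean, std, n, standard error):

print(thing(np.array([1.0, 2.0, np.nan, 4.0])))  # the NaN is excluded from the count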
Example No. 32
for j, penalty in enumerate(penalties):
    LR = LogReg(penalty)
    LR.set_learning_params(a=a, b=b)
    for k in range(N_repetitions):
        try:
            LR.fit(X_train,
                   Z_train,
                   n_minibatches,
                   n_epochs,
                   std_beta=std_beta)
            Z_pred = LR.classify(X_test)

            accuracy[j, k] = ACC(Z_test, Z_pred)
        except:
            print(f"Crash with penalty = {penalty}.")
            accuracy[j, k] = np.nan
np.seterr(all='ignore')

plt.errorbar(x=penalties,
             y=np.nanmean(accuracy, axis=1),
             yerr=np.nanstd(accuracy, axis=1),
             fmt="o",
             capsize=5,
             ms=5)
plt.xscale("log")
plt.xlabel(f"penalty ($\lambda$)", fontsize=12)
plt.ylabel("accuracy", fontsize=12)
plt.title("Accuracy score", fontsize=15)
plt.savefig("Figures/LR_acc_penalties.png", dpi=300)
plt.show()
                    plt.plot(x_data[ssp_option, model_i], y_data[ssp_option, model_i], marker=model_shapes[model_i], color='b', markersize=20, mew=5)


            # observational constraint
            obs_data_model = obs_data[model_i+(ssp_option*n_models)]
            
            
    # saving x_data and y_data
    flat_x_array = x_data.flatten()
    flat_y_array = y_data.flatten()
    flat_x_array = flat_x_array[flat_x_array==flat_x_array]  # NaN != NaN, so this drops NaNs
    flat_y_array = flat_y_array[flat_y_array==flat_y_array]
            
    # std of constrained values         
    x_obs = np.nanmean(obs_data)
    dx_obs = np.nanstd(obs_data)


    # creating constrained data line
    x_line = np.linspace(-xmin_limit, xmax_limit, 100)
    global_array = np.zeros([100,1])
    global_array = np.squeeze(global_array)
    for b in range(0,100):
        global_array[b] = x_obs
    plt.plot(global_array, x_line, color='darkgreen', linewidth=2, alpha=1)
        
    plt.axvspan(x_obs-dx_obs, x_obs+dx_obs, color='lightgreen', alpha=0.8, zorder=20)

    # calculating the constrained values
    x_values = np.loadtxt("saved_data/combined_x_"+str(min_temperature)+"_degree_warming_cmip6.csv", delimiter=",")
    y_values = np.loadtxt("saved_data/combined_y_"+str(min_temperature)+"_degree_warming_cmip6.csv", delimiter=",")
Example No. 34
    def featureFunc(self,vals,ts):

        return np.nanstd(vals)
def featureextraction(print_):
    
    # Amount of people per embarkment location
    
    (train,test) = importdata()
    
    full_data = pd.concat([train,test])
    
  
    
    #fill in missing observations with most observed S
    full_data['Embarked'] = full_data['Embarked'].replace(np.nan, "S", regex=True)
    if print_==True:
        print("changed missing data to S")
    
    full_data.loc[full_data['Embarked']=='C','Embarked']=0
    full_data.loc[full_data['Embarked']=='Q','Embarked']=1
    full_data.loc[full_data['Embarked']=='S','Embarked']=2
    if print_==True:
        print('\nchanged C,Q,S to 0,1,2')
    
    if print_==True:
        print('\nchanging Age feature')
    mean = np.nanmean(train['Age'])
    st_dev = np.nanstd(train['Age'])
    if print_==True:
        print('mean: ',np.round(mean,3), '\nSt_dev: ',np.round(st_dev,3))
    missing_obs = full_data['Age'].isnull().sum()
    
    if print_==True:
        print('filling in missing data with mean and st_dev')
    random_ages = np.round(np.random.normal(mean,st_dev,missing_obs)).astype(int)
    if print_==True:
        print(random_ages[random_ages<=0]) 
    random_ages[random_ages<=0] = np.random.uniform(len(random_ages[random_ages<=0]))
    if print_==True:
        print(random_ages)
        
    if print_==True:
        sns.distplot(random_ages)
        plt.show()
    
    full_data.loc[np.isnan(full_data['Age']),'Age'] = random_ages
    
    
    train['AgeBand'] = pd.cut(train['Age'], 5, precision = 0)
    if print_==True:
        print(train[['AgeBand', 'Survived']].groupby(['AgeBand'], as_index=False).mean().sort_values(by='AgeBand', ascending=True))
     
    
    full_data.loc[ full_data['Age'] <= 16, 'Age'] = 0
    full_data.loc[(full_data['Age'] > 16) & (full_data['Age'] <= 32), 'Age'] = 1
    full_data.loc[(full_data['Age'] > 32) & (full_data['Age'] <= 48), 'Age'] = 2
    full_data.loc[(full_data['Age'] > 48) & (full_data['Age'] <= 64), 'Age'] = 3
    full_data.loc[ full_data['Age'] > 64, 'Age'] = 4
    if print_==True:
        print(full_data.sample(8))
    
    
    if print_==True:
        print('Cutting on Fare')
    train['Fareband'] = pd.cut(train['Fare'], 5, precision = 0)
    if print_==True:
        print(train[['Fareband', 'Survived']].groupby(['Fareband'], as_index=False).mean().sort_values(by='Fareband', ascending=True))
    
    full_data.loc[ full_data['Fare'] <= 102, 'Fare'] = 0
    full_data.loc[(full_data['Fare'] > 102) & (full_data['Fare'] <= 205), 'Fare'] = 1
    full_data.loc[(full_data['Fare'] > 205) & (full_data['Fare'] <= 307), 'Fare'] = 2
    full_data.loc[(full_data['Fare'] > 307) & (full_data['Fare'] <= 410), 'Fare'] = 3
    full_data.loc[ full_data['Fare'] > 410, 'Fare'] = 4
   
    full_data.loc[full_data.PassengerId==1044,'Fare'] = 0
    
    if print_==True:
        print(full_data.sample(8))
    
    if print_==True:
        print('Creating family feature')
    train['FamilySize'] = train['SibSp'] + train['Parch'] + 1
    if print_==True:
        print(train[['FamilySize', 'Survived']].groupby(['FamilySize'], as_index=False).mean().sort_values(by='Survived', ascending=False))
    full_data['FamilySize'] = full_data['SibSp'] + full_data['Parch'] + 1
    
    if print_==True:
        print('extracting title')
    dataset_title = [i.split(",")[1].split(".")[0].strip() for i in full_data["Name"]]
    full_data["Title"] = pd.Series(dataset_title)
    
    if print_==True:
        full_data["Title"].head()
    
    if print_==True:
        print('converting to categorical data')
    full_data["Title"] = full_data["Title"].replace(['Lady', 'the Countess','Countess','Capt', 'Col','Don', 'Dr', 'Major', 'Rev', 'Sir', 'Jonkheer', 'Dona'], 'Rare')
    full_data["Title"] = full_data["Title"].map({"Master":0, "Miss":1, "Ms" : 1 , "Mme":1, "Mlle":1, "Mrs":1, "Mr":2, "Rare":3})
    full_data["Title"] = full_data["Title"].astype(int)
    
    if print_==True:
        g = sns.factorplot(x="Title",y="Survived",data=full_data,kind="bar")
        g = g.set_xticklabels(["Master","Miss-Mrs","Mr","Rare"])
        g = g.set_ylabels("survival probability")
    
  
    if print_==True:
        print('converting male-female to 0-1')
    full_data = full_data.drop(labels = ["Name","Cabin", 'Ticket'], axis = 1)
    full_data.loc[ full_data['Sex'] == 'male', 'Sex'] = 0
    full_data.loc[ full_data['Sex'] == 'female', 'Sex'] = 1

    
    test = full_data.loc[full_data['PassengerId'].isin(test['PassengerId'])]
    test = test.drop(labels=["Survived"],axis = 1)
    train = full_data.loc[full_data['PassengerId'].isin(train['PassengerId'])]
    
    
    
    return train, test, full_data
Example No. 36
def load_cfs_ensemble(fcst_dir,init_date,par,numdays,bli,tri,
                      intervals,des_inds,returnpar='mean',daily=True):
    """
    Returns the the ensemble mean averaged forecast for the first [numdays] 
    days of the forecast. [init_date] is the forecast initialization date and 
    the ensemble (4 members) includes the 00Z, 06Z, 12Z, and 18Z runs. 
    [returnpar] specifies whether to return the ensemble mean, ensemble spread,
    ensemble stdv, or the individual members. The averaging intervals are 
    specified by [intervals], which is a list of integers specifiying the 
    number of weeks to average the forecast (0weeks = 1day).
    """
    import pygrib as pyg
    datestr = init_date.strftime('%Y%m%d')
    ls_fils = '{}/{}.{}*.grb2'.format(fcst_dir,check_input_var(par),datestr)
    fils = check_output(['ls -1a ' + ls_fils], shell=True).decode().split()  # decode: check_output returns bytes on Python 3
#    if not len(fils)==4:
#        raise IOError('There are only {} files for this day!!'.format(len(fils)))
    if returnpar=='mem1':
        fils = [fils[0]]
    mems = len(fils)
    ensemble = np.empty((mems,len(intervals),len(des_inds),bli[0]-tri[0],tri[1]-bli[1]))
    m = 0
    for cfs_file in fils:
        try:
            hh = int(cfs_file[-12:-10])
            print('   member{}: {}Z'.format(m+1,cfs_file[-12:-10]))
            grbs = pyg.open(cfs_file)
            if par in ['U200', 'U850']:
                if hh == 18:
                    grb = grbs.select(name='U component of wind',forecastTime=range(numdays*24+1))
                else:
                    grb = grbs.select(name='U component of wind',forecastTime=range((numdays+1)*24+1))
            elif par in ['V200', 'V850']:
                if hh == 18:
                    grb = grbs.select(name='V component of wind',forecastTime=range(numdays*24+1))
                else:
                    grb = grbs.select(name='V component of wind',forecastTime=range((numdays+1)*24+1))
            else:
                if hh == 18:
                    grb = grbs.select(forecastTime=range(numdays*24+1))
                else:
                    grb = grbs.select(forecastTime=range((numdays+1)*24+1))
            grbs.close()
            if daily:
                # Take daily average
                fcst_daily, ftims, skp = daily_ave(np.array([g.values for g in grb]), hh)
            if par == 'PRATE':
                fcst_daily *= 60*60*24  # convert precipitation rate to a daily total
            # Loop over all the different averaging intervals (0-6 weeks)
            for n in range(len(intervals)):
                numints = intervals[n]
                # Compute the mean for the interval at each fcst time
                if numints == 0:
                    fcst_mean = fcst_daily[des_inds,:,:]
                else:
                    ave_interval = numints*7
                    #average over the time axis
                    fcst_mean = np.array([np.nanmean(fcst_daily[j:(j+ave_interval),\
                               :,:],axis=0) for j in des_inds])
                
                ensemble[m,n,:,:,:] =  fcst_mean[:,tri[0]:bli[0],bli[1]:tri[1]]
        except ValueError:
            print('Houston, we have a ValueError')
            # NB: anything after the raise would be unreachable
            raise ValueError('There was a ValueError')
        except subprocess.CalledProcessError:
            raise IOError('There was a CalledProcessError!!')
        m += 1
    if returnpar=='members':
        return ensemble[0,:,:],ensemble[1,:,:],ensemble[2,:,:],ensemble[3,:,:]
    elif returnpar in ['mean','mem1']:
        return np.nanmean(ensemble,axis=0)
    elif returnpar=='stdv':
        return np.nanstd(ensemble,axis=0)
    elif returnpar=='spread':
        return np.amax(ensemble,axis=0) - np.amin(ensemble,axis=0)
    else:
        raise IOError('Invalid return parameter specified.')
#import random
#conditioner=[pd.Series([random.gauss(0.0, 1.0) for unused in ehup.index], ehup.index) for ehup in conditioner]





futreturns=[list(x.values) for x in dependent]
condvariable=[list(x.values) for x in conditioner]

futreturns=sum(futreturns, [])
condvariable=sum(condvariable, [])


cmean=np.nanmean(condvariable)
cstd=np.nanstd(condvariable)

condreturns_pair=[(fr,cv) for fr,cv in zip(futreturns, condvariable) if not np.isnan(fr) and not np.isnan(cv)]

upper_bound1=cmean+4*cstd
lower_bound1=cmean+cstd
condreturns=[fr for fr,cv in condreturns_pair if cv<upper_bound1 and cv>lower_bound1]
condreturns_cond=[cv for fr,cv in condreturns_pair if cv<upper_bound1 and cv>lower_bound1]  # same bounds as condreturns so the two lists stay paired


upper_bound2=cmean+cstd
lower_bound2=cmean-cstd
condreturns2=[fr for fr,cv in condreturns_pair if cv<upper_bound2 and cv>lower_bound2]
condreturns_cond2=[cv for fr,cv in condreturns_pair if cv<upper_bound2 and cv>lower_bound2]

def nanstderr(array):
    n = np.sum(np.logical_not(np.isnan(array)))
    return np.nanstd(array) / np.sqrt(n)
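A quick check of nanstderr (assuming numpy is imported as np):

arr = np.array([1.0, 2.0, np.nan, 4.0])
print(nanstderr(arr))  # nanstd over sqrt(3): the NaN is excluded from the count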
Example No. 39
##plt.scatter(opt_aS[1,:], opt_aR[1,:], marker='+', label='sigmoid P2')
##plt.scatter(opt_aS[2,:], opt_aR[2,:], marker='+', label='ReLU P1')
##plt.scatter(opt_aS[3,:], opt_aR[3,:], marker='+', label='ReLU P2')
##plt.scatter(0.342000, 0.684000, c='yellow', marker='o', label='P1')
# plt.scatter(0.684000, 0.342000, c='yellow', marker='o', label='P2')
# plt.xlabel('alpha_s')
# plt.ylabel('alpha_r')
# plt.legend()
# plt.title('Failed learning: dummy case')
# plt.plot()
# plt.savefig('figures_results/dummy_case.eps', format='eps', dpi=DPI)

"stats"
mean_opt_aR = np.nanmean(opt_aR, axis=1)
mean_opt_aS = np.nanmean(opt_aS, axis=1)
std_opt_aR = np.nanstd(opt_aR, axis=1)
std_opt_aS = np.nanstd(opt_aS, axis=1)

# "Significancy: independent t-tes for the two time constants"
#
## target time constants 0.34, 0.68
# t_S1,p_S1 = stats.ttest_ind(opt_aS[0,:], opt_aS[2,:], equal_var=False, nan_policy='omit')
# t_R1,p_R1 = stats.ttest_ind(opt_aR[0,:], opt_aR[2,:], equal_var=False, nan_policy='omit')
#
## target time constants 0.68, 0.34
# t_S2,p_S2 = stats.ttest_ind(opt_aS[1,:], opt_aS[3,:], equal_var=False, nan_policy='omit')
# t_R2,p_R2 = stats.ttest_ind(opt_aR[1,:], opt_aR[3,:], equal_var=False, nan_policy='omit')
# print('Significancy: independent t-tes for the two time constants')
# print('target time constants s=0.34, r=0.68:')
# print('alpha_s: p=%lf' %p_S1)
# print('alpha_r: p=%lf' %p_R1)
Example No. 40
def qualityControl(raw_df,
                   time_col='charttime',
                   name_col='label',
                   val_col='value',
                   ref_cov_list=None,
                   fig_dir='.',
                   fig_format='pdf',
                   plot=False,
                   savepath=None):
    # Filter for NaNs, any vitals measurements more than 3 standard deviations from the population mean.

    if ref_cov_list is None:
        ref_cov_list = raw_df[name_col].unique()
    cov_stat_df = pd.DataFrame(columns=[
        'covariate', 'raw_mean', 'raw_std', 'raw_min', 'raw_max', 'qc_mean',
        'qc_std', 'qc_min', 'qc_max'
    ])
    sub_data_df = pd.DataFrame(columns=raw_df.columns)

    for cov_name, cov_df in raw_df.groupby(name_col):
        print(cov_name)

        if (cov_name in ref_cov_list):
            raw_cov_val = cov_df[val_col].values
            cov_mean = np.nanmean(raw_cov_val)
            cov_std = np.nanstd(raw_cov_val)

            # Chain the filters; the original re-filtered cov_df each time, discarding earlier steps
            qc_cov_df = cov_df[~np.isnan(cov_df[val_col])]
            qc_cov_df = qc_cov_df[qc_cov_df[val_col] > 0]
            qc_cov_df = qc_cov_df[qc_cov_df[val_col] <= 999.0]
            qc_cov_df = qc_cov_df[qc_cov_df[val_col] >= (cov_mean - 3 * cov_std)]
            qc_cov_df = qc_cov_df[qc_cov_df[val_col] <= (cov_mean + 3 * cov_std)]
            qc_cov_val = qc_cov_df[val_col].values
            cov_stat_df = cov_stat_df.append(
                {
                    'covariate': cov_name,
                    'raw_mean': np.nanmean(raw_cov_val),
                    'raw_std': np.nanstd(raw_cov_val),
                    'raw_min': np.nanmin(raw_cov_val),
                    'raw_max': np.nanmax(raw_cov_val),
                    'qc_mean': np.nanmean(qc_cov_val),
                    'qc_std': np.nanstd(qc_cov_val),
                    'qc_min': np.nanmin(qc_cov_val),
                    'qc_max': np.nanmax(qc_cov_val)
                },
                ignore_index=True)
            if plot:
                plt.figure(figsize=(12, 6))
                plt.subplot(1, 2, 1)
                sns.distplot(raw_cov_val[~np.isnan(raw_cov_val)])
                plt.title(cov_name + ': before qc')

                plt.subplot(1, 2, 2)
                sns.distplot(qc_cov_val)
                plt.title(cov_name + ': after qc')
                plt.savefig(
                    os.path.join(fig_dir,
                                 'qc_hist_{}.{}'.format(cov_name, fig_format)))

            sub_data_df = sub_data_df.append(qc_cov_df)

            if savepath is not None:
                pickle.dump((cov_stat_df, sub_data_df), open(savepath, 'wb'))
    return cov_stat_df, sub_data_df
Example No. 41
sns.boxplot(x=data_df.columns[1],
            y=data_df.columns[0],
            data=data_df,
            ax=axes[1, 0])

sns.barplot(x=data_df.columns[1],
            y=data_df.columns[0],
            data=data_df,
            ax=axes[1, 1])
#
# plt.show()
#Gaussian

x = data_n
mu = np.nanmean(x)
sigma = np.nanstd(x)
#print(mu, sigma)


def gauss_fun(x, mu, sigma):
    p = (1 / (np.sqrt(2 * np.pi * np.square(sigma)))) * np.exp(
        -(np.square(x - mu)) / (2 * np.square(sigma)))
    return p


#print(gauss_fun(data_n, mu, sigma))


# Gaussian logL
def gausslogl(x):
    logL = np.log(gauss_fun(x, mu, sigma))
    return logL  # per-sample log-likelihood values; sum them for the total log-likelihood
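A quick numerical check that gauss_fun integrates to one (illustrative mu and sigma; assumes numpy is imported as np):

grid = np.linspace(-5.0, 5.0, 10001)
print(np.trapz(gauss_fun(grid, 0.0, 1.0), grid))  # ~1.0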
Example No. 42
def scatter_plots():
    def func(x, a, b):
        return (a * x)**b

    Ic_Ni = []
    Ic_Ejecta = []
    Ic_tp = []
    Ic_Lp = []

    Ic_BL_Ni = []
    Ic_BL_Ejecta = []
    Ic_BL_tp = []
    Ic_BL_Lp = []

    Ib_Ni = []
    Ib_Ejecta = []
    Ib_tp = []
    Ib_Lp = []

    IIb_Ni = []
    IIb_Ejecta = []
    IIb_tp = []
    IIb_Lp = []

    GRB_SN_Ni = []
    GRB_SN_Ejecta = []
    GRB_SN_tp = []
    GRB_SN_Lp = []

    plt.figure()
    plt.ylabel(r'Ejecta Mass  [$M_{\odot}$]')  # raw strings: '\o' is an invalid escape sequence otherwise
    plt.xlabel(r'$Ni^{56}$ Mass [$M_{\odot}$]')
    plt.title(
        'Plot of Ni Mass versus Ejecta Mass for Different Types of Supernovae')
    plt.xscale('log')
    plt.yscale('log')
    plt.xlim(10**-2, 10**0.5)
    plt.ylim(10**-0.3, 10**1.2)
    plt.minorticks_on()

    for i in range(len(Ejecta_Masses) - 1):

        if Type[i] == 'Ic':

            Ic_Ni.append(Ni_Masses[i])
            Ic_Ejecta.append(Ejecta_Masses[i])

            Ic = plt.scatter(Ni_Masses[i],
                             Ejecta_Masses[i],
                             s=80,
                             marker='d',
                             color='red')
            plt.errorbar(Ni_Masses[i],
                         Ejecta_Masses[i],
                         Ejecta_Masses_Err_[i],
                         Ni_Masses_Err_[i],
                         color='red',
                         elinewidth=1)

        if Type[i] == 'Ic-BL':

            Ic_BL_Ni.append(Ni_Masses[i])
            Ic_BL_Ejecta.append(Ejecta_Masses[i])

            Ic_BL = plt.scatter(Ni_Masses[i],
                                Ejecta_Masses[i],
                                s=80,
                                marker='<',
                                color='blue')
            plt.errorbar(Ni_Masses[i],
                         Ejecta_Masses[i],
                         Ejecta_Masses_Err_[i],
                         Ni_Masses_Err_[i],
                         color='blue',
                         elinewidth=1)

        if Type[i] == 'Ib':

            Ib_Ni.append(Ni_Masses[i])
            Ib_Ejecta.append(Ejecta_Masses[i])

            Ib = plt.scatter(Ni_Masses[i],
                             Ejecta_Masses[i],
                             s=80,
                             marker='o',
                             color='green')
            plt.errorbar(Ni_Masses[i],
                         Ejecta_Masses[i],
                         Ejecta_Masses_Err_[i],
                         Ni_Masses_Err_[i],
                         color='green',
                         elinewidth=1)

        if Type[i] == 'IIb':

            IIb_Ni.append(Ni_Masses[i])
            IIb_Ejecta.append(Ejecta_Masses[i])

            IIb = plt.scatter(Ni_Masses[i],
                              Ejecta_Masses[i],
                              s=80,
                              marker='>',
                              color='orange')
            plt.errorbar(Ni_Masses[i],
                         Ejecta_Masses[i],
                         Ejecta_Masses_Err_[i],
                         Ni_Masses_Err_[i],
                         color='orange',
                         elinewidth=1)

        if Type[i] == 'GRB-SN':

            GRB_SN_Ni.append(Ni_Masses[i])
            GRB_SN_Ejecta.append(Ejecta_Masses[i])

            GRB_SN = plt.scatter(Ni_Masses[i],
                                 Ejecta_Masses[i],
                                 s=100,
                                 marker='*',
                                 color='palevioletred')
            plt.errorbar(Ni_Masses[i],
                         Ejecta_Masses[i],
                         Ejecta_Masses_Err_[i],
                         Ni_Masses_Err_[i],
                         color='palevioletred',
                         elinewidth=1)

    cri = plt.scatter(Ni_Masses[-1],
                      Ejecta_Masses[-1],
                      s=80,
                      marker='^',
                      color='lawngreen')
    plt.errorbar(Ni_Masses[-1],
                 Ejecta_Masses[-1],
                 Ejecta_Masses_Err_[-1],
                 Ni_Masses_Err_[-1],
                 color='lawngreen',
                 elinewidth=1)

    Ni_Masses_1 = Ni_Masses[np.logical_not(np.isnan(Ni_Masses))]
    Ejecta_Masses_1 = Ejecta_Masses[np.logical_not(np.isnan(Ni_Masses))]
    Ejecta_Masses_2 = Ejecta_Masses_1[np.logical_not(
        np.isnan(Ejecta_Masses_1))]
    Ni_Masses_2 = Ni_Masses_1[np.logical_not(np.isnan(Ejecta_Masses_1))]

    plt.legend([Ic, Ic_BL, Ib, IIb, cri, GRB_SN],
               ['Ic', 'Ic_BL', 'Ib', 'IIb', '2019cri', 'GRB-SN'],
               loc='upper left')
    plt.show()
    plt.close()

    R_coeff_1 = np.nansum(
        ((Ni_Masses - np.nanmean(Ni_Masses)) / np.nanstd(Ni_Masses)) *
        ((Ejecta_Masses - np.nanmean(Ejecta_Masses)) / np.nanstd(Ejecta_Masses))
    ) / (len(Ejecta_Masses) - 1)  # standardize each factor fully: the original divided only the mean, not the deviation, by the std

    # Second
    plt.figure()
    plt.xlabel('$t_{p}$ [Days]')
    plt.ylabel(r'Log($L_{p}$) [$erg\ s^{-1}$]')
    plt.xscale('linear')
    plt.xlim(10**0.8, 10**1.88)
    plt.title(
        'Plot of the Log(Peak Luminosity) versus Rise Time for Different Types of Supernovae'
    )
    plt.minorticks_on()

    for i in range(len(Ejecta_Masses) - 1):

        if Type[i] == 'Ic':

            Ic_tp.append(t_p[i])
            Ic_Lp.append(Log_Lp[i])

            Ic = plt.scatter(t_p[i], Log_Lp[i], s=80, marker='d', color='red')
            plt.errorbar(t_p[i],
                         Log_Lp[i],
                         Log_Lp_Err_[i],
                         t_p_Err_[i],
                         color='red',
                         elinewidth=1)

        if Type[i] == 'Ic-BL':

            Ic_BL_tp.append(t_p[i])
            Ic_BL_Lp.append(Log_Lp[i])

            Ic_BL = plt.scatter(t_p[i],
                                Log_Lp[i],
                                s=80,
                                marker='<',
                                color='blue')
            plt.errorbar(t_p[i],
                         Log_Lp[i],
                         Log_Lp_Err_[i],
                         t_p_Err_[i],
                         color='blue',
                         elinewidth=1)

        if Type[i] == 'Ib':

            Ib_tp.append(t_p[i])
            Ib_Lp.append(Log_Lp[i])

            Ib = plt.scatter(t_p[i],
                             Log_Lp[i],
                             s=80,
                             marker='o',
                             color='green')
            plt.errorbar(t_p[i],
                         Log_Lp[i],
                         Log_Lp_Err_[i],
                         t_p_Err_[i],
                         color='green',
                         elinewidth=1)

        if Type[i] == 'IIb':

            IIb_tp.append(t_p[i])
            IIb_Lp.append(Log_Lp[i])

            IIb = plt.scatter(t_p[i],
                              Log_Lp[i],
                              s=80,
                              marker='>',
                              color='orange')
            plt.errorbar(t_p[i],
                         Log_Lp[i],
                         Log_Lp_Err_[i],
                         t_p_Err_[i],
                         color='orange',
                         elinewidth=1)

        if Type[i] == 'GRB-SN':

            GRB_SN_tp.append(t_p[i])
            GRB_SN_Lp.append(Log_Lp[i])

            GRB_SN = plt.scatter(t_p[i],
                                 Log_Lp[i],
                                 s=100,
                                 marker='*',
                                 color='palevioletred')
            plt.errorbar(t_p[i],
                         Log_Lp[i],
                         Log_Lp_Err_[i],
                         t_p_Err_[i],
                         color='palevioletred',
                         elinewidth=1)

    cri = plt.scatter(t_p[-1], Log_Lp[-1], s=80, marker='^', color='lawngreen')
    plt.errorbar(t_p[-1],
                 Log_Lp[-1],
                 Log_Lp_Err_[-1],
                 t_p_Err_[-1],
                 color='lawngreen',
                 elinewidth=1)

    plt.legend([Ic, Ic_BL, Ib, IIb, cri, GRB_SN],
               ['Ic', 'Ic-BL', 'Ib', 'IIb', '2019cri', 'GRB-SN'],
               loc='upper right')
    plt.show()
    plt.close()

    # Third
    plt.figure()
    plt.xlabel(r'Log($L_{p}$) [$erg\ s^{-1}$]')
    plt.ylabel(r'Ejecta Mass [$M_{\odot}$]')
    plt.yscale('log')
    plt.ylim(10**-0.3, 10**1.3)
    plt.xlim(41.7, 43.8)
    plt.title(
        'Plot of the Log(Peak Luminosity) versus Ejecta Mass for Different Types of Supernovae'
    )
    plt.minorticks_on()

    for i in range(len(Ejecta_Masses) - 1):

        if Type[i] == 'Ic':

            Ic = plt.scatter(Log_Lp[i],
                             Ejecta_Masses[i],
                             s=80,
                             marker='d',
                             color='red')
            plt.errorbar(Log_Lp[i],
                         Ejecta_Masses[i],
                         Ejecta_Masses_Err_[i],
                         Log_Lp_Err_[i],
                         color='red',
                         elinewidth=1)

        if Type[i] == 'Ic-BL':

            Ic_BL = plt.scatter(Log_Lp[i],
                                Ejecta_Masses[i],
                                s=80,
                                marker='<',
                                color='blue')
            plt.errorbar(Log_Lp[i],
                         Ejecta_Masses[i],
                         Ejecta_Masses_Err_[i],
                         Log_Lp_Err_[i],
                         color='blue',
                         elinewidth=1)

        if Type[i] == 'Ib':

            Ib = plt.scatter(Log_Lp[i],
                             Ejecta_Masses[i],
                             s=80,
                             marker='o',
                             color='green')
            plt.errorbar(Log_Lp[i],
                         Ejecta_Masses[i],
                         Ejecta_Masses_Err_[i],
                         Log_Lp_Err_[i],
                         color='green',
                         elinewidth=1)

        if Type[i] == 'IIb':

            IIb = plt.scatter(Log_Lp[i],
                              Ejecta_Masses[i],
                              s=80,
                              marker='>',
                              color='orange')
            plt.errorbar(Log_Lp[i],
                         Ejecta_Masses[i],
                         Ejecta_Masses_Err_[i],
                         Log_Lp_Err_[i],
                         color='orange',
                         elinewidth=1)

        if Type[i] == 'GRB-SN':

            GRB_SN = plt.scatter(Log_Lp[i],
                                 Ejecta_Masses[i],
                                 s=100,
                                 marker='*',
                                 color='palevioletred')
            plt.errorbar(Log_Lp[i],
                         Ejecta_Masses[i],
                         Ejecta_Masses_Err_[i],
                         Log_Lp_Err_[i],
                         color='palevioletred',
                         elinewidth=1)

    cri = plt.scatter(Log_Lp[-1],
                      Ejecta_Masses[-1],
                      s=80,
                      marker='^',
                      color='lawngreen')
    plt.errorbar(Log_Lp[-1],
                 Ejecta_Masses[-1],
                 Ejecta_Masses_Err_[-1],
                 Log_Lp_Err_[-1],
                 color='lawngreen',
                 elinewidth=1)

    plt.legend([Ic, Ic_BL, Ib, IIb, cri, GRB_SN],
               ['Ic', 'Ic-BL', 'Ib', 'IIb', '2019cri', 'GRB-SN'],
               loc='upper left')
    plt.show()
    plt.close()

    R_coeff_2 = np.nansum(
        ((Log_Lp - np.nanmean(Log_Lp)) / np.nanstd(Log_Lp)) *
        ((Ejecta_Masses - np.nanmean(Ejecta_Masses)) /
         np.nanstd(Ejecta_Masses))) / (len(Ejecta_Masses) - 1)

    # Fourth
    plt.figure()
    plt.xlabel(r'Log($L_{p}$) [$erg\ s^{-1}$]')
    plt.ylabel(r'$Ni^{56}$ Mass [$M_{\odot}$]')
    plt.yscale('log')
    plt.ylim(10**-2, 10**0.5)
    plt.title(
        'Plot of the Log(Peak Luminosity) versus $Ni^{56}$ Mass for Different Types of Supernovae'
    )
    plt.minorticks_on()

    for i in range(len(Ejecta_Masses) - 1):

        if Type[i] == 'Ic':

            Ic = plt.scatter(Log_Lp[i],
                             Ni_Masses[i],
                             s=80,
                             marker='d',
                             color='red')
            plt.errorbar(Log_Lp[i],
                         Ni_Masses[i],
                         Ni_Masses_Err_[i],
                         Log_Lp_Err_[i],
                         color='red',
                         elinewidth=1)

        if Type[i] == 'Ic-BL':

            Ic_BL = plt.scatter(Log_Lp[i],
                                Ni_Masses[i],
                                s=80,
                                marker='<',
                                color='blue')
            plt.errorbar(Log_Lp[i],
                         Ni_Masses[i],
                         Ni_Masses_Err_[i],
                         Log_Lp_Err_[i],
                         color='blue',
                         elinewidth=1)

        if Type[i] == 'Ib':

            Ib = plt.scatter(Log_Lp[i],
                             Ni_Masses[i],
                             s=80,
                             marker='o',
                             color='green')
            plt.errorbar(Log_Lp[i],
                         Ni_Masses[i],
                         Ni_Masses_Err_[i],
                         Log_Lp_Err_[i],
                         color='green',
                         elinewidth=1)

        if Type[i] == 'IIb':

            IIb = plt.scatter(Log_Lp[i],
                              Ni_Masses[i],
                              s=80,
                              marker='>',
                              color='orange')
            plt.errorbar(Log_Lp[i],
                         Ni_Masses[i],
                         Ni_Masses_Err_[i],
                         Log_Lp_Err_[i],
                         color='orange',
                         elinewidth=1)

        if Type[i] == 'GRB-SN':

            GRB_SN = plt.scatter(Log_Lp[i],
                                 Ni_Masses[i],
                                 s=100,
                                 marker='*',
                                 color='palevioletred')
            plt.errorbar(Log_Lp[i],
                         Ni_Masses[i],
                         Ni_Masses_Err_[i],
                         Log_Lp_Err_[i],
                         color='palevioletred',
                         elinewidth=1)

    cri = plt.scatter(Log_Lp[-1],
                      Ni_Masses[-1],
                      s=80,
                      marker='^',
                      color='lawngreen')
    plt.errorbar(Log_Lp[-1],
                 Ni_Masses[-1],
                 Ni_Masses_Err_[-1],
                 Log_Lp_Err_[-1],
                 color='lawngreen',
                 elinewidth=1)

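    # Keep only entries with measured (non-NaN) nickel masses so that
    # curve_fit below receives finite values.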
    Ni_Masses_1 = Ni_Masses[np.logical_not(np.isnan(Ni_Masses))]
    Log_Lp_1 = Log_Lp[np.logical_not(np.isnan(Ni_Masses))]
    Ni_Masses_Err_1 = Ni_Masses_Err_[np.logical_not(np.isnan(Ni_Masses))]
    Log_Lp_Err_1 = Log_Lp_Err_[np.logical_not(np.isnan(Ni_Masses))]

    # curve_fit's `sigma` is the uncertainty on the y-data, so weight by the
    # nickel-mass errors rather than the inverted luminosity errors
    popt, pcov = curve_fit(func, Log_Lp_1, Ni_Masses_1, sigma=Ni_Masses_Err_1)
    x_new_1 = np.linspace(41.8, 43.5, 10000)
    y_new_1 = (popt[0] * x_new_1)**popt[1]
    plt.plot(x_new_1, y_new_1, 'r-')

    plt.legend([Ic, Ic_BL, Ib, IIb, cri, GRB_SN],
               ['Ic', 'Ic-BL', 'Ib', 'IIb', '2019cri', 'GRB-SN'],
               loc='upper left')
    plt.show()
    plt.close()

    Cal_Ni_Mass = (popt[0] * Log_Lp_1)**popt[1]
    print(f'Fit Equation: Ni_Mass = ({popt[0]} * Log(L_p)) ** {popt[1]}')
    Ni_Diff = ((Ni_Masses_1 - Cal_Ni_Mass) / Cal_Ni_Mass) * 100
    Mean_Diff = statistics.mean(Ni_Diff)
    Median_Diff = statistics.median(Ni_Diff)
    Stdev_Diff = statistics.stdev(Ni_Diff, Mean_Diff)

    plt.figure()
    plt.hist(Ni_Diff[0:45], bins=13, facecolor='g', edgecolor='k', alpha=0.75)
    # hist expects array-like input, so pass the last value (SN 2019cri) as a
    # one-element slice
    plt.hist(Ni_Diff[-1:],
             bins=1,
             facecolor='purple',
             edgecolor='k',
             alpha=0.75)
    plt.xlabel('Percentage [%]')
    plt.ylabel('Frequency')
    plt.title('Histogram of Percentage Difference in Calculated $Ni^{56}$ Mass')
    plt.text(
        30, 9,
        f'$\\mu$ = {Mean_Diff:.1f} % \n \n $\\sigma$ = {Stdev_Diff:.1f} % \n \n Median = {Median_Diff:.1f} % '
    )
    plt.axvline(Median_Diff, color='k', linestyle='dashed', linewidth=2)
    plt.text(130, 4, 'SN2019cri ')
    plt.axvline(Ni_Diff[-1], color='k', linestyle='dashed', linewidth=0.9)
    plt.minorticks_on()

    return
Exemplo n.º 43
0
def std_ratio(pred, obs):
    """Ratio of predicted to observed standard deviation, ignoring NaNs."""
    return np.nanstd(pred) / np.nanstd(obs)
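
# A minimal usage sketch (illustrative values, not from the original source);
# a ratio near 1 means the prediction reproduces the observed spread:
#
#   >>> std_ratio(np.array([1.0, 2.0, np.nan]), np.array([0.9, 2.1, 3.0]))
#   # ≈ 0.58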
Exemplo n.º 44
0
def std(array):
    return np.nanstd(array)
Exemplo n.º 45
0
def normout(a):
    """Squash an array's z-scores through tanh, mapping values into (-1, 1)."""
    meancalc = np.nanmean(a)
    stdcalc = np.nanstd(a)
    return np.tanh((a - meancalc) / stdcalc)
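
# Usage sketch (illustrative values only): z-scores are squashed through
# tanh, so the output is bounded in (-1, 1) and a single extreme outlier
# cannot dominate the scale. NaNs propagate through tanh unchanged, while
# the nan-aware mean and std ignore them.
#
#   >>> normout(np.array([1.0, 2.0, 3.0, np.nan, 100.0]))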
Exemplo n.º 46
0
def estimate_vario(mp, npoints=2000, max_dist=None, interval=None):
    """ Given a Multipoint input, estimate a spatial variogram. By default,
    the variogram is approximated from a random subsample of points. If
    npoints is None, the full variogram is calculated.

    Parameters:
    -----------
    mp : a Multipoint instance with scalar data
    npoints : number of values used for the variogram approximation. If
              npoints is None, the full variogram is calculated.
    max_dist : the maximum lag
    interval : the lag interval

    Returns:
    --------
    lags : ndarray
    variogram : ndarray
    """
    if len(mp.data) != len(mp.vertices):
        raise Exception('estimate_vario() requires a Multipoint with '
                        'scalar data')
    if npoints is None or npoints > len(mp):
        npoints = len(mp)

    irand = random.sample(range(len(mp)), npoints)
    verts = mp.get_vertices()[irand]
    z = np.array([mp.data[i] for i in irand])

    if isinstance(mp.crs, GeographicalCRS):
        import warnings
        import pyproj
        warnings.warn(
            "For improved performance, consider projecting data to an "
            "approximately equidistant reference system first.")
        geod = pyproj.Geod(ellps="WGS84")

        def distfunc(a, b):
            return geod.inv(a[0], a[1], b[0], b[1])[2]
    else:
        distfunc = "euclidean"

    dist = pdist(verts, distfunc)
    I, J = ppairs(z)
    diff = z[I] - z[J]

    if max_dist is None:
        max_dist = dist.max()
    else:
        max_dist = float(max_dist)

    if interval is None:
        interval = max_dist / 10.0
    else:
        interval = float(interval)

    lags = np.arange(0, max_dist, interval)
    sigma_variance = np.empty_like(lags)
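    # Each bin holds the semivariance: half the variance of the pairwise
    # differences whose separation distance falls in [lag, lag + interval).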
    for i, lag in enumerate(lags):
        band = (lag <= dist) * (dist < lag + interval)
        sigma_variance[i] = 0.5 * np.nanstd(diff[band])**2
    return lags, sigma_variance
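
# A hypothetical usage sketch (assumes a karta-style Multipoint carrying
# scalar data in a projected CRS; names and numbers are illustrative):
#
#   lags, gamma = estimate_vario(mp, npoints=500, max_dist=5e4, interval=2500.0)
#   plt.plot(lags, gamma, 'o')   # empirical semivariogram: semivariance vs. lag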