Example #1
def filterByMutualRemoval(data1, data2):
    nSTD = 1
    
    x=[]
    y=[]

    std1 = stats.tstd(data1)
    mean1 = stats.tmean(data1)
    
    std2 = stats.tstd(data2)
    mean2 = stats.tmean(data2)
    
    print 'm1, std1: ', mean1, std1
    print 'm2, std2: ', mean2, std2
    
    for i, value in enumerate(data1): 
        if (data1[i] > mean1 + (nSTD*std1)):
            pass
        elif (data1[i] < mean1 - (nSTD*std1)):
            pass
        elif data2[i] > mean2 + (nSTD*std2):
            pass           
        elif value < mean2 - (nSTD*std2):
            pass
        else:
            x.append(data1[i])
            y.append(data2[i])
                    
    return x,y
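
A vectorized sketch of the same mutual n-sigma filter (an added illustration, not part of the original snippet; it assumes numpy-convertible inputs):

import numpy as np
from scipy import stats

def filter_by_mutual_removal_np(data1, data2, n_std=1):
    # Keep index i only if data1[i] and data2[i] both lie within n_std
    # sample standard deviations of their respective means.
    d1, d2 = np.asarray(data1, dtype=float), np.asarray(data2, dtype=float)
    keep = ((np.abs(d1 - stats.tmean(d1)) <= n_std * stats.tstd(d1)) &
            (np.abs(d2 - stats.tmean(d2)) <= n_std * stats.tstd(d2)))
    return d1[keep], d2[keep]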
Example #2
def num_features_smarch(samplefile_, n_):
    _configs = list()

    if os.path.exists(samplefile_):
        with open(samplefile_, "r") as sf:
            for line in sf:
                raw = line.split(',')
                config = raw[:len(raw) - 1]
                _configs.append(config)
    else:
        return -1

    _samples = list()
    if n_ < 0:
        _samples = _configs.copy()
    else:
        rands = get_random(n_, len(_configs))
        for r in rands:
            _samples.append(_configs[r - 1])

    _fnums = list()
    for sample in _samples:
        fnum = 0
        for v in sample:
            if not v.startswith('-'):
                fnum += 1
        _fnums.append(fnum)

    if n_ < 0:
        avg = stats.tmean(_fnums)
        std = stats.tstd(_fnums)
        return avg, std

    return stats.tmean(_fnums), stats.tstd(_fnums)
Example #3
def plot_acc_results(paths, name):
    
    plt.clf()
    for c, l, p in zip(['green', 'orange'], ['Random Teacher', 'RL Teacher'], paths):
        rewards = []
        with open(p) as file:
            reader = csv.reader(file, delimiter=',')
            for t in reader:
                rewards.append([])
                for r in t:
                    rewards[-1].append(float(r))

        rewards = rewards[:200]
        avg = [sum(x) / len(x) for x in rewards]
        high = [sum(x) / len(x) + stats.tstd(x) / 4 for x in rewards]
        low = [sum(x) / len(x) - stats.tstd(x) / 4 for x in rewards]

        plt.plot(avg, color=c, label=l)
        plt.fill_between(list(range(len(low))), low, high, alpha=.2, color=c)

    plt.xlabel('Student Steps')
    plt.ylabel('Validation Accuracy')
    plt.title('Student Performance Comparison')
    plt.legend()
    plt.savefig(name, bbox_inches='tight')
Example #4
def calc_stats(bonds, density_vals, integral_1, integral_2=[0]):
    av_densities, std_dens, av_int_1_list, std_int_1_list, av_int_2_list, std_int_2_list = [], [], [], [], [], []
    for i, bond in enumerate(bonds[:-1]):  # each bin spans [bonds[i], bonds[i + 1])
        mask = np.logical_and(bonds[i] <= density_vals,
                              density_vals < bonds[i + 1])
        dens_bin = density_vals[mask]
        int_1_bin = integral_1[mask]
        int_2_bin = integral_2[mask]
        av_density, std_density = np.mean(dens_bin), tstd(dens_bin)
        av_int_1, std_int_1 = np.mean(int_1_bin), tstd(int_1_bin)
        av_int_2, std_int_2 = np.mean(int_2_bin), tstd(int_2_bin)
        av_densities.append(av_density)
        std_dens.append(std_density)
        av_int_1_list.append(av_int_1)
        std_int_1_list.append(std_int_1)
        av_int_2_list.append(av_int_2)
        std_int_2_list.append(std_int_2)
    average_dict = {
        'density_vals': np.asarray(av_densities),
        'std_dens': np.asarray(std_dens),
        'int_1': np.asarray(av_int_1_list),
        'std_1': np.asarray(std_int_1_list),
        'int_2': np.asarray(av_int_2_list),
        'std_2': np.asarray(std_int_2_list)
    }
    return average_dict
Example #5
 def compute_ttest_for_col(self, p_thresh):
     res_4df = {'features': [], 'ttest': [], 'welch': []}
     res = dict()
     for col in self.ls_cols:
         group1 = self.df[self.df[self.group_col] == self.groups[0]][col]
         group2 = self.df[self.df[self.group_col] == self.groups[1]][col]
         ttest_eq_pop_var = stats.ttest_ind(group1, group2, equal_var=True)
         ttest_welch = stats.ttest_ind(group1, group2, equal_var=False)
         if ttest_eq_pop_var[1] < p_thresh:
             meas, struct = get_structure_measurement(
                 col, self.ls_meas, self.ls_struct)
             #print('{:<15} {}'.format(meas, struct))
             res[col] = {
                 '{}, mean'.format(self.groups[0]): stats.tmean(group1),
                 '{}, std'.format(self.groups[0]): stats.tstd(group1),
                 '{}, mean'.format(self.groups[1]): stats.tmean(group2),
                 '{}, std'.format(self.groups[1]): stats.tstd(group2),
                 'ttest': ttest_eq_pop_var[1],
                 'welch': ttest_welch[1],
                 'kurtosis': stats.kurtosis(self.df[self.group_col]),
                 'skewness': stats.skew(self.df[self.group_col])
             }
             res_4df['features'].append(struct + ' (' + meas + ')')
             res_4df['ttest'].append(ttest_eq_pop_var[1])
             res_4df['welch'].append(ttest_welch[1])
     self.save_res(res_4df)
     return res
Example #6
	def stdtrigger(img,framenum=1,threshold=threshold,message=False):
		try:
			global avg
			global std
			global xstart
			xstart=width/4
			global ystart
			ystart=height/4
			global sumlist
		except:
			sys.exit("error getting globals in stdtrigger")
		if message:
			if message=="update":
				sumlist[0]=img[xstart:(xstart*3),ystart:(ystart*3),:].sum()
				std=stats.tstd(sumlist)
				avg=sum(sumlist)/n
				return
			sumlist=[None]*n
			def calibcam(n,sumlist):
				stream=io.BytesIO()
				for i in range(n):
					yield stream
					stream.seek(0)
					fwidth = (width + 31) // 32 * 32
					fheight = (height + 15) // 16 * 16
					#starttime=time.time()
					img=np.fromstring(stream.getvalue(), dtype=np.uint8).reshape((fheight, fwidth, 3))[:height, :width, :]
					#print str((time.time()-starttime)*1000)+" ms for reading the image"
					#xstart=img.shape[0]/4
					#ystart=img.shape[1]/4
					#starttime=time.time()
					sumlist[i]=img[xstart:(xstart*3),ystart:(ystart*3),:].sum()
					#print str((time.time()-starttime)*1000)+" ms"
					stream.seek(0)
					stream.truncate()
					sys.stdout.write("\r"+str(float(i+1)*100/n)+"%        ")
					sys.stdout.flush()
	
			print "calibrating stdtrigger"
			with PiCamera() as cam:
				cam.resolution=(width,height)
				cam.framerate=80
				time.sleep(.3)
				cam.capture_sequence(calibcam(n,sumlist),"rgb",use_video_port=True)
			std=stats.tstd(sumlist)
			avg=sum(sumlist)/n
			return
		thisz=(img[xstart:(xstart*3),ystart:(ystart*3),:].sum()-avg)/std
		#print thisz
		if abs(thisz)>threshold:
			print "\nsomething weird, zscore="+str(thisz)+" at "+time.strftime("%a, %d %b %H:%M:%S", time.localtime())
			return True

		if framenum%10==0:
			sumlist[0]=img[xstart:(xstart*3),ystart:(ystart*3),:].sum()
			std=stats.tstd(sumlist)
			avg=sum(sumlist)/n

		return False
Example #7
def get_slope_error(x, y, students_t=2):
    """Return error of the slope of the line given by (x, y)"""
    assert len(x) == len(y), 'Different input lengths'
    slope = stats.linregress(x, y).slope
    D_y = stats.tstd(y)**2
    D_x = stats.tstd(x)**2
    # Formula from the MIPT lab manual
    return students_t * np.sqrt(1 / (len(x) - 2) * (D_y / D_x - slope**2))
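
A minimal usage sketch with synthetic data (the random points below are illustrative assumptions, not part of the original):

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
x = np.linspace(0.0, 10.0, 50)
y = 2.0 * x + rng.normal(scale=0.5, size=x.size)   # roughly linear data

slope = stats.linregress(x, y).slope
D_y = stats.tstd(y) ** 2    # sample variance of y
D_x = stats.tstd(x) ** 2    # sample variance of x
# Same expression as get_slope_error above, with Student coefficient t = 2.
slope_error = 2 * np.sqrt(1 / (len(x) - 2) * (D_y / D_x - slope ** 2))
print(slope, slope_error)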
Example #8
def visualize(test_name, files, columns, step_win=1000, smooth_win=101):
    band_list = []
    file_list = []
    for f in files:
        if isinstance(f, list):
            band_list.append(len(f))
            file_list = file_list + f
        else:
            band_list.append(1)
            file_list.append(f)
    data = get_data(file_list,
                    columns,
                    step_win=step_win,
                    smooth_win=smooth_win)

    for col, d in data:
        plt.clf()

        count = 0
        i = 0
        while i < len(d):
            if band_list[count] == 1:
                n, v = d[i]
                plt.plot(v, label=n)
            else:
                n = d[i][0]
                data_lists = [x[1] for x in d[i:i + band_list[count]]]
                max_len = max(len(x) for x in data_lists)
                data_list = []
                for j in range(max_len):
                    data_list.append([])
                    for l in data_lists:
                        if j < len(l):
                            data_list[-1].append(l[j])
                high, low, avg = [], [], []
                for v in data_list:
                    avg.append(sum(v) / len(v))
                    high.append(avg[-1] + stats.tstd(v))
                    low.append(avg[-1] - stats.tstd(v))
                plt.plot(list(range(0,
                                    len(avg) * step_win, step_win)),
                         avg,
                         label=n)
                plt.fill_between(list(range(0,
                                            len(avg) * step_win, step_win)),
                                 low,
                                 high,
                                 alpha=.2)
            i += band_list[count]
            count += 1

        plt.legend(loc=0)
        # plt.title(test_name)
        plt.ylabel(col)
        plt.xlabel('Steps')
        plt.savefig('images/' + test_name + '_' + col + '.png')
Example #9
def write_transforms_to_file(transforms,filename="dummy_transforms.txt",min_pairs=3,p_level=0.05,std_min=0.0,id_assays=True,full_info=False):
    """
Write selected transformations to file.
min_pairs  : Minimum number of pairs per transformation
p_level    : Maximum p_value
std_min    : Minimum Standard deviation of differences within pairs
id_assays  : separately output statistics for using pairs from identical assays only
    """

    print "Writing significant transformations to file"
    if min_pairs < 2:
        print "At least 2 pairs per transformation are necessary for significance tests."
        print "min_pairs set to 2"
        min_pairs = 2

    header = "Transformation\tAssay_specific\tp-value\tAverage_Activity_Difference\tSigma_Differences\tnpairs"
    if full_info: header = header+"\tLigand_IDs\tlog(Activities[nM])\tAssay_Identity"
    header = header+"\n"
    f = open(filename,"w")
    f.write(header)

    for transf,pairs in transforms.iteritems():
        if len(pairs["ligand_ids"]) < min_pairs: continue
        diffs = pairs["deltas"]
        npairs_all = len(diffs)
        p_all = stats.ttest_rel(diffs,[0.0 for i in diffs])[1]
        av_all = sum(diffs)/npairs_all
        std_all = stats.tstd(diffs)
        if npairs_all >= min_pairs and p_all <= p_level and std_all >= std_min:
            f.write(transf+"\t"+"mixed_assays"+"\t"+"{:4.2}".format(p_all)+"\t"+"{:4.3}".format(av_all)+"\t"+"{:4.2}".format(std_all)+"\t"+str(npairs_all))
            if full_info:
                for i in range(npairs_all): f.write("\t"+pairs["ligand_ids"][i][0]+":"+pairs["ligand_ids"][i][1])
                for i in range(npairs_all): f.write("\t"+"{:4.3}".format(pairs["activities1"][i])+":"+"{:4.3}".format(pairs["activities2"][i]))
                for i in range(npairs_all): f.write("\t"+str(pairs["assay_identity"][i]))
            f.write("\n")
        if id_assays == False: continue
        diffs_id = list(set([pairs["deltas"][i] for i in range(npairs_all) if pairs["assay_identity"][i]]))
        npairs_id = len(diffs_id)
        if npairs_id < min_pairs: continue
        p_id = stats.ttest_rel(diffs_id,[0.0 for i in diffs_id])[1]
        av_id = sum(diffs_id)/npairs_id
        std_id = stats.tstd(diffs_id)
        if npairs_id >= min_pairs and p_id <= p_level and std_id >= std_min:
            f.write(transf+"\t"+"ident_assays"+"\t"+"{:4.2}".format(p_id)+"\t"+"{:4.3}".format(av_id)+"\t"+"{:4.2}".format(std_id)+"\t"+str(npairs_id))
            if full_info:
                for i in range(npairs_all):
                    if pairs["assay_identity"][i] == True: f.write("\t"+pairs["ligand_ids"][i][0]+":"+pairs["ligand_ids"][i][1])
                for i in range(npairs_all):
                    if pairs["assay_identity"][i] == True:f.write("\t"+"{:4.3}".format(pairs["activities1"][i])+":"+"{:4.2}".format(pairs["activities2"][i]))
                for i in range(npairs_all):
                   if pairs["assay_identity"][i] == True:f.write("\t"+str(pairs["assay_identity"][i]))
            f.write("\n")

    f.close()
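
The selection logic above boils down to a paired t-test against zero plus two thresholds; a small self-contained sketch on hypothetical activity differences, using the function's default min_pairs=3, p_level=0.05, std_min=0.0:

from scipy import stats

deltas = [0.4, 0.6, 0.3, 0.8, 0.5]          # hypothetical per-pair differences
p_value = stats.ttest_rel(deltas, [0.0 for _ in deltas])[1]
mean_delta = sum(deltas) / len(deltas)
sigma = stats.tstd(deltas)
# A transformation is written out only if all three thresholds pass.
keep = len(deltas) >= 3 and p_value <= 0.05 and sigma >= 0.0
print(p_value, mean_delta, sigma, keep)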
Example #10
def get_std_deviation(img):
    path = images_path + img
    img = cv2.imread(path)
    hist_b = cv2.calcHist([img], [0], None, [256], [0, 256])
    hist_g = cv2.calcHist([img], [1], None, [256], [0, 256])
    hist_r = cv2.calcHist([img], [2], None, [256], [0, 256])

    tstd_b = tstd(hist_b)
    tstd_g = tstd(hist_g)
    tstd_r = tstd(hist_r)

    return [tstd_r[0], tstd_g[0], tstd_b[0]]
Example #11
    def fit(self, x, y):
        self.x = x
        self.y = y
        self.mean_x = float('%.02f' % statistics.mean(self.x))
        self.mean_y = float('%.02f' % statistics.mean(self.y))
        self.stdev_x = float('%.02f' % stats.tstd(self.x))
        self.stdev_y = float('%.02f' % stats.tstd(self.y))
        self.corr_coeff, self.p_value = stats.pearsonr(self.x, self.y)
        self.corr_coeff = float('%.2f' % self.corr_coeff)

        self.slope = self.corr_coeff * (self.stdev_y / self.stdev_x)
        self.intercept = self.mean_y - (self.slope * self.mean_x)
Example #12
        def train_models(slope_history):
            if len(slope_history) < 3:
                return None
            #slope_history = list of tuples: (avg_outdoor, slope, std_err, temperature_profile[0,0])
            sh = np.matrix(slope_history)
            lnmodel = LinearRegression()
            error_inverse = np.array(1 / sh[:, 2])[:, 0]
            lnfit = lnmodel.fit(sh[:, 0], sh[:, 1])
            #svr_rbf = SVR(kernel='linear', C=10, epsilon=0.5)
            #svrfit = svr_rbf.fit(sh[:, 0], sh[:,1])

            ln_residue = []
            for i in range(len(slope_history)):
                p = lnfit.predict(slope_history[i][0])[0][0]
                ln_residue.append((p - slope_history[i][1])**2)

            ln_std = stats.tstd(ln_residue)

            ln_mean = stats.tmean(ln_residue)
            new_sh = None
            for i in range(len(ln_residue)):
                if ln_residue[i] < ln_mean + 3 * ln_std:
                    #sh = np.delete(sh,i,axis=0)
                    if new_sh is None:
                        new_sh = sh[i, :]
                    else:
                        new_sh = np.vstack((new_sh, sh[i, :]))

            sh = new_sh
            #redo the fit
            error_inverse = np.array(1 / sh[:, 2])[:, 0]

            slope_mean = stats.tmean(sh[:, 1])
            slope_std = stats.tstd(sh[:, 1])

            lnfit = lnmodel.fit(sh[:, 0], sh[:, 1])
            ln_residue = []
            for i in range(len(sh)):
                p = lnfit.predict(sh[i, 0])[0][0]
                ln_residue.append((p - sh[i, 1])**2)

            ln_std = stats.tstd(ln_residue)
            ln_mean = stats.tmean(ln_residue)

            return {
                'ln_model': lnfit,
                'ln_residue': ln_residue,
                'ln_residue_std': ln_std,
                'ln_residue_mean': ln_mean,
                'slope_mean': slope_mean,
                'slope_std': slope_std,
                'data_matrix': sh
            }
Example #13
def prediction_interval(x, alpha=0.05, type="two-sided", k_future_obs=1):
    """Compute an interval to contain future measurements with given confidence
    
    Parameters
    ----------
    x : array_like
        Input array or object that can be converted to an array.
    alpha : float
        Individual comparison false positive rate. Default value is 0.05.
    type : string
        The type of interval to be returned. Upper, lower, or two-sided. Default is two-sided.
    k_future_obs: int
        Number of total future comparisons(e.g., number of wells multiplied by the number of analytes).
        
    Returns
    -------
    lpl, upl : float
        Lower and upper prediction limits.

    Notes
    -----
    
    Uses Bonferroni inequality method. 
    
    """

    if not isinstance(x, np.ndarray):
        x = np.asarray(x)

    alpha_s = alpha

    if k_future_obs > 1:
        alpha = alpha_s / k_future_obs

    if type == "two-sided":
        lpl = x.mean() - stats.tstd(x) * stats.t.ppf(
            1 - alpha / 2, x.size - 1) * np.sqrt(1 + 1 / x.size)
        upl = x.mean() + stats.tstd(x) * stats.t.ppf(
            1 - alpha / 2, x.size - 1) * np.sqrt(1 + 1 / x.size)

    if type == "upper":
        lpl = np.NINF
        upl = x.mean() + stats.tstd(x) * stats.t.ppf(
            1 - alpha, x.size - 1) * np.sqrt(1 + 1 / x.size)

    if type == "lower":
        lpl = x.mean() - stats.tstd(x) * stats.t.ppf(
            1 - alpha, x.size - 1) * np.sqrt(1 + 1 / x.size)
        upl = np.Inf

    return lpl, upl
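
For reference, the two-sided limits can be computed directly; a standalone sketch on hypothetical data (the sample values, alpha and k below are assumptions for illustration):

import numpy as np
from scipy import stats

x = np.array([3.2, 2.9, 3.5, 3.1, 2.8, 3.4, 3.0, 3.3])   # hypothetical sample
alpha, k_future_obs = 0.05, 4
a = alpha / k_future_obs                                  # Bonferroni adjustment
half_width = (stats.tstd(x) * stats.t.ppf(1 - a / 2, x.size - 1)
              * np.sqrt(1 + 1 / x.size))
lpl, upl = x.mean() - half_width, x.mean() + half_width
print(lpl, upl)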
Example #14
 def write_current_stats(log_file, link_bandwidth_usage_Mbps, switch_num_flows, response_times, cur_group_index, group):
     link_bandwidth_list = []
     total_num_flows = 0
     
     for switch_dpid in link_bandwidth_usage_Mbps:
         for port_no in link_bandwidth_usage_Mbps[switch_dpid]:
             link_bandwidth_list.append(link_bandwidth_usage_Mbps[switch_dpid][port_no])
     
     for switch_dpid in switch_num_flows:
         total_num_flows += switch_num_flows[switch_dpid]
     
     avg_response_time = sum(response_times) / float(len(response_times))
     avg_network_time = sum(network_times) / float(len(network_times))
     avg_processing_time = sum(processing_times) / float(len(processing_times))
     
     average_link_bandwidth_usage = sum(link_bandwidth_list) / float(len(link_bandwidth_list))
     traffic_concentration = 0
     if average_link_bandwidth_usage != 0:
         traffic_concentration = max(link_bandwidth_list) / average_link_bandwidth_usage
     link_util_std_dev = tstd(link_bandwidth_list)
     
     log_file.write('Group:' + str(cur_group_index))
     log_file.write(' NumReceivers:' + str(len(group.dst_hosts)))
     log_file.write(' TotalNumFlows:' + str(total_num_flows))
     log_file.write(' MaxLinkUsageMbps:' + str(max(link_bandwidth_list)))
     log_file.write(' AvgLinkUsageMbps:' + str(average_link_bandwidth_usage))
     log_file.write(' TrafficConcentration:' + str(traffic_concentration))
     log_file.write(' LinkUsageStdDev:' + str(link_util_std_dev))
     log_file.write(' ResponseTime:' + str(avg_response_time))
     log_file.write(' NetworkTime:' + str(avg_network_time))
     log_file.write(' ProcessingTime:' + str(avg_processing_time))
     log_file.write('\n')
Example #15
def print_stats(datums):
    print 'Mean:', stats.tmean(datums)
    print 'Median:', stats.cmedian(datums)
    print 'Std Dev:', stats.tstd(datums)
    print 'Variation:', stats.variation(datums)
    print 'Kurtosis:', stats.kurtosis(datums, fisher=False)
    print 'Skewness:', stats.skew(datums)
Example #16
def get_generation_stats(population, environment):
    average_fitness = get_average_fitness(population, environment)
    best_org = get_best_organism(population, environment)
    best_fitness = best_org.fitness(environment)
    stdev = stats.tstd([org.fitness(environment) 
                        for org in population])
    return average_fitness, stdev, best_org, best_fitness
Example #17
def calculating_season_stats(df_X_train):
    bat_avg = (df_X_train.groupby(['batsman_striker', 'season',
                                   'career_age']).sum()).reset_index()
    bat_avg['runs_per_match_avg'] = bat_avg['runs_scored'] / bat_avg['matches']
    bat_avg = bat_avg[[
        'player_id', 'batsman_striker', 'season', 'age', 'runs_per_match_avg'
    ]]
    bat_avg = (bat_avg.pivot_table(index=['player_id', 'batsman_striker'],
                                   columns=['season'],
                                   values=['runs_per_match_avg', 'age']))
    bat_avg = bat_avg.reset_index()

    #calculating standard deviation of runs per match throughout the career up to the target year
    std = bat_avg.reindex_axis(sorted(bat_avg.columns), axis=1)
    std.drop(std.columns[[0, 1, 2, 3, 4]], axis=1, inplace=True)
    std = std.T.fillna(bat_avg.mean(axis=1)).T
    #fill missing (NaN) seasons, i.e. seasons missed by players, with the mean

    std_l = []
    for i in range(len(std)):
        std_l.append(stats.tstd(std.iloc[i][3:10]))
    std['std'] = std_l
    std = std[['player_id', 'batsman_striker', 'std']]
    bat_avg_std = bat_avg.merge(std, on=['batsman_striker'])
    bat_avg_std = bat_avg_std.rename(columns={
        'player_id_x': 'player_id'
    }).drop('player_id_y', axis=1)
    return bat_avg_std
Example #18
def passos(xa, ya, xmax, ymax, series=1000):
    '''Return the mean, over the given number of series, of how many steps the drunkard takes to reach the given point.'''
    lista_passos = []
    for i in range(series):
        x, y = 0, 0
        passos = 1
        while x != xa and y != ya:
            a = direcao()
            if a == 'N':
                passos += 1
                y += 1
                if y >= ymax:
                    y = ymax
            elif a == 'S':
                passos += 1
                y -= 1
                if abs(y) >= ymax:
                    y = -ymax
            elif a == 'O':
                passos += 1
                x -= 1
                if abs(x) >= xmax:
                    x = -xmax
            else:
                passos += 1
                x += 1
                if x >= xmax:
                    x = xmax
        lista_passos.append(passos)
    print('Mean=' + str(round(stats.tmean(lista_passos), 5)) +
          '\nStandard deviation=' + str(round(stats.tstd(lista_passos), 5)))
Example #19
def main():
    train_df = pd.read_csv('data/match_feature.csv')

    # Split X, y and scale
    X, y, min_max_scaler = get_X_y(train_df)
    print("Total dataset size : ", len(X))

    #check_train_size_curve(X, y)
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.01,
                                                        random_state=42)

    model = Lgbm_Model()
    #param = model.tune(X_train, y_train)
    model.train(X_train, y_train, X_test, y_test)
    #model.save("Lgbm")
    model.evaluate(X_train, y_train, cross_val=True)
    model.evaluate(X_test, y_test)

    res = model.predict(X_train)
    print(tstd(res))
    print(tmean(res))

    plt.hist(res, bins=100)
    plt.show()

    model.plot_importance()
Example #20
def get_generation_stats(population, environment):
    """Gets the stats for the given population"""
    average_fitness = get_average_fitness(population, environment)
    best_org = get_best_organism(population, environment)
    best_fitness = best_org.fitness(environment)
    stdev = stats.tstd([org.fitness(environment) for org in population])
    return average_fitness, stdev, best_org, best_fitness
Example #21
    def std(self, **kwargs):
        """
        Unbiased standard deviation of time series.

        Parameters
        ----------
        kwargs : optional
            see documentation of :meth:`get()` method for available options

        Returns
        -------
        float
            Sample standard deviation

        Notes
        -----
        Computes the unbiased sample standard deviation, i.e. it uses a correction factor n / (n - ddof).

        See also
        --------
        scipy.stats.tstd

        """
        # get data array, time does not matter
        _, x = self.get(**kwargs)
        return tstd(x)
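
As the notes say, tstd defaults to the unbiased (ddof=1) estimator; a quick check against numpy (the example values are arbitrary):

import numpy as np
from scipy.stats import tstd

x = np.array([1.0, 2.0, 4.0, 7.0])
assert np.isclose(tstd(x), np.std(x, ddof=1))   # tstd uses ddof=1 by default
print(tstd(x))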
Example #22
    def calculation(self):
        self.clear_line()
        filename = self.entry_0_string.get()
        fileString = fisher.open_file(filename)  # get the file path
        if fileString[0] == 0:
            arr = list(map(float, fileString[1].split(' ')))
        else:
            self.show_warning(fileString[1])
            self.clear_line()
            return 1
        answer_3 = len(arr)  # number of elements in the sample
        answer_4 = fisher.normalizeFloat(
            np.mean(arr))  # arithmetic mean
        answer_5 = fisher.normalizeFloat(
            stats.tstd(arr))  # estimate of the standard deviation

        name = self.entry_1_string.get()
        countSplit = fisher.inputCountSplit(name, len(arr))
        if countSplit[0] == 0:
            arr = np.array_split(arr, countSplit[1])
        else:
            self.show_warning(countSplit[1])
            self.clear_line()
            return 1

        answer_6 = fisher.normalizeFloat(
            fisher.MSA(arr))  # between-group variance
        answer_7 = fisher.normalizeFloat(
            fisher.MSW(arr))  # within-group variance

        fCritery = fisher.getFishersCritery(arr)
        answer_8 = fisher.normalizeFloat(fCritery)  # Fisher's criterion (F statistic)

        significanceLevel = self.entry_2_string.get()
        significanceLevel = fisher.inputSignificanceLevel(significanceLevel)
        if significanceLevel[0] == 0:
            fCriticalCritery = fisher.getCriticalFishersCritery(
                (significanceLevel[1]), arr)
            answer_9 = fisher.normalizeFloat(
                fCriticalCritery)  # critical value of Fisher's criterion

        else:
            self.show_warning(significanceLevel[1])
            self.clear_line()
            return 1

        self.entry_3_string.set(
            answer_3)  # display the number of elements in the sample
        self.entry_4_string.set(answer_4)  # arithmetic mean
        self.entry_5_string.set(
            answer_5)  # estimate of the standard deviation
        self.entry_6_string.set(answer_6)  # between-group variance
        self.entry_7_string.set(answer_7)  # within-group variance
        self.entry_8_string.set(answer_8)  # Fisher's criterion (F statistic)
        self.entry_9_string.set(answer_9)  # critical value of Fisher's criterion

        summary = fisher.summary(fCritery, fCriticalCritery)
        self.text_1.insert(1.0, summary)

        return filename
Example #23
	def __init__(self, parent, figure, sel, plot_t, cap):
		self.parent = parent
		self.cap = cap
		self.data = []
		self.x_labels = []
		self.selector = sel
		
		for day in Plot.day_order:
			data = self.parent.data.get(sel[0], day, sel[1], sel[2])
			if not len(data):
				text = "%s\nx̅: %s\nCol/img: %s\nσ: %s" % (
					day, 0, 0, 0)
				self.x_labels.append(text)
				self.data.append(Plot.cap([0], self.cap))
				continue
			c_normal = parent.image_n / len(parent.image_counter.out["%s,%s" % (sel[0], day)])
			current_data = normalize_data(data, c_normal)
			gm = round(float(stats.gmean(current_data)), 3)
			cpi = round(len(current_data) * c_normal, 3)
			sd = round(float(stats.tstd(current_data)), 3)
			text = "%s\nx̅: %s\nCol/img: %s\nσ: %s" % (
				day, gm, cpi, sd)
			self.x_labels.append(text)
			self.data.append(Plot.cap(current_data, self.cap))
		
		self.figure = figure
		self.plot_tuple = plot_t
		self.graph_label = "%s population, %s %s" % (sel[0], sel[1], sel[2].replace("supra", "supra-basal"))
Example #24
def num_features_DDbS(samplefile_, n_):
    _configs = list()
    init = True

    if os.path.exists(samplefile_):
        with open(samplefile_, 'r') as sf:
            for line in sf:
                if not init:
                    raw = line.split(";")
                    if len(raw) != 0:
                        raw = raw[1:]
                        config = list()
                        for i in range(0, len(raw)):
                            if raw[i] == '1':
                                config.append(i + 1)
                        _configs.append(config)
                else:
                    init = False
    else:
        return -1

    _fnums = list()
    for sample in _configs:
        fnum = 0
        for v in sample:
            if v > 0:
                fnum += 1
        _fnums.append(fnum)

    return stats.tmean(_fnums), stats.tstd(_fnums)
Example #25
    def make_frame(t):
        """ returns an image of the frame at time t """
        # ... create the frame with any library
        fitness = fitness_list[int(t)]
        __sum_fit = sum(fitness)
        __mean_fit = float(__sum_fit) / float(len(fitness))
        from scipy.stats import tstd, iqr, variation, entropy
        __sd_fit = tstd(fitness)
        __iqr = iqr(fitness)
        __v = variation(fitness)
        __e = entropy(fitness)

        fig = plt.figure()
        plt.hist(fitness)  # ,bins=int(params['POPULATION_SIZE']*0.1))
        plt.title("Moving Point - Population Fitness Histogram - Generation " +
                  str(int(t)))
        plt.axis([0, 20000, 0, params['POPULATION_SIZE']])
        plt.ylabel('#Individuals')
        plt.xlabel('Fitness')
        plt.grid(True)
        __hist_text = "$\mu=" + "{0:.2f}".format(
            __mean_fit) + ",\ \sigma=" + "{0:.2f}".format(
                __sd_fit) + ",\ entropy=" + "{0:.2f}".format(
                    __e) + ",\ iqr=" + "{0:.2f}".format(__iqr) + "$"
        plt.text(1000, params['POPULATION_SIZE'] * .9, __hist_text)
        return mplfig_to_npimage(fig)  # (Height x Width x 3) Numpy array
Example #26
def plot_gauss(no):
    seq = 1
    if no==1:
        n = str(no)
        n = n.zfill(3)
        imtype ='IIM'+n
    elif no<1000:
        n = str(no)
        n = n.zfill(3)
        imtype = 'IIM'+n
    else:
        imtype = 'II'+str(no)

    print 'doing '+imtype
    testImg = WAIPSImage('J0528+2200', imtype, 1, seq,43)
    dat = testImg.pixels.flatten()
    end_1, end_2 = np.array([dat.size*0.1,dat.size*0.9],dtype=int)
    mu=np.mean(dat[end_1:end_2])
    sigma=stats.tstd(dat[end_1:end_2])
    peak = np.max(dat)
    print 'peak:', np.max(dat), 'rms: ', sigma, 'snr: ', peak/sigma
    plt.figure()
    n,bins,patches = plt.hist(dat,100,normed=1,histtype='stepfilled')
    plt.setp(patches,'facecolor','g','alpha',0.75)
    y = stats.norm.pdf(bins,mu,sigma)  # normpdf is no longer in matplotlib; scipy's normal pdf gives the same curve
    plt.plot(bins,y,'k--',linewidth=1.5)
    plt.show()
Example #27
def determine_nbins1D(X, rule='Sturges'):
    '''
    There are three common methods to determine the number of bins to compute entropy of ONE variable X
    :param X: array-like of numbers
    :param rule:    1) Freedman‐Diaconis's rule: used for unknown distributions or non-parametric
                            nbins = ceil(max(X) - min(X) / 2 * IQR * N^{-1/3})
                    2) Scotts's Rule: used for normal distribution
                            nbins = ceil(max(X) - min(X) / 3.5 * STD * N^{-1/3})
                    3) Sturges' Rule
                            nbins = ceil(1 + log2(n))
            
            default: Sturges's rule
    
    :return: the optimal number of bins used to calculate entropy
    '''
    maxmin_range = max(X) - min(X)
    n = len(X)
    n3 = n**(-1 / 3)
    if rule == 'Freedman‐Diaconis':
        return np.ceil(maxmin_range / (2.0 * iqr(X) * n3)).astype('int')
    if rule == 'Scott':
        return np.ceil(maxmin_range / (3.5 * tstd(X) * n3)).astype('int')
    if rule == 'Sturges':
        return np.ceil(1 + np.log2(n)).astype('int')
    return 0
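
The three rules can also be evaluated side by side outside the helper; a short sketch on a synthetic sample (the data and size are assumptions):

import numpy as np
from scipy.stats import iqr, tstd

X = np.random.default_rng(1).normal(size=500)
n = len(X)
data_range = X.max() - X.min()
fd = int(np.ceil(data_range / (2.0 * iqr(X) * n ** (-1 / 3))))       # Freedman-Diaconis
scott = int(np.ceil(data_range / (3.5 * tstd(X) * n ** (-1 / 3))))   # Scott
sturges = int(np.ceil(1 + np.log2(n)))                               # Sturges
print(fd, scott, sturges)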
Example #28
 def write_current_stats(log_file, link_bandwidth_usage_Mbps, switch_num_flows, response_times, cur_group_index, group):
     link_bandwidth_list = []
     total_num_flows = 0
     
     for switch_dpid in link_bandwidth_usage_Mbps:
         for port_no in link_bandwidth_usage_Mbps[switch_dpid]:
             link_bandwidth_list.append(link_bandwidth_usage_Mbps[switch_dpid][port_no])
     
     for switch_dpid in switch_num_flows:
         total_num_flows += switch_num_flows[switch_dpid]
     
     avg_response_time = sum(response_times) / float(len(response_times))
     avg_network_time = sum(network_times) / float(len(network_times))
     avg_processing_time = sum(processing_times) / float(len(processing_times))
     
     average_link_bandwidth_usage = sum(link_bandwidth_list) / float(len(link_bandwidth_list))
     traffic_concentration = 0
     if average_link_bandwidth_usage != 0:
         traffic_concentration = max(link_bandwidth_list) / average_link_bandwidth_usage
     link_util_std_dev = tstd(link_bandwidth_list)
     
     log_file.write('Group:' + str(cur_group_index))
     log_file.write(' NumReceivers:' + str(len(group.dst_hosts)))
     log_file.write(' TotalNumFlows:' + str(total_num_flows))
     log_file.write(' MaxLinkUsageMbps:' + str(max(link_bandwidth_list)))
     log_file.write(' AvgLinkUsageMbps:' + str(average_link_bandwidth_usage))
     log_file.write(' TrafficConcentration:' + str(traffic_concentration))
     log_file.write(' LinkUsageStdDev:' + str(link_util_std_dev))
     log_file.write(' ResponseTime:' + str(avg_response_time))
     log_file.write(' NetworkTime:' + str(avg_network_time))
     log_file.write(' ProcessingTime:' + str(avg_processing_time))
     log_file.write('\n')
Example #29
  def compute_metric(self):

    tmid=(self.ts.t[:-1]+self.ts.t[1:])/2.0
    rng=range(1,len(tmid)) # Throw out first and last
    self.tmid=tmid[rng]         
    
    maxval=numpy.zeros(len(rng))
    minval=numpy.ones(len(rng))*1e100

    self.rate=[]
    for v in self.ts:
      self.rate.append(numpy.divide(numpy.diff(v)[rng],
                                    numpy.diff(self.ts.t)[rng]))
      maxval=numpy.maximum(maxval,self.rate[-1])
      minval=numpy.minimum(minval,self.rate[-1])

    vals=[]
    mean=[]
    std=[]
    for j in range(len(rng)):
      vals.append([])
      for v in self.rate:
        vals[j].append(v[j])
      mean.append(tmean(vals[j]))
      std.append(tstd(vals[j]))

    imbl=maxval-minval

    self.ratio=numpy.divide(std,mean)
    self.ratio2=numpy.divide(imbl,maxval)

    # mean of ratios is the threshold statistic
    self.metric = abs(tmean(self.ratio))
Example #30
def plot_heatmap(matrix, ax, label):
    vmax = np.max(matrix)
    vmin = np.min(matrix)
    vextreme = max(abs(vmin), vmax)
    k = kurtosis(matrix.flat)
    try:
        std = tstd(matrix.flat)
    except ZeroDivisionError:
        std = 0
    #print "k: {} std: {}".format(k, std)
    args = {'vmax':vextreme,
            'vmin':-vextreme,
            'interpolation':'none',
            'aspect':'auto',
            'origin':'lower',
            'cmap':plt.get_cmap('RdBu')} #Spectral
    if k > 15:
        norm = SymLogNorm(std/3.0, vmin=-vextreme, vmax=vextreme)
        args['norm'] = norm
        label = "Symmetric log of " + label
    if len(matrix.shape) == 1:
        matrix = np.tile(matrix, (1, 2))
    plt.imshow(matrix, **args)
    ax.set_title(format_name(label))
    ax.set_frame_on(False)
    plt.axis('off')
    # ax.grid(False)
    ax.invert_yaxis()
    cb = plt.colorbar()
    if k > 15:
        ticks = np.linspace(0, 1, 9)
        tick_map = norm.inverse(ticks)
        cb.set_ticks(tick_map)
        cb.set_ticklabels(["{:.4g}".format(t) for t in tick_map])
Example #31
def iters(N):

    i = 0
    simulations = []
    while i < N:
        ith_iter = single('off')
        simulations.append(ith_iter)
        print('$', ith_iter)
        i += 1

    data = simulations

    'plot density histogram'
    plt.figure()
    wts = np.ones_like(data) / float(len(data))
    plt.hist(data,
             stacked=True,
             weights=wts,
             edgecolor='k',
             color='darkorange')
    average = int(np.mean(data))
    plt.suptitle('<Closet value>: $ {} +- {} (std. dev.)'.\
                 format(average,int(tstd(data))),size=13)

    plt.xlabel(r"Total closet values for {} simulation(s)  /  $".format(N),
               size=13)
    plt.ylabel('P', size=13)
    plt.show()
Example #32
def num_features_QS(samplefile_, n_):
    i = 0

    _configs = list()
    if os.path.exists(samplefile_):
        with open(samplefile_, 'r') as sf:
            for line in sf:
                raw = line.split(" ")
                if len(raw) != 0:
                    config = raw[:len(raw) - 1]
                    _configs.append(config)
                i += 1

    else:
        return -1

    _samples = list()
    rands = get_random(n_, len(_configs))
    for r in rands:
        _samples.append(_configs[r - 1])

    _fnums = list()
    for sample in _samples:
        fnum = 0
        for v in sample:
            if not v.startswith('-'):
                fnum += 1
        _fnums.append(fnum)

    return stats.tmean(_fnums), stats.tstd(_fnums)
Example #33
 def calc_moments(self, periods=252):
     if self.model.current_step - 1 < periods:
         return None, None, None
     else:
         rets = self.model.datacollector.model_vars["Return"][-periods-1:]
         rets.append(self.last_return)
         return tstd(rets), skew(rets), kurtosis(rets)
Example #34
def player_info(accountId):

    try:
        summoner = watcher.summoner.by_account(my_region, accountId)
        summonerId = summoner['id']
        league = watcher.league.by_summoner(my_region, summonerId)[0]
    except:
        return None, None, None, None, None

    level = summoner['summonerLevel']
    total_win = league['wins']
    total_loss = league['losses']
    hot_streak = int(league['hotStreak'])

    data = [1] * total_win + [0] * total_loss
    win_skew = skew(data)
    win_std = tstd(data)
    win_mean = tmean(data)
    '''
    match_lst = watcher.match.matchlist_by_account(
        my_region, accountId, end_index=30, queue='420')
    for match in match_lst:
        print(match)
    '''

    return win_mean, win_std, win_skew, level, hot_streak
Example #35
    def norm_fit_sparsely(self,
                          show_it=0,
                          save_it=0,
                          save_dir=None,
                          save_name=None,
                          start=0,
                          end=0):
        if not end: print('norm fit: assign end')
        _sparseness = 10**4
        _start = int(start)
        _end = int(end)
        _data_num = _end - _start
        print(
            223, 'int(data_num/sparseness + 1):{}, data_num:{}'.format(
                int(_data_num / _sparseness + 1), _data_num))
        _cur_x = [
            self.x[i]
            for i in range(_start, _end, int(_data_num / _sparseness + 1))
        ]

        _guess = [stats.tmean(self.x), stats.tstd(self.x)]
        _x = _cur_x
        _x.sort()
        self.norm_params, self.norm_params_covariance = optimize.curve_fit(
            self.norm_dist_CDF, _x,
            [(i + 1) / len(_x) for i in range(len(_x))], _guess)
        self.hist_norm_of_move_sparsely(show_it=show_it,
                                        save_it=save_it,
                                        save_dir=save_dir,
                                        save_name=save_name,
                                        start=start,
                                        end=end)
Example #36
    def std(self, **kwargs):
        """
        Unbiased standard deviation of time series.

        Parameters
        ----------
        **kwargs : optional
            Additional keyword arguments are passed to :meth:`get()`.

        Returns
        -------
        float
            Sample standard deviation

        Notes
        -----
        Computes the unbiased sample standard deviation, i.e. it uses a correction factor n / (n - ddof).

        See also
        --------
        scipy.stats.tstd

        """
        # get data array, time does not matter
        _, x = self.get(**kwargs)
        return tstd(x)
Example #37
def sort_spikes(dataframe, analog_data, standardize=False):
    """
    Sorts spikes in dataframe for the given analog_data in place. Spikes are
    sorted by the first two principal components after the waveforms have been
    smoothed and up-sampled. Cluster analysis is done using the OPTICS density
    based clustering algorithm. An appropriate epsilon is found by looking for
    significant peaks in the reachability plot.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        DataFrame of spike data.
    analog_data : MEARecording
        The MEARecording for the spikes given in dataframe.
    standardize : bool
        If True, standardize data before cluster finding.

    """
    for (tag, sdf) in dataframe.groupby('electrode'):
        waveforms = extract_waveforms(bandpass_filter(analog_data[tag]),
                                      sdf.time.values)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', category=RuntimeWarning)
            pcs = PCA(n_components=2).fit_transform(waveforms)
            if standardize:
                pcs = StandardScaler().fit_transform(pcs)

        opt = optics.OPTICS(300, 5)
        opt.fit(pcs)

        reach = opt._reachability[opt._ordered_list]
        rprime = reach[np.isfinite(reach)]
        if len(rprime) < 2:
            continue
        try:
            thresh = 8.5 * stats.tstd(rprime, (np.percentile(
                rprime, 15), np.percentile(rprime, 85))) + np.median(
                    rprime)  # noqa
        except:
            continue
        peaks = peak_local_max(reach,
                               min_distance=4,
                               threshold_abs=thresh,
                               threshold_rel=0).flatten()
        # Find largest peak for close neighbors
        min_dist = 0.05 * len(reach)
        splits = np.where(np.diff(peaks) > min_dist)[0] + 1
        peak_vals = [
            np.max(x) for x in np.split(reach[peaks], splits) if len(x) > 0
        ]
        try:
            eps = 0.90 * np.min(peak_vals)
        except:
            eps = 0.5 * reach[-1]

        opt.extract(eps)

        dataframe.loc[sdf.index, 'electrode'] = \
            sdf.electrode.str.cat(opt.labels_.astype(str), sep='.')
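
The reachability threshold above is a trimmed standard deviation (values outside the 15th-85th percentile window are ignored) plus the median; a standalone sketch on hypothetical reachability values:

import numpy as np
from scipy import stats

rprime = np.random.default_rng(2).exponential(scale=1.0, size=1000)  # stand-in values
limits = (np.percentile(rprime, 15), np.percentile(rprime, 85))
thresh = 8.5 * stats.tstd(rprime, limits) + np.median(rprime)
print(thresh)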
Example #38
 def sd(self, samples=100):
     vals = []
     for i in range(samples):
         solution = []
         for j in range(self.arglen):
             solution.append(random.randrange(*self.range_))
         vals.append(self.fitness1(solution))
     return stats.tstd(vals)
Example #39
def post_credible_interval(data, confidence=0.95):    # compute posterior credible interval
    a = 1.0*np.array(data)
    n = len(a)
    m, se = np.mean(a), stats.tstd(a)

    h = se * stats.t.ppf((1 + confidence) / 2., n - 1)
    return m, m-h, m+h
Example #40
 def norm_fit(self, show_it=0, save_it=0, save_dir=None, save_name=None):
     _guess = [stats.tmean(self.x), stats.tstd(self.x)]
     _x = self.x
     _x.sort()
     self.norm_params, self.norm_params_covariance = optimize.curve_fit(
         self.norm_dist_CDF, _x, [(i + 1) / len(_x) for i in range(len(_x))], _guess)
     self.hist_norm_of_move(
         show_it=show_it, save_it=save_it, save_dir=save_dir, save_name=save_name)
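
The fit seeds optimize.curve_fit with tmean/tstd and targets the empirical CDF; a self-contained sketch using scipy's normal CDF in place of the class's norm_dist_CDF (which is not shown here), on a hypothetical sample:

import numpy as np
from scipy import optimize, stats

x = np.sort(np.random.default_rng(3).normal(loc=2.0, scale=0.7, size=200))
ecdf = (np.arange(len(x)) + 1) / len(x)
guess = [stats.tmean(x), stats.tstd(x)]
params, cov = optimize.curve_fit(lambda v, mu, s: stats.norm.cdf(v, mu, s),
                                 x, ecdf, p0=guess)
print(params)   # fitted mean and standard deviation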
Example #41
def print_and_plot_results(count, results, verbose, plot_file_name):
    print("RPS calculated as 95% confidence interval")

    rps_mean_ar = []
    low_ar = []
    high_ar = []
    test_name_ar = []

    for test_name in sorted(results):
        data = results[test_name]
        rps = count / array(data)
        rps_mean = tmean(rps)
        rps_var = tvar(rps)
        low, high = norm.interval(0.95, loc=rps_mean, scale=rps_var**0.5)
        times = array(data) * 1000000 / count
        times_mean = tmean(times)
        times_stdev = tstd(times)
        print('Results for', test_name)
        print('RPS: {:d}: [{:d}, {:d}],\tmean: {:.3f} μs,'
              '\tstandard deviation {:.3f} μs'
              .format(int(rps_mean),
                      int(low),
                      int(high),
                      times_mean,
                      times_stdev))

        test_name_ar.append(test_name)
        rps_mean_ar.append(rps_mean)
        low_ar.append(low)
        high_ar.append(high)

        if verbose:
            print('    from', times)
        print()


    if plot_file_name is not None:
        import matplotlib.pyplot as plt
        from matplotlib import cm
        fig = plt.figure()
        ax = fig.add_subplot(111)
        L = len(rps_mean_ar)
        color = [cm.autumn(float(c) / (L - 1)) for c in arange(L)]
        bars = ax.bar(
            arange(L), rps_mean_ar,
            color=color, yerr=(low_ar, high_ar), ecolor='k')
        # order of legend is reversed for visual appeal
        ax.legend(
            reversed(bars), reversed(test_name_ar),
            loc='upper left')
        ax.get_xaxis().set_visible(False)
        plt.ylabel('Requests per Second', fontsize=16)
        print(plot_file_name)
        plt.savefig(plot_file_name, dpi=96)
        print("Plot is saved to {}".format(plot_file_name))
        if verbose:
            plt.show()
Example #42
def sort_spikes(dataframe, analog_data, standardize=False):
    """
    Sorts spikes in dataframe for the given analog_data in place. Spikes are
    sorted by the first two principal components after the waveforms have been
    smoothed and up-sampled. Cluster analysis is done using the OPTICS density
    based clustering algorithm. An appropriate epsilon is found by looking for
    significant peaks in the reachability plot.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        DataFrame of spike data.
    analog_data : MEARecording
        The MEARecording for the spikes given in dataframe.
    standardize : bool
        If True, standardize data before cluster finding.

    """
    for (tag, sdf) in dataframe.groupby('electrode'):
        waveforms = extract_waveforms(
            bandpass_filter(analog_data[tag]), sdf.time.values)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', category=RuntimeWarning)
            pcs = PCA(n_components=2).fit_transform(waveforms)
            if standardize:
                pcs = StandardScaler().fit_transform(pcs)

        opt = optics.OPTICS(300, 5)
        opt.fit(pcs)

        reach = opt._reachability[opt._ordered_list]
        rprime = reach[np.isfinite(reach)]
        if len(rprime) < 2:
            continue
        try:
            thresh = 8.5*stats.tstd(rprime,
                                    (np.percentile(rprime, 15),
                                    np.percentile(rprime, 85))) + np.median(rprime)  # noqa
        except:
            continue
        peaks = peak_local_max(reach, min_distance=4,
                               threshold_abs=thresh,
                               threshold_rel=0).flatten()
        # Find largest peak for close neighbors
        min_dist = 0.05 * len(reach)
        splits = np.where(np.diff(peaks) > min_dist)[0] + 1
        peak_vals = [np.max(x) for x in np.split(reach[peaks], splits)
                     if len(x) > 0]
        try:
            eps = 0.90*np.min(peak_vals)
        except:
            eps = 0.5*reach[-1]

        opt.extract(eps)

        dataframe.loc[sdf.index, 'electrode'] = \
            sdf.electrode.str.cat(opt.labels_.astype(str), sep='.')
Example #43
def detectchange(threshold,n):
	cam=cv2.VideoCapture(0)
	facedetector=cv2.CascadeClassifier("/usr/local/Cellar/opencv/2.4.9/share/OpenCV/haarcascades/haarcascade_frontalface_alt2.xml")
	sumlist=calibcam(n,cam)
	std=stats.tstd(sumlist)
	avg=sum(sumlist)/n
	i=0
	framenum=0
	print "starting detection"
	while True:
		retval,img=cam.read()
		thisz=(img[240:720,320:960,:].sum()-avg)/std
		if abs(thisz)>threshold:
			print "something weird, zscore="+str(thisz)
			time.sleep(1)
			retval,newimg=cam.read()
			#sumlist=calibcam(n,cam)
			#avg=sum(sumlist)/n
			#std=stats.tstd(sumlist)
			faces=facedetector.detectMultiScale(newimg)
			if len(faces)>0:
				print "FOUND A FACEZ!!!!!11!"
				for (x,y,h,w) in faces:
					cv2.rectangle(newimg,(x,y),(x+w,y+h),(0,255,255),1)
				#cv2.imshow("obj num "+str(i),newimg)
				cv2.imshow("obj found",newimg)
			else:
				print "no facez :("
				cv2.imshow("obj not found",newimg)
			i=i+1
			cv2.waitKey(1)
			sumlist[0]=img[240:720,320:960,:].sum()
			std=stats.tstd(sumlist)
			avg=sum(sumlist)/n
		framenum=framenum+1
		if framenum%10==0:
			sumlist[0]=img[240:720,320:960,:].sum()
			std=stats.tstd(sumlist)
			avg=sum(sumlist)/n
		if framenum%100==0:
			print framenum
		#time.sleep(.5)
Example #44
def NormXCorr(s1, s2):
    """
    Computes the normalized cross correlation distance between two vector.

    Parameters:
        s1: `numpy array`
            The first vector.

        s2: `numpy array`
            The second vector.

    Returns: `float`
            NormXCorr distance between s1 and s2, dist is between [-1, 1].
            A value of one indicate a perfect match.
    """
    # s1 and s2 have the same length
    import scipy.stats as ss
    s = s1.shape[0]
    corr = np.sum((s1 - np.mean(s1)) * (s2 - np.mean(s2))) / (ss.tstd(s1) * ss.tstd(s2))
    return corr * (1./(s-1))
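
With ddof=1 standard deviations, the expression above reduces to the Pearson correlation coefficient; a small sanity check on hypothetical vectors:

import numpy as np
from scipy import stats

s1 = np.array([0.1, 0.4, 0.35, 0.8, 0.9])
s2 = np.array([0.05, 0.5, 0.30, 0.75, 0.95])
n = s1.shape[0]
corr = np.sum((s1 - s1.mean()) * (s2 - s2.mean())) / (stats.tstd(s1) * stats.tstd(s2)) / (n - 1)
assert np.isclose(corr, stats.pearsonr(s1, s2)[0])
print(corr)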
Example #45
def calculate_weights(prev_population, prev_weights, sim_theta):
    from scipy.stats import tstd

    weights = np.array([])
    for i in range(param_number):
        rv = uniform(-5, 5)
        prior = rv.pdf(sim_theta[i])
        prod = []
        for w, th in izip(prev_weights[i], prev_population[i]):
            prod.append(w * norm(sim_theta[i], tstd(prev_population[i])).pdf(th))
        weights = np.append(weights, (0.1 / math.fsum(prod)))
    return weights
Example #46
def sample_from_previous(prev_population, weights):
    from scipy.stats import tstd

    theta = np.array([])
    for i in range(param_number):
        weighted_mu = calc_weighted_mean(prev_population[i], weights[i])
        # sigma = 0.5 * (np.max(prev_population[i]) - np.min(prev_population[i]))
        sigma = tstd(prev_population[i])
        particle = np.random.normal(weighted_mu, sigma)
        pert_sigma = get_pert_sigma(prev_population[i])
        # pert_particle = np.random.normal(particle, pert_sigma)
        pert_particle = np.random.uniform(particle - pert_sigma, particle + pert_sigma)
        theta = np.append(theta, pert_particle)
    return theta
Example #47
def getStd(flux,limits=(None,None),wave=None,wmin=None,wmax=None,minsize=20):

    '''
    Get the std of a flux array in a given wavelength range. If no min/max 
    wavelengths are given, the std of the whole array is given.
    
    If the array used for std calculation is too small, None is returned.
    
    A 1-sigma clipping of the flux array can be done by providing limits.
    
    @param flux: The flux array
    @type flux: array

    @keyword limits: Flux limits if flux clipping (1 sigma!) is needed before
                     STD calculation. None for both limits implies no clipping.
                     None for one of the limits implies a half-open interval.
                     
                     (default: (None,None))
    @type limits: (float,float)
    @keyword wave: The wavelength array. If default, the STD is calculated of 
                   the whole flux array
    @type wave: array    
    @keyword wmin: The minimum wavelength. If not given, the minimum wavelength 
                   is the first entry in the wave array
                    
                   (default: None)
    @type wmin: float
    @keyword wmax: The maximum wavelength. If not given, the maximum wavelength 
                   is the last entry in the wave array
                    
                   (default: None)               
    @type wmax: float
    @keyword minsize: The minimum size of the selected array before proceeding
                      with the noise calculation. 0 if no min size is needed.
                             
                      (default: 20)
    @type minsize: int
    
    @return: The flux std between given wavelengths
    @rtype: float
    
    '''
    
    fsel = selectArray(flux,wave,wmin,wmax)
    if fsel.size <= minsize:
        return None
    if limits == (None,None):
        return std(fsel)
    else:
        return tstd(fsel,limits=limits)
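
The limits keyword is what implements the sigma clipping; a minimal sketch on a made-up flux array with one outlier:

import numpy as np
from scipy.stats import tstd

flux = np.array([1.0, 1.1, 0.9, 1.2, 0.8, 5.0])       # hypothetical flux with an outlier
lo, hi = flux.mean() - flux.std(), flux.mean() + flux.std()
print(tstd(flux))                   # unclipped standard deviation
print(tstd(flux, limits=(lo, hi)))  # 1-sigma clipped standard deviation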
Example #48
def getLineScoreStats(df,lineScoreCol,histScoreCol,binNumber=50):
	'''Return a Dataframe of line score stats for each bin. Relevant
	one is probably the mean.'''
	D = {}
	binnedScores = binLineScore(df,lineScoreCol,histScoreCol,binNumber)
	for bin in binnedScores:
		L = binnedScores[bin]
		if len(L) <= 1:
			D[bin] = {"mean": L[0], "var": 0, "stanDev.": 0}
			continue
		mean = stats.tmean(L)
		var = stats.tvar(L)
		stanD = stats.tstd(L)
		D[bin] = {"mean":mean,"var":var,"stanDev.": stanD}
	return pd.DataFrame(D).T
Example #49
    def norm_fit_sparsely(self, show_it=0, save_it=0, save_dir=None, save_name=None, start=0, end=0):
        if not end: print('norm fit: assign end')
        _sparseness=10**4
        _start=int(start)
        _end=int(end)
        _data_num=_end-_start
        print(223,'int(data_num/sparseness + 1):{}, data_num:{}'.format(int(_data_num/_sparseness+1), _data_num))
        _cur_x=[self.x[i] for i in range(_start,_end,int(_data_num/_sparseness+1))]

        _guess = [stats.tmean(self.x), stats.tstd(self.x)]
        _x = _cur_x
        _x.sort()
        self.norm_params, self.norm_params_covariance = optimize.curve_fit(
            self.norm_dist_CDF, _x, [(i + 1) / len(_x) for i in range(len(_x))], _guess)
        self.hist_norm_of_move_sparsely(
            show_it=show_it, save_it=save_it, save_dir=save_dir, save_name=save_name, start=start, end=end)
Example #50
def paired_diff_t_test(a,b):
    assert len(a) == len(b)
    diffs = [a[i]-b[i] for i in xrange(len(a))]
    x_d = stats.tmean(diffs)
    s_d = stats.tstd(diffs)
    n = len(diffs)
    dof = n-1
    t_d = x_d / (s_d / n**0.5)  # standard error of the mean uses sqrt(n)

    # sf() is the survival function (1-cdf)
    pval = stats.t.sf(abs(t_d), dof)

    print
    print 't-statistic:\t%.4f' % (t_d)
    print 'dof:\t%d' % (dof)
    print 'p-value:\t%.4f' % (pval)
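
The statistic can be cross-checked against scipy.stats.ttest_rel, which reports a two-sided p-value; a sketch on hypothetical paired measurements:

import numpy as np
from scipy import stats

a = np.array([10.1, 9.8, 10.4, 10.0, 10.6, 9.9])
b = np.array([9.7, 9.5, 10.1, 9.8, 10.2, 9.6])
diffs = a - b
t_d = stats.tmean(diffs) / (stats.tstd(diffs) / np.sqrt(len(diffs)))
t_ref, p_two_sided = stats.ttest_rel(a, b)
assert np.isclose(t_d, t_ref)        # same t statistic
print(t_d, p_two_sided / 2)          # one-sided p, matching the print above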
Example #51
def background_data(imageName, center_coords, size=100):
    '''calculate the background value and standard deviations
    and return as a tuple (background, sigma)
    @param imageName the path to the image to get the data for
    @param center_coords the coordinates to center the sampling box around, probably the coordinates of the target object
    @param size the size of the sampling box in pixels
    @returns a modal value (bins of size 1 count) and a trimmed standard deviation that rejects values more than twice the background value'''
    with pyfits.open(imageName) as im:
        box = getBox(im[0], center_coords,size)
    bins = numpy.arange(box.min(), box.max(),1) #use bins of size 1 ranging from the minimum to maximum values of the sample box
    x,y = im_histogram(box, bins=bins)
    #compute the location of the peak of the histogram
    midx = numpy.argmax(y)
    center = x[midx]
    sigma = tstd(box, [0,2*center]) #trim to twice the peak value
    return (center, sigma)
Example #52
def main(argv):
    args = ARGS.parse_args()

    count = args.count
    concurrency = args.concurrency
    verbose = args.verbose
    tries = args.tries

    loop = asyncio.get_event_loop()
    suite = [run_aiohttp, run_tornado, run_twisted]

    suite *= tries
    random.shuffle(suite)

    all_times = collections.defaultdict(list)
    all_rps = collections.defaultdict(list)
    for test in suite:
        test_name = test.__name__

        rps, times = loop.run_until_complete(run(test, count, concurrency,
                                                 loop=loop, verbose=verbose,
                                                 profile=args.profile))
        all_times[test_name].extend(times)
        all_rps[test_name].append(rps)

    if args.profile:
        profiler.dump_stats('out.prof')

    print()

    for test_name in sorted(all_rps):
        rps = array(all_rps[test_name])
        times = array(all_times[test_name]) * 1000

        rps_mean = tmean(rps)
        times_mean = tmean(times)
        times_stdev = tstd(times)
        times_median = float(median(times))
        print('Results for', test_name)
        print('RPS: {:d},\tmean: {:.3f} ms,'
              '\tstandard deviation {:.3f} ms\tmedian {:.3f} ms'
              .format(int(rps_mean),
                      times_mean,
                      times_stdev,
                      times_median))
    return 0
Example #53
def data():
    '''EXPERIMENTAL DATA''' 
    #Microspheres size measurements [microsphere diameters] matrix
    micsM=pickle.load( open( "micsmat.p", "rb" ) )
	
    '''STATISTICAL VALUES'''		
    'mean  vector'
    meanV=np.zeros(8)
    for i in range(0,8):
        meanV[i]=np.mean(micsM[i])
    
    'sample std. deviation (sigma) vector'
    sigmaV=np.zeros(8)
    for j in range(0,8):
        sigmaV[j]=tstd(micsM[j])
		
    return micsM, meanV, sigmaV
Example #54
 def get_merit(self, start_date, number_of_days_to_look_back, use_exponential_moving_average=False):
     adjusted_prices = self.get_adjusted_prices_in_range(start_date, number_of_days_to_look_back)
     
     if adjusted_prices == None:
         return None
     
     merit = None
     if use_exponential_moving_average:
         scaling_factor = 0.6
         
         ema_for_start_date = self.compute_exponential_moving_average(adjusted_prices, scaling_factor)
         
         # Compute EMA for trading day |number_of_days_to_look_back| days ago.
         start_date_index = self.find_date_index_in_adjusted_prices(start_date)
         previous_start_date_index = start_date_index + number_of_days_to_look_back
         previous_months_adjusted_prices = self.get_adjusted_prices_in_range(previous_start_date_index, number_of_days_to_look_back, 'index')
         ema_for_previous_start_date = self.compute_exponential_moving_average(previous_months_adjusted_prices, scaling_factor)
         
         merit = ((ema_for_start_date - ema_for_previous_start_date) / ema_for_previous_start_date) / stats.tstd(adjusted_prices)
     else:
         # Reverse adjusted_prices so index 0 is the oldest date
         adjusted_prices.reverse()
     
         # Compute best fit line's slope and y-intercept for adjusted_prices
         slope, y_intercept = numpy.polyfit(range(0, number_of_days_to_look_back), adjusted_prices, 1)
     
         # Build list for adjusted_prices best fit line
         best_fit_line_for_adjusted_prices = []
         actual_and_best_fit_difference = []
         for x in range(0, number_of_days_to_look_back):
             best_fit_line_for_adjusted_prices.append((slope * x) + y_intercept)
             diff = (adjusted_prices[x] - best_fit_line_for_adjusted_prices[x]) / best_fit_line_for_adjusted_prices[x]
             actual_and_best_fit_difference.append(diff)
     
         # Compute gain of best_fit_line_for_adjusted_prices
         start_price = best_fit_line_for_adjusted_prices[0]
         end_price   = best_fit_line_for_adjusted_prices[-1]
         best_fit_gain_percentage = 100.0 * (end_price - start_price) / start_price
     
         # Compute standard deviation of best_fit_line_for_adjusted_prices
         actual_and_best_fit_difference_standard_deviation = stats.tstd(actual_and_best_fit_difference)
     
         merit = best_fit_gain_percentage / actual_and_best_fit_difference_standard_deviation
     
     return merit
Example #55
def get_ci(im_data, center='median', mod=3000.0, percentile=0.01):

    flattened = scipy.concatenate(im_data)
    flattened = flattened[scipy.nonzero(flattened)]

    if center == 'median':
        mu = scipy.median(flattened)
    elif center == 'mean':
        mu = scipy.average(flattened)

    sigma = stats.tstd(flattened)

    mod = scipy.float_(mod)
    sigma = var_truncNormal(mu - mod, mu + mod, mu, sigma, flattened, mod=mod)

    ci = 2 * mu - stats.norm.ppf(percentile, mu, sigma)

    return ci
Example #56
def id_cr(orig_img, kernel_size = 9, thresh = 300):
    img = orig_img.copy()
    filt_img = img.copy()
    l1 = pyplot.imshow(img, interpolation = 'nearest', cmap = 'bone', vmin = 0, vmax = 1000)
    for irow in range(np.shape(img)[0]):
        filt_img[irow, :] = medfilt(filt_img[irow, :], kernel_size = kernel_size)
        stdev = tstd(img[irow, :])
        cr_pix = np.where(abs((img[irow, :] - filt_img[irow, :])) > stdev)
        img[irow, cr_pix[0]] = -999
    #What should I do with the error array
    x = np.where(img == -999)
    #Uncomment this block of code to see 
    #print np.shape(x)
    #pyplot.plot(x[1], x[0], 'r.')
    #pdb.set_trace()
    #pyplot.close()

    return img
Example #57
def _convert_time_series(time_series):
	"""Establishes symbolic library and normalizes the time series data, then
	represents the data (represented as a list of floats) with that library
	(i.e., each data point is converted into a symbol to be used in the
	symbolic matrix for motif discovery - allows us to compare similar data
	points by specifying a threshold range of values that each symbol
	represents). This function will average data points over intervals of
	specified length to determine the symbolic representations of those
	intervals.
	"""
	differential = np.subtract(time_series[1:], time_series[:-1])
	std_dev = stats.tstd(differential)
	mean = stats.tmean(differential)
	norm_differential = (differential - mean) / std_dev	
	percentiles = np.arange(len(symbol_list) + 1) * (100. / len(symbol_list))
	zscores = np.array([stats.scoreatpercentile(norm_differential, p) for p in percentiles])

	return norm_differential, zscores
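
A self-contained sketch of the same normalization on a made-up series and symbol alphabet (symbol_list is a module-level name in the original; the values here are assumptions):

import numpy as np
from scipy import stats

time_series = [1.0, 1.3, 1.1, 1.6, 2.0, 1.8, 2.4]
symbol_list = ['a', 'b', 'c', 'd']

differential = np.subtract(time_series[1:], time_series[:-1])
norm_differential = (differential - stats.tmean(differential)) / stats.tstd(differential)
percentiles = np.arange(len(symbol_list) + 1) * (100. / len(symbol_list))
zscores = np.array([stats.scoreatpercentile(norm_differential, p) for p in percentiles])
print(norm_differential, zscores)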
Example #58
def filterByRemoval(data, ydata, time):
    std = stats.tstd(data)
    mean = stats.tmean(data)

    for i, x in enumerate(data):
        if x > mean + (1*std):
            data[i] = 'x'           
            ydata[i] = 'x'
            time[i] = 'x'
        elif x < mean - (2*std):
            data[i] = 'x'
            ydata[i] = 'x'
            time[i] = 'x'
    ##a[:] = [x for x in a if x != [1, 1]]
    time[:] = [x for x in time if x != 'x']
    data[:]  = [x for x in data if x != 'x']
    ydata[:] = [x for x in ydata if x != 'x']
    return data, ydata, time
Example #59
def get_ci(im_data, center='median', mod=3000.0, percentile=0.01):

    flattened = sp.concatenate(im_data)
    flattened = flattened[sp.nonzero(flattened)]

    if center == 'median':
        mu = sp.median(flattened)
    elif center == 'mean':
        mu = sp.average(flattened)

    old_sigma = stats.tstd(flattened)

    # mod = sp.float_(mod)
    sigma, x1, x2, cx, yhat, sigma2 = var_truncNormal(mu - mod, mu + mod, mu,
                                              old_sigma, flattened, mod=mod)

    ci = 2 * mu - stats.norm.ppf(percentile, mu, sigma)

    return ci, mu, sigma, mu-mod, mu+mod, old_sigma, x1, x2, cx, yhat, sigma2