Example #1
def num_features_smarch(samplefile_, n_):
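    """Mean and std of the number of selected features per sampled configuration.

    Reads comma-separated configurations from samplefile_, keeps them all when
    n_ < 0 or draws n_ of them at random via get_random, and counts the entries
    that do not start with '-' (i.e. selected features). Returns -1 if the
    sample file does not exist.
    """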
    _configs = list()

    if os.path.exists(samplefile_):
        with open(samplefile_, "r") as sf:
            for line in sf:
                raw = line.split(',')
                config = raw[:len(raw) - 1]
                _configs.append(config)
    else:
        return -1

    _samples = list()
    if n_ < 0:
        _samples = _configs.copy()
    else:
        rands = get_random(n_, len(_configs))
        for r in rands:
            _samples.append(_configs[r - 1])

    _fnums = list()
    for sample in _samples:
        fnum = 0
        for v in sample:
            if not v.startswith('-'):
                fnum += 1
        _fnums.append(fnum)

    if n_ < 0:
        avg = stats.tmean(_fnums)
        std = stats.tstd(_fnums)
        return avg, std

    return stats.tmean(_fnums), stats.tstd(_fnums)
Example #2
	def query4(self, length=8):	
		global data1
		data1=pandas.read_sql_query(query['4a'], cnx)
		pysql = lambda q: pandasql.sqldf(q, globals())
		data1_rep = pysql("select p_id as \"Patient ID\",exp as \"Expression Val\" from data1 ")
		global data2
		data2=pandas.read_sql_query(query['4b'], cnx)
		data2_rep = pysql("select p_id as \"Patient ID\",exp as \"Expression Val\" from data2 ")			
		a=data1['exp'].values
		b=data2['exp'].values
		print(stats.tmean(a))
		print(stats.tmean(b))
		print(stats.tvar(a))
		print(stats.tvar(b))
		return """<html>
					<form method="get" action="index">
              		<button type="submit">Return</button>
           			</form>
           			</form>
           			 <form method="post" action="processQuery4"> 
            			Custom Query on Result: 
            			<input type="text" name="qu"><br> 
    					<input type="submit">
    				</form>
    				<h2>T-statistics for Exp Values::</h2>"""+(str)(stats.ttest_ind(a,b,equal_var=True)[0])+"""
					<h1>Exp values for patients with ALL<h3>(Rows-"""+str(len(data1.index))+""")</h3></h1>"""+data1_rep.to_html(index=False)+"""
					<h1>Exp values for patients without ALL<h3>(Rows-"""+str(len(data2.index))+""")</h3></h1>"""+data2_rep.to_html(index=False)+"""
Example #3
  def compute_metric(self):
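    # Per-interval rates are computed for every counter series; the metric is
    # the absolute mean of the per-interval std/mean ratios (coefficient of
    # variation), a measure of spread across the series.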

    tmid=(self.ts.t[:-1]+self.ts.t[1:])/2.0
    rng=range(1,len(tmid)) # Throw out first and last
    self.tmid=tmid[rng]         
    
    maxval=numpy.zeros(len(rng))
    minval=numpy.ones(len(rng))*1e100

    self.rate=[]
    for v in self.ts:
      self.rate.append(numpy.divide(numpy.diff(v)[rng],
                                    numpy.diff(self.ts.t)[rng]))
      maxval=numpy.maximum(maxval,self.rate[-1])
      minval=numpy.minimum(minval,self.rate[-1])

    vals=[]
    mean=[]
    std=[]
    for j in range(len(rng)):
      vals.append([])
      for v in self.rate:
        vals[j].append(v[j])
      mean.append(tmean(vals[j]))
      std.append(tstd(vals[j]))

    imbl=maxval-minval

    self.ratio=numpy.divide(std,mean)
    self.ratio2=numpy.divide(imbl,maxval)

    # mean of ratios is the threshold statistic
    self.metric = abs(tmean(self.ratio))
Example #4
  def get_aggregate_stats(self, duration_start=0, duration_end=None):
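    # Filter packet stats and interactions to the requested time window, bucket
    # packet lengths by source address, and summarize lengths, byte totals and
    # interaction durations.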
    if duration_end is None:
      duration_end = self.get_duration()

    filtered_stats = self._filter_stats(duration_start, duration_end)
    filtered_interactions = self._filter_interactions(duration_start, duration_end)

    lens_by_src = {}
    for p in filtered_stats:
      lens_by_src.setdefault(p.get('src_addr'), []).append(p.get('pkt_len'))

    int_durations = [i.get_duration() for i in filtered_interactions]
    total_bytes = sum([p.get('pkt_len') for p in filtered_stats])

    aggregate_stats = {
        'duration': self.get_duration(),
        'avg_lens': {k: stats.tmean(v) for k,v in lens_by_src.items()},
        'max_lens': {k: max(v) for k,v in lens_by_src.items()},
        'total_by_src': {k: sum(v) for k,v in lens_by_src.items()},
        'num_interactions': len(filtered_interactions),
        'avg_interaction_duration': stats.tmean(int_durations) if int_durations else 0,
        'max_interaction_duration': max(int_durations) if int_durations else 0,
        'min_interaction_duration': min(int_durations) if int_durations else 0,
        'total_bytes': total_bytes
        }
    return aggregate_stats
Example #5
  def compute_metric(self):
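    # Sum per-host flop, DRAM and CPU-clock rates; when the normalized CPU rate
    # exceeds half of its peak value, the metric is the peak-normalized flop
    # rate divided by the peak-normalized DRAM rate, otherwise 0.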

    ts=self.ts
    gfloprate = numpy.zeros(len(ts.t)-1)
    gdramrate = numpy.zeros(len(ts.t)-1)
    gcpurate  = numpy.zeros(len(ts.t)-1)
    for h in ts.j.hosts.keys():
      if ts.pmc_type == 'amd64' :
        gfloprate += numpy.divide(numpy.diff(ts.data[0][h][0]),numpy.diff(ts.t))
        gdramrate += numpy.divide(numpy.diff(ts.data[1][h][0]),numpy.diff(ts.t))
        gcpurate  += numpy.divide(numpy.diff(ts.data[2][h][0]),numpy.diff(ts.t))
      elif ts.pmc_type == 'intel_snb':
        gfloprate += numpy.divide(numpy.diff(ts.data[0][h][0]),numpy.diff(ts.t))
        gfloprate += numpy.divide(numpy.diff(ts.data[1][h][0]),numpy.diff(ts.t))
        gdramrate += numpy.divide(numpy.diff(ts.data[2][h][0]),numpy.diff(ts.t))
        gcpurate  += numpy.divide(numpy.diff(ts.data[3][h][0]),numpy.diff(ts.t))
        
    mfr=tmean(gfloprate)/ts.numhosts
    mdr=tmean(gdramrate)/ts.numhosts
    mcr=tmean(gcpurate)/(ts.numhosts*ts.wayness*100.)



    if (mcr/self.peak[ts.pmc_type][2] > 0.5):
      self.metric = (mfr/self.peak[ts.pmc_type][0])/(mdr/self.peak[ts.pmc_type][1]) 
    else: 
      self.metric = 0

    return
Example #6
def filterByMutualRemoval(data1, data2):
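    # Keep only index pairs where both series lie within nSTD standard
    # deviations of their respective means; an outlier in either series drops
    # the pair.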
    nSTD = 1
    
    x=[]
    y=[]

    std1 = stats.tstd(data1)
    mean1 = stats.tmean(data1)
    
    std2 = stats.tstd(data2)
    mean2 = stats.tmean(data2)
    
    print('m1, std1: ', mean1, std1)
    print('m2, std2: ', mean2, std2)
    
    for i, value in enumerate(data1): 
        if (data1[i] > mean1 + (nSTD*std1)):
            pass
        elif (data1[i] < mean1 - (nSTD*std1)):
            pass
        elif data2[i] > mean2 + (nSTD*std2):
            pass           
        elif data2[i] < mean2 - (nSTD*std2):
            pass
        else:
            x.append(data1[i])
            y.append(data2[i])
                    
    return x,y
Example #7
 def compute_ttest_for_col(self, p_thresh):
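     # For every column, run Student's and Welch's t-tests between the two
     # groups; columns whose equal-variance p-value falls below p_thresh are
     # summarized (group means/stds, p-values, kurtosis, skewness) and saved.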
     res_4df = {'features': [], 'ttest': [], 'welch': []}
     res = dict()
     for col in self.ls_cols:
         group1 = self.df[self.df[self.group_col] == self.groups[0]][col]
         group2 = self.df[self.df[self.group_col] == self.groups[1]][col]
         ttest_eq_pop_var = stats.ttest_ind(group1, group2, equal_var=True)
         ttest_welch = stats.ttest_ind(group1, group2, equal_var=False)
         if ttest_eq_pop_var[1] < p_thresh:
             meas, struct = get_structure_measurement(
                 col, self.ls_meas, self.ls_struct)
             #print('{:<15} {}'.format(meas, struct))
             res[col] = {
                 '{}, mean'.format(self.groups[0]): stats.tmean(group1),
                 '{}, std'.format(self.groups[0]): stats.tstd(group1),
                 '{}, mean'.format(self.groups[1]): stats.tmean(group2),
                 '{}, std'.format(self.groups[1]): stats.tstd(group2),
                 'ttest': ttest_eq_pop_var[1],
                 'welch': ttest_welch[1],
                 'kurtosis': stats.kurtosis(self.df[self.group_col]),
                 'skewness': stats.skew(self.df[self.group_col])
             }
             res_4df['features'].append(struct + ' (' + meas + ')')
             res_4df['ttest'].append(ttest_eq_pop_var[1])
             res_4df['welch'].append(ttest_welch[1])
     self.save_res(res_4df)
     return res
Example #8
    def query4(self):
        global data1
        data1 = pandas.read_sql_query(query['4a'], cnx)
        global data2
        data2 = pandas.read_sql_query(query['4b'], cnx)
        a = data1['Expression Val'].values
        b = data2['Expression Val'].values
        print(stats.tmean(a))
        print(stats.tmean(b))
        print(stats.tvar(a))
        print(stats.tvar(b))
        tt = stats.ttest_ind(a, b, equal_var=True)
        return """<html>
					<form method="get" action="index">
              		<button type="submit">Return</button>
           			</form>
           			</form>
           			 <form method="post" action="processQuery4"> 
            			Custom Query on Result: 
            			<input type="text" name="qu"><br> 
    					<input type="submit">
    				</form>
    				<h2>T-statistics for Exp Values::</h2>""" + (str)(tt[0]) + """
    				<h2>Corresponding p-value::</h2>""" + (str)(tt[1]) + """
					<h1>Exp values for patients with ALL<h3>(Rows-""" + str(len(
            data1.index)) + """)</h3></h1>""" + data1.to_html(
                index=False) + """
					<h1>Exp values for patients without ALL<h3>(Rows-""" + str(
                    len(data2.index)) + """)</h3></h1>""" + data2.to_html(
                        index=False) + """
Example #9
def print_and_plot_results(count, results, verbose, plot_file_name):
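    # Convert per-test elapsed times into requests per second, report a 95%
    # normal-approximation confidence interval and per-request latency stats,
    # and optionally draw a bar chart of the per-test means.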
    print("RPS calculated as 95% confidence interval")

    rps_mean_ar = []
    low_ar = []
    high_ar = []
    test_name_ar = []

    for test_name in sorted(results):
        data = results[test_name]
        rps = count / array(data)
        rps_mean = tmean(rps)
        rps_var = tvar(rps)
        low, high = norm.interval(0.95, loc=rps_mean, scale=rps_var**0.5)
        times = array(data) * 1000000 / count
        times_mean = tmean(times)
        times_stdev = tstd(times)
        print('Results for', test_name)
        print('RPS: {:d}: [{:d}, {:d}],\tmean: {:.3f} μs,'
              '\tstandard deviation {:.3f} μs'
              .format(int(rps_mean),
                      int(low),
                      int(high),
                      times_mean,
                      times_stdev))

        test_name_ar.append(test_name)
        rps_mean_ar.append(rps_mean)
        low_ar.append(low)
        high_ar.append(high)

        if verbose:
            print('    from', times)
        print()


    if plot_file_name is not None:
        import matplotlib.pyplot as plt
        from matplotlib import cm
        fig = plt.figure()
        ax = fig.add_subplot(111)
        L = len(rps_mean_ar)
        color = [cm.autumn(float(c) / (L - 1)) for c in arange(L)]
        bars = ax.bar(
            arange(L), rps_mean_ar,
            color=color, yerr=(low_ar, high_ar), ecolor='k')
        # order of legend is reversed for visual appeal
        ax.legend(
            reversed(bars), reversed(test_name_ar),
            loc='upper left')
        ax.get_xaxis().set_visible(False)
        plt.ylabel('Requests per Second', fontsize=16)
        print(plot_file_name)
        plt.savefig(plot_file_name, dpi=96)
        print("Plot is saved to {}".format(plot_file_name))
        if verbose:
            plt.show()
Example #10
  def plot(self,jobid,job_data=None):    
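    # Heat map of per-core CPI over time: the ratio of the first to the second
    # hardware counter is plotted per core, hosts are labelled with their
    # time-averaged value, and the title reports the mean and std across hosts.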
    if not self.setup(jobid,job_data=job_data):
      return
    
    ts=self.ts

    host_cpi = {}
    host_names = sorted(ts.data[0].keys())
    for v in host_names:
        ncores = len(ts.data[0][v])
        num = 0
        den = 0
        for k in range(ncores):
          ratio = nan_to_num(diff(ts.data[0][v][k]) / diff(ts.data[1][v][k]))

          try: cpi = vstack((cpi,ratio))
          except: cpi = array([ratio]) 
        
          num += diff(ts.data[0][v][k])
          den += diff(ts.data[1][v][k])

        host_cpi[v] = tmean(nan_to_num(num/den))

    mean_cpi = tmean(host_cpi.values())
    if len(host_cpi.values()) > 1:
      var_cpi  = tvar(host_cpi.values())
    else: var_cpi= 0.0

    self.fig = Figure(figsize=(10,12),dpi=110)
    self.ax=self.fig.add_subplot(1,1,1)

    ycore = arange(cpi.shape[0]+1)
    time = ts.t/3600.
    yhost=arange(len(host_cpi.keys())+1)*ncores + ncores

    fontsize = 8
    set_printoptions(precision=4)
    if len(yhost) > 80:
        fontsize /= 0.5*log(len(yhost))
    self.ax.set_ylim(bottom=ycore.min(),top=ycore.max())
    self.ax.set_yticks(yhost[0:-1]-ncores/2.)

    self.ax.set_yticklabels([key +'(' + "{0:.2f}".format(host_cpi[key]) +')' for key in host_names],fontsize=fontsize)

    self.ax.set_xlim(left=time.min(),right=time.max())
    
    pcm = self.ax.pcolor(time, ycore, cpi,vmin=0.0,vmax=5.0)
    pcm.cmap = cm.get_cmap('jet_r')

    try: self.ax.set_title(self.k2[ts.pmc_type][0] +'/'+self.k2[ts.pmc_type][1] + '\n' + 
                           r'Mean(Std)='+'{0:.2f}'.format(mean_cpi)+r'({0:.2f})'.format(sqrt(var_cpi)))
    except: self.ax.set_title(self.k2[0] +'/'+self.k2[1] + '\n'+ 
                              r'$\bar{Mean}$='+'{0:.2f}'.format(mean_cpi)+r'$\pm$'+'{0:.2f}'.format(sqrt(var_cpi)))
    self.fig.colorbar(pcm)
    self.ax.set_xlabel('Time (hrs)')
    self.output('heatmap')
Example #11
def sma(hd, time_period=90, offset=0):
    """
    Returns the simple moving average for the stock over the specified period of time.
    Note: time_period is used instead of n since typically the time period here being
    used is greater than n.
    """
    if len(hd.close) >= offset+time_period:
        return sts.tmean(hd.close[offset:offset+time_period])
    else:
        return sts.tmean(hd.close[offset:])
Example #12
def print_and_plot_results(count, results, verbose, plot_file_name):
    print("RPS calculated as 95% confidence interval")

    rps_mean_ar = []
    low_ar = []
    high_ar = []
    test_name_ar = []

    for test_name in sorted(results):
        data = results[test_name]
        rps = count / array(data)
        rps_mean = tmean(rps)
        rps_var = tvar(rps)
        low, high = norm.interval(0.95, loc=rps_mean, scale=rps_var**0.5)
        times = array(data) * 1000000 / count
        times_mean = tmean(times)
        times_stdev = tstd(times)
        print('Results for', test_name)
        print('RPS: {:d}: [{:d}, {:d}],\tmean: {:.3f} μs,'
              '\tstandard deviation {:.3f} μs'.format(int(rps_mean), int(low),
                                                      int(high), times_mean,
                                                      times_stdev))

        test_name_ar.append(test_name)
        rps_mean_ar.append(rps_mean)
        low_ar.append(low)
        high_ar.append(high)

        if verbose:
            print('    from', times)
        print()

    if plot_file_name is not None:
        import matplotlib.pyplot as plt
        from matplotlib import cm
        fig = plt.figure()
        ax = fig.add_subplot(111)
        L = len(rps_mean_ar)
        color = [cm.autumn(float(c) / (L - 1)) for c in arange(L)]
        bars = ax.bar(arange(L),
                      rps_mean_ar,
                      color=color,
                      yerr=(low_ar, high_ar),
                      ecolor='k')
        # order of legend is reversed for visual appeal
        ax.legend(reversed(bars), reversed(test_name_ar), loc='upper left')
        ax.get_xaxis().set_visible(False)
        plt.ylabel('Requests per Second', fontsize=16)
        print(plot_file_name)
        plt.savefig(plot_file_name, dpi=96)
        print("Plot is saved to {}".format(plot_file_name))
        if verbose:
            plt.show()
Example #13
        def train_models(slope_history):
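            # Fit slope vs. average outdoor temperature with a linear
            # regression, drop points whose squared residual exceeds
            # mean + 3*std, refit on the survivors and return the model along
            # with residual and slope statistics.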
            if len(slope_history) < 3:
                return None
            # slope_history = list of tuples: (avg_outdoor, slope, std_err, temperature_profile[0,0])
            sh = np.matrix(slope_history)
            lnmodel = LinearRegression()
            error_inverse = np.array(1 / sh[:, 2])[:, 0]
            lnfit = lnmodel.fit(sh[:, 0], sh[:, 1])
            #svr_rbf = SVR(kernel='linear', C=10, epsilon=0.5)
            #svrfit = svr_rbf.fit(sh[:, 0], sh[:,1])

            ln_residue = []
            for i in range(len(slope_history)):
                p = lnfit.predict(slope_history[i][0])[0][0]
                ln_residue.append((p - slope_history[i][1])**2)

            ln_std = stats.tstd(ln_residue)

            ln_mean = stats.tmean(ln_residue)
            new_sh = None
            for i in range(len(ln_residue)):
                if ln_residue[i] < ln_mean + 3 * ln_std:
                    #sh = np.delete(sh,i,axis=0)
                    if new_sh is None:
                        new_sh = sh[i, :]
                    else:
                        new_sh = np.vstack((new_sh, sh[i, :]))

            sh = new_sh
            #redo the fit
            error_inverse = np.array(1 / sh[:, 2])[:, 0]

            slope_mean = stats.tmean(sh[:, 1])
            slope_std = stats.tstd(sh[:, 1])

            lnfit = lnmodel.fit(sh[:, 0], sh[:, 1])
            ln_residue = []
            for i in range(len(sh)):
                p = lnfit.predict(sh[i, 0])[0][0]
                ln_residue.append((p - sh[i, 1])**2)

            ln_std = stats.tstd(ln_residue)
            ln_mean = stats.tmean(ln_residue)

            return {
                'ln_model': lnfit,
                'ln_residue': ln_residue,
                'ln_residue_std': ln_std,
                'ln_residue_mean': ln_mean,
                'slope_mean': slope_mean,
                'slope_std': slope_std,
                'data_matrix': sh
            }
Example #14
def main():
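    # Read ten timing measurements per file for the "scan" and "table" variants
    # at two problem sizes (27 and 35932), run Welch's t-tests between the
    # variants, and write means, variances and p-values to a results file.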
    f27_scan = open('sim_scan27.txt', 'r')
    f27_table = open('sim_table27.txt', 'r')
    f35932_scan = open('sim_scan35932.txt', 'r')
    f35932_table = open('sim_table35932.txt', 'r')
    
    ntests = 10
    
    scan27 = [0 for i in range(ntests)]
    table27 = [0 for i in range(ntests)]
    scan35932 = [0 for i in range(ntests)]
    table35932 = [0 for i in range(ntests)]

    files = [f27_scan, f27_table, f35932_scan, f35932_table]
    arrs = [scan27, table27, scan35932, table35932]

    for i in range(ntests):
        for j in range(4):
            line = files[j].readline()
            if line[len(line)-1] == '\n':
                line = line[:len(line)-1]
            arrs[j][i] = float(line)
    for j in range(4):
        files[j].close()

    _, p27 = stats.ttest_ind(scan27, table27, equal_var=False)
    mean27scan = stats.tmean(scan27)
    mean27table = stats.tmean(table27)
    var27scan = stats.tvar(scan27)
    var27table = stats.tvar(table27)

    _, p35932 = stats.ttest_ind(scan35932, table35932, equal_var=False)
    mean35932scan = stats.tmean(scan35932)
    mean35932table = stats.tmean(table35932)
    var35932scan = stats.tvar(scan35932)
    var35932table = stats.tvar(table35932)

    f = open('sim_results_compare_scan_table.txt', 'w')
    f.write('27\n')
    f.write('scan mean: ' + str(mean27scan) + '\n')
    f.write('scan var: ' + str(var27scan) + '\n')
    f.write('table mean: ' + str(mean27table) + '\n')
    f.write('table var: ' + str(var27table) + '\n')
    f.write('p-value: ' + str(p27) + '\n\n')

    f.write('35932\n')
    f.write('scan mean: ' + str(mean35932scan) + '\n')
    f.write('scan var: ' + str(var35932scan) + '\n')
    f.write('table mean: ' + str(mean35932table) + '\n')
    f.write('table var: ' + str(var35932table) + '\n')
    f.write('p-value: ' + str(p35932) + '\n')

    f.close()
Example #15
  def compute_metric(self):

    ts = self.ts

    tmid=(ts.t[:-1]+ts.t[1:])/2.0       

    # Average over each node's time series turning nan's to zero's
    ratio = {}
    for k in ts.j.hosts.keys():
      ratio[k] = tmean(numpy.nan_to_num(numpy.diff(ts.data[0][k][0])/numpy.diff(ts.data[1][k][0])))

    # Average of time-averaged nodes
    self.metric = tmean(ratio.values())
Example #16
    def process_CustomTstat(self, disease1, disease2, go):
        print(disease2)
        q1 = "SELECT s.p_id,maf.exp from GOAnnotation ga inner join (probe pb,Diagnosis dg,disease ds, microarray_fact maf, sample s) on (ga.UID = pb.UID and pb.pb_id=maf.pb_id and dg.ds_id= ds.ds_id and dg.p_id=s.p_id and maf.s_id= s.s_id) where ga.go_id =\"" + go + "\"and ds.`name` =\"" + disease1 + "\""
        q1_not = "SELECT s.p_id,maf.exp from GOAnnotation ga inner join (probe pb,Diagnosis dg,disease ds, microarray_fact maf, sample s) on (ga.UID = pb.UID and pb.pb_id=maf.pb_id and dg.ds_id= ds.ds_id and dg.p_id=s.p_id and maf.s_id= s.s_id) where ga.go_id =\"" + go + "\"and ds.`name` !=\"" + disease1 + "\""
        data1 = pandas.read_sql_query(q1, cnx)
        data1_not = pandas.read_sql_query(q1_not, cnx)
        a = data1['exp'].values
        b = data1_not['exp'].values
        print(stats.tmean(a))
        print(stats.tmean(b))
        print(stats.tvar(a))
        print(stats.tvar(b))
        if disease1 == disease2:
            tt = stats.ttest_ind(a, b, equal_var=True)
            return """<html>
					<form method="get" action="index">
              		<button type="submit">Return</button>
           			</form>
           			</form>
    				<h2>T-statistics for Exp Values::</h2>""" + (str)(tt[0]) + """
    				<h2>Corresponding p-value::</h2>""" + (str)(tt[1]) + """
					<h1>Exp values for patients with """ + disease1 + """<h3>(Rows-""" + str(
                len(data1.index)
            ) + """)</h3></h1>""" + data1.to_html(index=False) + """
					<h1>Exp values for patients without """ + disease1 + """<h3>(Rows-""" + str(
                len(data1_not.index)) + """)</h3></h1>""" + data1_not.to_html(
                    index=False) + """
					</html>"""
        else:
            q2 = "SELECT s.p_id,maf.exp from GOAnnotation ga inner join (probe pb,Diagnosis dg,disease ds, microarray_fact maf, sample s) on (ga.UID = pb.UID and pb.pb_id=maf.pb_id and dg.ds_id= ds.ds_id and dg.p_id=s.p_id and maf.s_id= s.s_id) where ga.go_id =\"" + go + "\"and ds.`name` =\"" + disease2 + "\""
            data2 = pandas.read_sql_query(q2, cnx)
            b = data2['exp'].values
            print(stats.tmean(a))
            print(stats.tmean(b))
            print(stats.tvar(a))
            print(stats.tvar(b))
            tt = stats.ttest_ind(a, b, equal_var=True)
            return """<html>
				<form method="get" action="index">
            	<button type="submit">Return</button>
           		</form>
           		</form>
           		 
    			<h2>T-statistics for Exp Values::</h2>""" + (str)(tt[0]) + """
    			<h2>Corresponding p-value::</h2>""" + (str)(tt[1]) + """
				<h1>Exp values for patients with """ + disease1 + """<h3>(Rows-""" + str(
                len(data1.index)) + """)</h3></h1>""" + data1.to_html(
                    index=False) + """
				<h1>Exp values for patients with """ + disease2 + """<h3>(Rows-""" + str(
                        len(data2.index)) + """)</h3></h1>""" + data2.to_html(
                            index=False) + """
Example #17
def Quest4():
	global query
	global cnx
	data=pandas.read_sql_query(query['4a'], cnx)
	data2=pandas.read_sql_query(query['4b'], cnx)
	a=data['exp'].values
	b=data2['exp'].values
	
	print(stats.tmean(a))
	print(stats.tmean(b))
	print(stats.tvar(a))
	print(stats.tvar(b))

	print(stats.ttest_ind(a,b,equal_var=True))
	return
Example #18
    def close_measure(self, duration):
        """
        Method called at the end of an incident. If the duration is appropriate, the observation will be stored in the db.
        :param duration:
        :return:
        """
        if duration > self.observation_minimum_duration_seconds:

            mean = stats.tmean(self.active_observations) / 10
            variance = stats.variation(self.active_observations) / 10

            self.parent_service.log.info(
                f'Closing reading for {str(self)}. '
                f'During {duration} seconds, '
                f'{len(self.active_observations)} observations were made. '
                f'Average intensity: {mean}, variance: {variance}')

            stored_reading = self.parent_service.persistence.store_intensity_reading(
                self.provide_sensor(), self.active_observations_since,
                duration, mean, variance, datetime.now())

            self.parent_service.log.debug(
                f'Stored new intensity reading: {str(stored_reading)}')

        else:
            self.parent_service.log.debug(
                f'Duration of an observation {str(self)} is too short. Resetting'
            )

        del self.active_observations
Example #19
    def norm_fit_sparsely(self,
                          show_it=0,
                          save_it=0,
                          save_dir=None,
                          save_name=None,
                          start=0,
                          end=0):
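        # Thin the data between start and end down to roughly 10**4 points, use
        # the mean and std of self.x as the initial guess, and fit a normal CDF
        # to the empirical CDF of the sorted subsample.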
        if not end: print('norm fit: assign end')
        _sparseness = 10**4
        _start = int(start)
        _end = int(end)
        _data_num = _end - _start
        print(
            223, 'int(data_num/sparseness + 1):{}, data_num:{}'.format(
                int(_data_num / _sparseness + 1), _data_num))
        _cur_x = [
            self.x[i]
            for i in range(_start, _end, int(_data_num / _sparseness + 1))
        ]

        _guess = [stats.tmean(self.x), stats.tstd(self.x)]
        _x = _cur_x
        _x.sort()
        self.norm_params, self.norm_params_covariance = optimize.curve_fit(
            self.norm_dist_CDF, _x,
            [(i + 1) / len(_x) for i in range(len(_x))], _guess)
        self.hist_norm_of_move_sparsely(show_it=show_it,
                                        save_it=save_it,
                                        save_dir=save_dir,
                                        save_name=save_name,
                                        start=start,
                                        end=end)
Example #20
 def predict(self, data, regressive=False):
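     # Brute-force k-nearest-neighbour prediction: accumulate squared Euclidean
     # distances to every training row, keep the k smallest, and predict the
     # mode (classification) or mean (regression) of their targets.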
     print "Predicting..."
     result = range(len(data))
     for testRow in xrange(len(data)):
         print "For testRow...", testRow
         distances = []
         #print distances
         distances = [0.0] * len(self.data)
         for trainRow in xrange(len(self.data)):
             #if trainRow < 10:
             #    print distances
             for col in xrange(len(self.data[testRow])):
                 #print data[testRow][col]
                 distances[trainRow] += float(
                     (data[testRow][col] - self.data[trainRow][col])**2)
         topK = []
         for i in xrange(self.k):
             topK.append(-1)
         for i in xrange(len(distances)):
             comparison = i
             for j in xrange(self.k):
                 if (topK[j] == -1) or (distances[topK[j]] > distances[i]):
                     comparison, topK[j] = topK[j], comparison
         #print("len of data, targets: " + str(len(self.data)) + ", " + str(len(self.targets)))
         #print("topK: " + str(topK))
         #print("self.targets: " + str(self.targets))
         #print("self.targets[topK, :]: " + str(self.targets[topK]))
         if not regressive:
             result[testRow] = int(mode(self.targets[topK])[0][0])
         else:
             result[testRow] = int(tmean(self.targets[topK]))
     return result
Example #21
def plot_stats(statfile, statplotctime, statplotlcode):
    """
    Plots the statistics.

    :param statplotlcode: Lines-of-code plot file
    :param statplotctime: Compile-time plot file
    :param statfile: Statistics file
    :return:
    """
    import matplotlib.pyplot as plt
    from matplotlib.ticker import MaxNLocator

    data = open(statfile)
    numcomp = []
    timecomp = []
    lcode = []
    k = 0
    for i in data:
        if k > 0:
            j = split_str(i.strip(), ' ')
            numcomp.append(int(j[0]))
            timecomp.append(float(j[2]))
            lcode.append(int(j[4]))
        k += 1
    nlen = len(numcomp)
    lastid = numcomp[nlen - 1]
    if nlen >= 3 and SCIPY:
        # Compile time
        tme = stats.tmean(timecomp)
        trc = stats.trim_mean(timecomp, 0.15)

        plt.figure(1)
        fig, ax = plt.subplots()
        ax.plot(numcomp, timecomp, 'c', label=u'Tiempo compilación (s)')
        ax.plot([numcomp[0], numcomp[nlen - 1]], [tme, tme],
                'r--',
                label=f'Tiempo medio ({tme:.3g}s)')
        ax.plot([numcomp[0], numcomp[nlen - 1]], [trc, trc],
                'b--',
                label=f'Media acotada ({trc:.3g}s)')
        ax.xaxis.set_major_locator(MaxNLocator(integer=True))
        ax.set_xlabel(u'Número de compilación')
        ax.set_ylabel(u'Tiempo de compilación [s]')
        ax.set_title(u'Estadísticas')
        plt.xlim(1, lastid)
        ax.legend()
        fig.savefig(statplotctime, dpi=600)

        # Lines of code
        fig, ax = plt.subplots()
        ax.plot(numcomp, lcode)
        ax.set_xlabel(u'Número de compilación')
        ax.set_ylabel(u'Líneas de código')
        ax.set_title(u'Estadísticas')
        plt.ylim([min(lcode) * 0.97, max(lcode) * 1.03])
        ax.xaxis.set_major_locator(MaxNLocator(integer=True))
        plt.xlim(1, lastid)
        fig.savefig(statplotlcode, dpi=600)

    data.close()
Example #22
 def get_marginal_mean(self, **kwargs):
     """
     Get the marginal mean of a subset of the data, supplied as factor:level
     arguments
     """
     try: return stats.tmean([d[self.outcome_key] for d in self.data if all([d[k]==kwargs[k] for k in kwargs])])
     except: raise TypeError('You must specify at least one factor with a list containing at least one level.')
Example #23
	def __init__(self, filename, sep=",", skip=0, index=True, header=True): 
		if type(filename) == str:
			allm = gendata(filename, sep=sep, skip=skip, index=index, header=header)
			if index==True:
				self.date = allm[0] 
				self.data = allm[1]
				self.header = allm[2]
			else:
				self.data = allm[0]
				self.header = allm[1]
				self.date = "none"
			self.data = self.data.transpose()
		else:
			self.data = filename
			self.date = index
			self.header = header
		self.N,self.T = self.data.shape
		self.mean = [sps.tmean(i) for i in self.data]
		if self.N > 1:
			self.variance = [sps.tvar(i) for i in self.data]
		else:
			self.variance = [sps.tvar(self.data)]
		self.mean = np.array(self.mean)
		self.variance = np.array(self.variance)
		self.covariance = np.zeros((self.N, self.N))
		self.correlation = np.zeros((self.N, self.N))
		self.skewness = 0
		self.kurtosis = 0
		self.dmean = (self.data.transpose()-self.mean).transpose()
		self.did_covar = False
		self.JB = 0
		self.JBpvalue = 0 
Example #24
 def perform_test(self):
     """
     Perform requested tests
     """
     # store some stuff for reuse
     combined_sample = [d[self.outcome_key] for d in self.data]
     grand_mean = stats.tmean(combined_sample)
     sum_of_grand_mean_squared = self.n*grand_mean**2
     sum_of_observations_squared = sum(d[self.outcome_key]**2 for d in self.data)
     highest_order_interaction_bracket_term = self.__get_mean_bracket_term(self.factor_keys) 
     ss_error = sum_of_observations_squared - highest_order_interaction_bracket_term
     df_error = float((self.n_per_condition-1)*self.num_conditions)
     ms_error = ss_error / df_error
     
     # store each main effect and interaction as a dictionary in a list
     self.results = list()
     bracket_terms = dict()
     for i in range(len(self.factor_keys)):
         combinations = itertools.combinations(self.factor_keys, i+1)
         for combination in combinations:
             bracket_terms[combination] = self.__get_mean_bracket_term(combination)
             ss_explained = (
                             bracket_terms[combination] + 
                             sum([bracket_terms[b]*(1 if len(b)%2==len(combination)%2 else -1) for b in bracket_terms.keys() if len(b) < len(combination) and all([t in combination for t in b])]) +
                             sum_of_grand_mean_squared*(1 if len(combination)%2==0 else -1)
                             )
             self.results.append(self.__create_new_source_for_results('x'.join(combination)))
             r = ANOVAResults
             
             self.results[-1].ss = ss_explained
             self.results[-1].df = reduce(lambda x, y: x*y, [len(self.factor_levels[f])-1.0 for f in combination])
             self.results[-1].ms = self.results[-1].ss / self.results[-1].df
             self.results[-1].test_statistic = self.results[-1].ms / ms_error
             self.results[-1].p_two = stats.f.sf(self.results[-1].test_statistic, self.results[-1].df, df_error)
             self.results[-1].effect_size = 0.0 # TODO
             self.results[-1].partial_effect_size = ANOVA.estimate_partial_effect_size(self.results[-1].test_statistic, self.results[-1].df, self.n)
     # fill in complete effects
     f_df_pairs = [(effect.test_statistic, effect.df) for effect in self.results]
     for i in range(len(self.results)):
         self.results[i].effect_size = ANOVA.estimate_complete_effect_size(
                                                                           self.results[i].test_statistic,
                                                                           self.results[i].df,
                                                                           self.n,
                                                                           *f_df_pairs
                                                                           )
     # add entry for error
     self.results.append(self.__create_new_source_for_results('Error'))
     self.results[-1].ss = ss_error
     self.results[-1].df = df_error
     self.results[-1].ms = ms_error
     # add entry for total
     self.results.append(self.__create_new_source_for_results('Total'))
     self.results[-1].ss = self.ss_total(grand_mean, combined_sample)
     self.results[-1].df = float(self.n-1)
     
     # TODO: simple effects
     # TODO: contrasts
     
     # print results if not in silent mode
     if not self.is_silent: print self.printable_test_results()
Example #25
def start_end (start_date,end_date):

    # Create a link from Python to the database
    session=Session(engine)

    #Query the dates and temperature observations of the most active station for the last year of data.
    results = session.query(Measurement.tobs).\
            filter(Measurement.date >= start_date).\
            filter(Measurement.date <= end_date).\
            filter(Measurement.tobs != 'None' and Measurement.tobs !='bb').all()

    session.close()

    # Append the temp observations to a list
    results=np.ravel(results)
    tobs_list=[]
    i=[]
    for tobs in results:
        tobs_dict={}

        tobs_list.append(tobs)

    #assign key:value pairs for minimum, average, and max
    tobs_dict['min']=stats.tmin(tobs_list)
    tobs_dict['avg']=stats.tmean(tobs_list)
    tobs_dict['max']=stats.tmax(tobs_list)

    i.append(tobs_dict)

    #Return a JSON list of temperature observations (TOBS) for the previous year.
    return jsonify(i)
Example #26
def throughput_from_file(scalar_file, use_link_layer=True):
    """
    Extracts the stream statistics from the file and returns the average and
    minimum stream throughput.
    
    - The use_link_layer option determines whether to use statistics recorded at
      the server's link layer or the application layer.
    """
    scalar_file = pf.check_file(scalar_file)
    mlog.debug("scalar_file = %s", scalar_file)
    if use_link_layer:
        module = "port_queue.stream"
    else:
        module = "simpleUDPApplication.stream"
    #module = ".*stream.*"
    #module = "stream"
    tx_scalar = "streamTxBytes:sum"
    rx_scalar = "streamRxBytes:sum"
    tx_scalar_values = pf.scalar_from_file(scalar_file, tx_scalar, module)
    rx_scalar_values = pf.scalar_from_file(scalar_file, rx_scalar, module)

    try:
        streams = match_streams(tx_scalar_values, rx_scalar_values)
    except:
        mlog.error("Failed to match streams in file: %s", scalar_file)
        raise
    throughputs = [v[2] for v in streams.itervalues()]
    if not len(throughputs) > 0:
        mlog.error("Couldn't get throughput from file %s!", scalar_file)
        raise StandardError("Couldn't process file")
    #mlog.debug("throughputs = %s", throughputs)
    avg_thr = stats.tmean(throughputs)
    min_thr = min(throughputs)
    return avg_thr, min_thr
Example #27
    def handle(self):
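        # For each Redis key holding raw per-service response times, fold the
        # latest batch's min/max/mean into the stored all-time aggregates and
        # trim the processed entries from the raw list.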
        
        # 
        # Sample config values
        # 
        # global_slow_threshold=120
        # max_batch_size=99999
        #
        config = Bunch()
        for item in self.request.payload.splitlines():
            key, value = item.split('=')
            config[key] = int(value)

        for key in self.server.kvdb.conn.keys(KVDB.SERVICE_TIME_RAW + '*'):
            
            service_name = key.replace(KVDB.SERVICE_TIME_RAW, '')
            
            current_mean = float(
                self.server.kvdb.conn.hget(KVDB.SERVICE_TIME_BASIC + service_name, 'mean_all_time') or 0)
            current_min = float(self.server.kvdb.conn.hget(KVDB.SERVICE_TIME_BASIC + service_name, 'min_all_time') or 0)
            current_max = float(self.server.kvdb.conn.hget(KVDB.SERVICE_TIME_BASIC + service_name, 'max_all_time') or 0)
            
            batch_min, batch_max, batch_mean, batch_total = self.aggregate_raw_times(
                key, service_name, config.max_batch_size)
            
            self.server.kvdb.conn.hset(
               KVDB.SERVICE_TIME_BASIC + service_name, 'mean_all_time', sp_stats.tmean((batch_mean, current_mean)))
            self.server.kvdb.conn.hset(
               KVDB.SERVICE_TIME_BASIC + service_name, 'min_all_time', min(current_min, batch_min))
            self.server.kvdb.conn.hset(
                KVDB.SERVICE_TIME_BASIC + service_name, 'max_all_time', max(current_max, batch_max))
            
            # Services use RPUSH for storing raw times so we are safe to use LTRIM
            # in order to do away with the already processed ones
            self.server.kvdb.conn.ltrim(key, batch_total, -1)
Example #28
    def ownCorrelationMeasure(self, X, Y):
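        # Fit each X category's Y-values with a (mean, variance) pair and score
        # correlation as the ratio between the largest and smallest category
        # means.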
        # Group X-values into categories with their respective set of Y-values
        groups = {}
        for i in range(len(X)):
            key = X[i]
            value = Y[i]
            if key in groups:
                groups[key] += [value]
            else:
                groups[key] = [value]

        # Calculate normal distribution for every X-value
        normal_distributions = {}
        #normal_distributions_old = {}
        for x in groups.keys():
            #normal_distributions_old[x] = stats.norm.fit(groups[x])
            if len(groups[x]) > 1:
                normal_distributions[x] = (stats.tmean(groups[x]), stats.tvar(groups[x]))
            else:
                normal_distributions[x] = (groups[x][0], 0)

        # Calculate correlation measure
        max_dist = max(normal_distributions.values())
        min_dist = min(normal_distributions.values())
        correlation = max_dist[0]/min_dist[0] # Ratio between mean for max and min

        return [correlation, normal_distributions]
Example #29
def player_info(accountId):
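    # Look up a summoner's ranked record via the 'watcher' API client, build a
    # 1/0 win-loss sequence and return its mean, std and skew together with the
    # account level and hot-streak flag.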

    try:
        summoner = watcher.summoner.by_account(my_region, accountId)
        summonerId = summoner['id']
        league = watcher.league.by_summoner(my_region, summonerId)[0]
    except:
        return None, None, None, None, None

    level = summoner['summonerLevel']
    total_win = league['wins']
    total_loss = league['losses']
    hot_streak = int(league['hotStreak'])

    data = [1] * total_win + [0] * total_loss
    win_skew = skew(data)
    win_std = tstd(data)
    win_mean = tmean(data)
    '''
    match_lst = watcher.match.matchlist_by_account(
        my_region, accountId, end_index=30, queue='420')
    for match in match_lst:
        print(match)
    '''

    return win_mean, win_std, win_skew, level, hot_streak
Example #30
 def ss_between(cls, grand_mean=None, *args, **kwargs):
     """
     Get the sum of squared deviations of each group's mean compared to the
     grand mean of all groups
     """
     if grand_mean is None: grand_mean = cls.get_grand_mean(*args)
     return sum([len(a)*(grand_mean-stats.tmean(a))**2 for a in args])
Example #31
 def _calc_rt_base(self, keys):
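     # Mean turnaround time: out_tasks minus in_tasks timestamps, averaged over
     # the given keys.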
     values = []
     for key in keys:
         in_time = self.in_tasks[key]
         out_time = self.out_tasks[key]
         values.append(out_time - in_time)
     return stat.tmean(values)
Example #32
  def compute_metric(self):
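    # Mean GFLOP rate: on Sandy Bridge, FLOPs per interval are reconstructed
    # from the scalar/packed/AVX-256 counters, divided by elapsed time and the
    # node count, and the mean is reported in units of 1e9.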

    gfloprate = 0
    if self.ts.pmc_type == 'amd64' :
      gfloprate += self.arc(self.ts.data[0])
      
    if self.ts.pmc_type == 'intel_hsw':
      # print "Haswell chips do not have FLOP counters"
      return

    if self.ts.pmc_type == 'intel_snb':
      schema = self.ts.j.get_schema('intel_snb')
      if 'ERROR' in schema: return
      data = self.ts.j.aggregate_stats('intel_snb')

      try:
        flops = numpy.diff(data[0][:,schema['SSE_DOUBLE_SCALAR'].index] + 2*data[0][:,schema['SSE_DOUBLE_PACKED'].index] + 
                           4*data[0][:,schema['SIMD_DOUBLE_256'].index])/numpy.diff(self.ts.t)
      except: 
        flops = numpy.diff(data[0][:,schema['SSE_D_ALL'].index] + 4*data[0][:,schema['SIMD_D_256'].index])/numpy.diff(self.ts.t)

      flops = flops/data[1]

    self.metric = tmean(flops)/1.0e9

    return
Example #33
def num_features_DDbS(samplefile_, n_):
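    # Parse a semicolon-separated sample file (skipping the header row), record
    # the indices of enabled features in each configuration, and return the
    # mean and standard deviation of the per-configuration feature counts.
    # Returns -1 if the file is missing.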
    _configs = list()
    init = True

    if os.path.exists(samplefile_):
        with open(samplefile_, 'r') as sf:
            for line in sf:
                if not init:
                    raw = line.split(";")
                    if len(raw) != 0:
                        raw = raw[1:]
                        config = list()
                        for i in range(0, len(raw)):
                            if raw[i] == '1':
                                config.append(i + 1)
                        _configs.append(config)
                else:
                    init = False
    else:
        return -1

    _fnums = list()
    for sample in _configs:
        fnum = 0
        for v in sample:
            if v > 0:
                fnum += 1
        _fnums.append(fnum)

    return stats.tmean(_fnums), stats.tstd(_fnums)
Example #34
 def test_calculate_mean(self):
     sample = []
     for i in range(0, 100):
         sample.append(random())
     mean = self.stat.calculate_mean(sample)
     control = tmean(sample)
     self.assertAlmostEqual(mean, control)
Example #35
def num_features_QS(samplefile_, n_):
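    # Read space-separated configurations, draw n_ of them at random via
    # get_random, count the entries that do not start with '-' (selected
    # features) and return the mean and standard deviation of those counts.
    # Returns -1 if the file is missing.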
    i = 0

    _configs = list()
    if os.path.exists(samplefile_):
        with open(samplefile_, 'r') as sf:
            for line in sf:
                raw = line.split(" ")
                if len(raw) != 0:
                    config = raw[:len(raw) - 1]
                    _configs.append(config)
                i += 1

    else:
        return -1

    _samples = list()
    rands = get_random(n_, len(_configs))
    for r in rands:
        _samples.append(_configs[r - 1])

    _fnums = list()
    for sample in _samples:
        fnum = 0
        for v in sample:
            if not v.startswith('-'):
                fnum += 1
        _fnums.append(fnum)

    return stats.tmean(_fnums), stats.tstd(_fnums)
Example #36
  def compute_metric(self):
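    # Vectorization metric: on Sandy Bridge, the fraction of FLOPs issued by
    # packed/AVX-256 instructions is computed per interval and its mean is
    # reported.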

    gvecrate = 0
    if self.ts.pmc_type == 'amd64' :
      gvecrate += self.arc(self.ts.data[0])

    if self.ts.pmc_type == 'intel_snb':
      schema = self.ts.j.get_schema('intel_snb')
      if 'ERROR' in schema: return
      data = self.ts.j.aggregate_stats('intel_snb')
      nodes = data[1]
      data = data[0].astype(float)
      
      try:
        vectorized = 2*data[:,schema['SSE_DOUBLE_PACKED'].index]+4*data[:,schema['SIMD_DOUBLE_256'].index]
        every = vectorized + data[:,schema['SSE_DOUBLE_SCALAR'].index]
      except: 
        vectorized = 4*data[:,schema['SIMD_D_256'].index]
        every = vectorized + data[:,schema['SSE_D_ALL'].index]

      vecs = numpy.diff(vectorized)/numpy.diff(every)

    self.metric = tmean(vecs)

    return
Example #38
 def run(self):
     self._arithmetic_mean = mean(self._wave)
     self._harmonic_mean = hmean(self._wave)
     self._geometric_mean = gmean(self._wave)
     self._trimmed_mean = tmean(self._wave)
     self._means_entity = MeansEntity(self._arithmetic_mean, self._harmonic_mean, self._geometric_mean,
                                      self._trimmed_mean)
Example #39
def main():
    train_df = pd.read_csv('data/match_feature.csv')

    # Split X, y and scale
    X, y, min_max_scaler = get_X_y(train_df)
    print("Total dataset size : ", len(X))

    #check_train_size_curve(X, y)
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.01,
                                                        random_state=42)

    model = Lgbm_Model()
    #param = model.tune(X_train, y_train)
    model.train(X_train, y_train, X_test, y_test)
    #model.save("Lgbm")
    model.evaluate(X_train, y_train, cross_val=True)
    model.evaluate(X_test, y_test)

    res = model.predict(X_train)
    print(tstd(res))
    print(tmean(res))

    plt.hist(res, bins=100)
    plt.show()

    model.plot_importance()
Example #40
    def aggregate_raw_times(self, key, service_name, max_batch_size=None):
        """ Aggregates values from a list living under a given key. Returns its
        min, max, mean and an overall usage count. 'max_batch_size' controls how
        many items will be fetched from the list so it's possible to fetch less
        items than its LLEN returns.
        """
        key_len = self.server.kvdb.conn.llen(key)
        if max_batch_size:
            batch_size = min(key_len, max_batch_size)
            if batch_size < key_len:
                msg = ('batch_size:[{}] < key_len:[{}], max_batch_size:[{}], key:[{}], '
                'consider decreasing the job interval or increasing the max_batch_size').format(
                    batch_size, key_len, max_batch_size, key)
                self.logger.warn(msg)
        else:
            batch_size = key_len
            
        times = [int(elem) for elem in self.server.kvdb.conn.lrange(key, 0, batch_size)]

        if times:
            mean_percentile = int(self.server.kvdb.conn.hget(KVDB.SERVICE_TIME_BASIC + service_name, 'mean_percentile') or 0)
            max_score = int(sp_stats.scoreatpercentile(times, mean_percentile))
            
            return min(times), max(times), (sp_stats.tmean(times, (None, max_score)) or 0), len(times)
        else:
            return 0, 0, 0, 0
Example #41
def print_stats(datums):
    print 'Mean:', stats.tmean(datums)
    print 'Median:', stats.cmedian(datums)
    print 'Std Dev:', stats.tstd(datums)
    print 'Variation:', stats.variation(datums)
    print 'Kurtosis:', stats.kurtosis(datums, fisher=False)
    print 'Skewness:', stats.skew(datums)
Example #42
    def handle(self):
        
        # 
        # Sample config values
        # 
        # global_slow_threshold=120
        # max_batch_size=99999
        #
        config = Bunch()
        for item in self.request.payload.splitlines():
            key, value = item.split('=')
            config[key] = int(value)

        for key in self.server.kvdb.conn.keys(KVDB.SERVICE_TIME_RAW + '*'):
            
            service_name = key.replace(KVDB.SERVICE_TIME_RAW, '')
            
            current_mean = float(
                self.server.kvdb.conn.hget(KVDB.SERVICE_TIME_BASIC + service_name, 'mean_all_time') or 0)
            current_min = float(self.server.kvdb.conn.hget(KVDB.SERVICE_TIME_BASIC + service_name, 'min_all_time') or 0)
            current_max = float(self.server.kvdb.conn.hget(KVDB.SERVICE_TIME_BASIC + service_name, 'max_all_time') or 0)
            
            batch_min, batch_max, batch_mean, batch_total = self.aggregate_raw_times(
                key, service_name, config.max_batch_size)
            
            self.server.kvdb.conn.hset(
               KVDB.SERVICE_TIME_BASIC + service_name, 'mean_all_time', sp_stats.tmean((batch_mean, current_mean)))
            self.server.kvdb.conn.hset(
               KVDB.SERVICE_TIME_BASIC + service_name, 'min_all_time', min(current_min, batch_min))
            self.server.kvdb.conn.hset(
                KVDB.SERVICE_TIME_BASIC + service_name, 'max_all_time', max(current_max, batch_max))
            
            # Services use RPUSH for storing raw times so we are safe to use LTRIM
            # in order to do away with the already processed ones
            self.server.kvdb.conn.ltrim(key, batch_total, -1)
Example #43
    def aggregate_raw_times(self, key, service_name, max_batch_size=None):
        """ Aggregates values from a list living under a given key. Returns its
        min, max, mean and an overall usage count. 'max_batch_size' controls how
        many items will be fetched from the list so it's possible to fetch less
        items than its LLEN returns.
        """
        key_len = self.server.kvdb.conn.llen(key)
        if max_batch_size:
            batch_size = min(key_len, max_batch_size)
            if batch_size < key_len:
                msg = 'batch_size:`%s` < key_len:`%s`, max_batch_size:`%s`, key:`%s`, ' \
                'consider decreasing the job interval or increasing max_batch_size'
                self.logger.warn(msg, batch_size, key_len, max_batch_size, key)
        else:
            batch_size = key_len

        times = [
            int(elem)
            for elem in self.server.kvdb.conn.lrange(key, 0, batch_size)
        ]

        if times:
            mean_percentile = int(
                self.server.kvdb.conn.hget(
                    KVDB.SERVICE_TIME_BASIC + service_name, 'mean_percentile')
                or 0)
            max_score = int(sp_stats.scoreatpercentile(times, mean_percentile))

            return min(times), max(times), (sp_stats.tmean(
                times, (None, max_score)) or 0), len(times)
        else:
            return 0, 0, 0, 0
Example #44
    def compute_metric(self):

        gvecrate = 0
        if self.ts.pmc_type == 'amd64':
            gvecrate += self.arc(self.ts.data[0])

        if self.ts.pmc_type == 'intel_hsw' or self.ts.pmc_type == 'intel_knl':
            #print "Haswell does not support FLOP counters"
            return
        if self.ts.pmc_type == 'intel_snb':
            schema = self.ts.j.get_schema('intel_snb')
            if 'ERROR' in schema: return
            data = self.ts.j.aggregate_stats('intel_snb')
            nodes = data[1]
            data = data[0].astype(float)

            try:
                vectorized = 2 * data[:, schema['SSE_DOUBLE_PACKED'].
                                      index] + 4 * data[:, schema[
                                          'SIMD_DOUBLE_256'].index]
                every = vectorized + data[:, schema['SSE_DOUBLE_SCALAR'].index]
            except:
                vectorized = 4 * data[:, schema['SIMD_D_256'].index]
                every = vectorized + data[:, schema['SSE_D_ALL'].index]

            vecs = numpy.diff(vectorized) / numpy.diff(every)

        self.metric = tmean(vecs)

        return
Example #45
    def compute_metric(self):

        gfloprate = 0
        if self.ts.pmc_type == 'amd64':
            gfloprate += self.arc(self.ts.data[0])

        if self.ts.pmc_type == 'intel_hsw' or self.ts.pmc_type == 'intel_knl':
            # print "Haswell chips do not have FLOP counters"
            return

        if self.ts.pmc_type == 'intel_snb':
            schema = self.ts.j.get_schema('intel_snb')
            if 'ERROR' in schema: return
            data = self.ts.j.aggregate_stats('intel_snb')

            try:
                flops = numpy.diff(
                    data[0][:, schema['SSE_DOUBLE_SCALAR'].index] +
                    2 * data[0][:, schema['SSE_DOUBLE_PACKED'].index] + 4 *
                    data[0][:, schema['SIMD_DOUBLE_256'].index]) / numpy.diff(
                        self.ts.t)
            except:
                flops = numpy.diff(data[0][:, schema['SSE_D_ALL'].index] +
                                   4 * data[0][:, schema['SIMD_D_256'].index]
                                   ) / numpy.diff(self.ts.t)

            flops = flops / data[1]

        self.metric = tmean(flops) / 1.0e9

        return
Example #46
def passos(xa, ya, xmax, ymax, series=1000):
    '''Returns the average number of steps, over the given number of series, that the drunkard takes to reach the given point.'''
    lista_passos = []
    for i in range(series):
        x, y = 0, 0
        passos = 1
        while x != xa and y != ya:
            a = direcao()
            if a == 'N':
                passos += 1
                y += 1
                if y >= ymax:
                    y = ymax
            elif a == 'S':
                passos += 1
                y -= 1
                if abs(y) >= ymax:
                    y = -ymax
            elif a == 'O':
                passos += 1
                x -= 1
                if abs(x) >= xmax:
                    x = -xmax
            else:
                passos += 1
                x += 1
                if x >= xmax:
                    x = xmax
        lista_passos.append(passos)
    print('Média=' + str(round(stats.tmean(lista_passos), 5)) +
          '\nDesvio-padrão=' + str(round(stats.tstd(lista_passos), 5)))
Example #47
    def collect_service_stats(
        self,
        keys_pattern,
        key_prefix,
        key_suffix,
        total_seconds,
        suffix_needs_colon=True,
        chop_off_service_name=True,
        needs_rate=True,
    ):
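        # Walk Redis hash keys matching the pattern, accumulate usage/min/max
        # per service and collect the per-key means, then average the collected
        # means and, if requested, derive a usage rate per second.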

        service_stats = {}
        if suffix_needs_colon:
            key_suffix = ":" + key_suffix

        for key in self.kvdb.conn.keys(keys_pattern):
            service_name = key.replace(key_prefix, "").replace(key_suffix, "")
            if chop_off_service_name:
                service_name = service_name[:-3]

            values = self.kvdb.conn.hgetall(key)

            stats = service_stats.setdefault(service_name, {})

            for name in STATS_KEYS:

                value = values.get(name)
                if value:
                    if name in ("rate", "mean"):
                        value = float(value)
                    else:
                        value = int(value)

                    if not name in stats:
                        if name == "mean":
                            stats[name] = []
                        elif name == "min":
                            stats[name] = maxint
                        else:
                            stats[name] = 0

                    if name == "usage":
                        stats[name] += value
                    elif name == "max":
                        stats[name] = max(stats[name], value)
                    elif name == "mean":
                        stats[name].append(value)
                    elif name == "min":
                        stats[name] = min(stats[name], value)

        for service_name, values in service_stats.items():
            mean = values.get("mean")
            if mean:
                values["mean"] = sp_stats.tmean(mean)

            if needs_rate:
                values["rate"] = values["usage"] / total_seconds

        return service_stats
Example #48
def get_stats_Emmanuelle(stats_type, groups, *data):
    '''
        author: version adjusted by Emmanuelle Mazur-Lainé 202206
    Args: type of stats, data
    Return: float
    '''

    data = data[0]
    nbr_gr = len(groups)

    if stats_type == 'mean':
        res_stats = ()
        for group in data:
            res = stats.tmean(group)
            res_stats += (res, )
        return (res_stats), 'mean'
    if stats_type == 'std':
        res_stats = ()
        for group in data:
            res = stats.tstd(group)
            res_stats += (res, )
        return res_stats, 'std'
    elif stats_type == 'kurtosis':
        res_stats = ()
        for group in data:
            res = stats.kurtosis(group)
            res_stats += (res, )
        return res_stats, 'kurtosis'
    elif stats_type == 'skewness':
        res_stats = ()
        for group in data:
            res = stats.skew(group)
            res_stats += (res, )
        return res_stats, 'skewness'

    elif stats_type == 'TTest':
        return stats.ttest_ind(data[0], data[1], equal_var=True), ('t', 'p')
    elif stats_type == 'Welch':
        return stats.ttest_ind(data[0], data[1], equal_var=False), ('t', 'p')
    elif stats_type == 'MannWhitneyu':
        try:
            return stats.mannwhitneyu(data[0], data[1]), ('u', 'p')
        except ValueError:
            return (0, 0), ('h', 'p')

    # TODO: figure out how to pass all the groups as parameters
    # to the Bartlett, Kruskal and ANOVA tests

    elif stats_type == 'Bartlett':
        # Bartlett's test: null hypothesis that all groups have equal variances
        return stats.bartlett(*data), ('t', 'p')
    elif stats_type == 'Kruskal':
        try:
            return stats.kruskal(*data), ('h', 'p')
        except ValueError:
            return (0, 0), ('h', 'p')
    elif stats_type == 'ANOVA':
        # one-way ANOVA: tests whether the group means are all equal
        return stats.f_oneway(*data), ('t', 'p')
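# Hedged usage sketch (not part of the original snippet): assumes
# `from scipy import stats` is in scope and that each group is a plain
# sequence of numeric values.
group_a = [1.2, 1.9, 2.4, 2.0]
group_b = [2.8, 3.1, 2.5, 3.4]
means, label = get_stats_Emmanuelle('mean', ['A', 'B'], [group_a, group_b])
(t, p), labels = get_stats_Emmanuelle('TTest', ['A', 'B'], [group_a, group_b])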
Пример #49
0
    def main(self) -> float:
        """
        One iteration of main loop of the service.
        Suppose to return sleep time im seconds
        """
        start_mark = datetime.now()

        measurements = array('i')

        attempt = 0
        while (datetime.now() - start_mark).total_seconds() < self.measure_duration and not ExitEvent().is_set():
            try:
                attempt += 1
                measurements.append(self._measure())
            except DistanceMeasureException as exception:
                self.log.critical(f'Unsuccessful {attempt} attempt to measure', exception)
            if self.measure_attempts_pause_time > 0:
                ExitEvent().wait(self.measure_attempts_pause_time)

        if len(measurements) > 0:
            # assumed the reading was successful in technical terms
            # unfortunately the reading sometimes (quite often) can be invalid - unreliable

            current_level = int(stats.mode(measurements, nan_policy='omit').mode[0])
            current_level_mean = stats.tmean(measurements)

            last_reliable_reading = self._get_last_reliable_cesspit_reading()
            last_stored_reading = self._get_last_stored_cesspit_reading()

            if self._is_reliable(current_level, last_reliable_reading) \
                    or self._is_reliable(current_level, last_stored_reading):
                self.log.info(f'OK {len(measurements)} measurements, '
                              f'mode: {current_level} [mm] ({self._get_fill_percentage(current_level):.2f} [%]), '
                              f'mean: {current_level_mean:.2f}')

                self._set_last_reliable_cesspit_reading(current_level)

                if self._do_store_reading(current_level, last_stored_reading):
                    self._add_cesspit_reading(current_level)

                self._react_on_level(self._get_fill_percentage())

            else:
                speed = (last_reliable_reading.level - current_level) / \
                        ((datetime.now() - last_reliable_reading.timestamp).total_seconds()/3600)
                self.log.info(f'UNRELIABLE! {len(measurements)} measurements, '
                              f'mode: {current_level} [mm] ({self._get_fill_percentage(current_level):.2f} [%]), '
                              f'increase {last_reliable_reading.level - current_level} [mm], '
                              f'mean: {current_level_mean:.2f}, '
                              f'coefficient of variation: {stats.variation(measurements):.2f}, '
                              f'increase speed: {speed:.4f} [mmph]')
                # signalize failure
                self._react_on_failure()

        else:
            self.log.critical(f"All attempts to measure the level failed")
            self._react_on_failure()

        return self._get_polling_period() - (datetime.now()-start_mark).total_seconds()
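# Hedged sketch (not part of the original snippet) of how the level statistics
# above are derived from the raw samples; assumes `from array import array` and
# `from scipy import stats`, and mirrors the older scipy API where
# stats.mode(...).mode is an array.
measurements = array('i', [812, 815, 812, 813, 812, 990])                 # one outlier reading
current_level = int(stats.mode(measurements, nan_policy='omit').mode[0])  # 812
current_level_mean = stats.tmean(measurements)                            # ~842.3
spread = stats.variation(measurements)     # coefficient of variation, not variance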
Пример #50
0
def main(argv):
    args = ARGS.parse_args()

    count = args.count
    concurrency = args.concurrency
    verbose = args.verbose
    tries = args.tries

    loop = asyncio.get_event_loop()
    suite = [run_aiohttp, run_tornado, run_twisted]

    suite *= tries
    random.shuffle(suite)

    all_times = collections.defaultdict(list)
    all_rps = collections.defaultdict(list)
    for test in suite:
        test_name = test.__name__

        rps, times = loop.run_until_complete(run(test, count, concurrency,
                                                 loop=loop, verbose=verbose,
                                                 profile=args.profile))
        all_times[test_name].extend(times)
        all_rps[test_name].append(rps)

    if args.profile:
        profiler.dump_stats('out.prof')

    print()

    for test_name in sorted(all_rps):
        rps = array(all_rps[test_name])
        times = array(all_times[test_name]) * 1000

        rps_mean = tmean(rps)
        times_mean = tmean(times)
        times_stdev = tstd(times)
        times_median = float(median(times))
        print('Results for', test_name)
        print('RPS: {:d},\tmean: {:.3f} ms,'
              '\tstandard deviation {:.3f} ms\tmedian {:.3f} ms'
              .format(int(rps_mean),
                      times_mean,
                      times_stdev,
                      times_median))
    return 0
Пример #52
0
 def norm_fit(self, show_it=0, save_it=0, save_dir=None, save_name=None):
     _guess = [stats.tmean(self.x), stats.tstd(self.x)]
     _x = self.x
     _x.sort()
     self.norm_params, self.norm_params_covariance = optimize.curve_fit(
         self.norm_dist_CDF, _x, [(i + 1) / len(_x) for i in range(len(_x))], _guess)
     self.hist_norm_of_move(
         show_it=show_it, save_it=save_it, save_dir=save_dir, save_name=save_name)
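# Hedged sketch (not part of the original snippet) of the fitting pattern above:
# a normal CDF is fit to the empirical CDF, with the sample mean/std as the
# initial guess. Names and data here are illustrative only.
import numpy as np
from scipy import optimize, stats

def norm_cdf(x, mu, sigma):
    return stats.norm.cdf(x, loc=mu, scale=sigma)

x = np.sort(np.random.default_rng(2).normal(3.0, 1.5, 500))
ecdf = (np.arange(len(x)) + 1) / len(x)
guess = [stats.tmean(x), stats.tstd(x)]
params, cov = optimize.curve_fit(norm_cdf, x, ecdf, p0=guess)   # params ≈ [3.0, 1.5]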
Пример #53
0
def trim_mean(arr, proportion):
    """
    """
    #TODO: windowing (window len) and avoid error try:
    # except: np.sort(p)[window_len/2]
    percent = proportion*100.
    lower_lim = scoreatpercentile(arr, percent/2)
    upper_lim = scoreatpercentile(arr, 100-percent/2)
    tm = tmean(arr, limits=(lower_lim, upper_lim), inclusive=(False, False))
    return tm
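# Hedged usage sketch (not part of the original snippet): assumes
# `from scipy.stats import scoreatpercentile, tmean` is in scope, matching the
# names used in the function above.
data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 100]       # one large outlier
trim_mean(data, 0.2)    # excludes values below the 10th and above the 90th percentile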
Пример #54
0
def get_envelope(data):
	env = abs(signal.hilbert(data))
	env = env.astype('float')
	# normalize by max
	# env = env/max(env)

	# normalize by mean in top 10%
	p = scoreatpercentile(env,90)
	pm = tmean(env,limits=(p,max(env)))
	env = env/pm

	return env
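# Hedged usage sketch (not part of the original snippet): assumes
# `import numpy as np`, `from scipy import signal` and
# `from scipy.stats import scoreatpercentile, tmean` are in scope.
t = np.linspace(0.0, 1.0, 1000)
burst = np.sin(2 * np.pi * 50 * t) * np.exp(-5 * t)   # decaying 50 Hz tone
env = get_envelope(burst)   # envelope normalized by the mean of its top decile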
Пример #55
0
  def compute_metric(self):

    peak = 76.*1.e9
    gdramrate = numpy.zeros(len(self.ts.t)-1)
    for h in self.ts.j.hosts.keys():
      gdramrate += numpy.divide(numpy.diff(64.*self.ts.assemble([0,1],h,0)),
                                numpy.diff(self.ts.t))

    mdr=tmean(gdramrate)/self.ts.numhosts
    self.metric = mdr/peak

    return
Пример #56
0
 def EstimateParameters(self, data):
     # we use our know equations to calculate MLEs for the distributions we use, but
     # we store the values in the same way scipy's MLE calculation function (fit) returns them,
     # so they can be easily passed to the distribution later.
     if self.dist.name == "uniform":
         minD = min(data) # MLE lower bound = min(data)
         maxD = max(data) # MLE upper bound = max(data)
         # uniform value range is [loc, loc+scale]
         self.mle = [floor(minD), ceil(maxD-minD)]
     elif self.dist.name == "norm":
         dataMean = tmean(data) # MLE mean = mean(data)
         dataStd = tstd(data)   # MLE std ≈ std(data) (tstd uses 1/(n-1) rather than the MLE's 1/n)
         # loc = mean
         # scale = standard deviation
         self.mle = [dataMean, dataStd]
     elif self.dist.name == "expon":
         # MLE lambda = 1 / mean(data)
         # scale = 1 / lambda
         self.mle = [0, tmean(data)]
     else:
         self.mle = self.dist.fit(data)
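# Hedged sketch (not part of the original snippet) of the convention assumed above:
# scipy distributions take (loc, scale), so the stored [mean, std] pair for the
# normal case can be passed straight to scipy.stats.norm.
import numpy as np
from scipy.stats import norm, tmean, tstd

data = np.random.default_rng(0).normal(loc=5.0, scale=2.0, size=1000)
mle = [tmean(data), tstd(data)]       # the values EstimateParameters would store
frozen = norm(*mle)                   # equivalent to norm(loc=mean, scale=std)
loc_fit, scale_fit = norm.fit(data)   # scipy's own MLE (std with 1/n, tstd with 1/(n-1))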
Пример #57
0
def getMean(flux,limits=(None,None),wave=None,wmin=None,wmax=None,minsize=20):

    '''
    Get the mean of a flux array in a given wavelength range. If no wavelengths 
    are given, the mean of the whole array is given.
    
    If the array used for mean calculation is too small, None is returned.
    
    A 1-sigma clipping of the flux array can be done by providing limits.
    
    @param flux: The flux array
    @type flux: array

    @keyword limits: Flux limits if flux clipping (1 sigma!) is needed before
                     mean calculation. None for both limits implies no clipping.
                     None for one of the limits implies a half-open interval.
                     
                     (default: (None,None))
    @type limits: (float,float)
    @keyword wave: The wavelength array. If default, the mean is calculated over
                   the whole flux array
    @type wave: array    
    @keyword wmin: The minimum wavelength. If not given, the minimum wavelength 
                   is the first entry in the wave array
                    
                   (default: None)
    @type wmin: float
    @keyword wmax: The maximum wavelength. If not given, the maximum wavelength 
                   is the last entry in the wave array
                    
                   (default: None)               
    @type wmax: float
    @keyword minsize: The minimum size of the selected array before proceeding
                      with the noise calculation. 0 if no min size is needed.
                             
                      (default: 20)
    @type minsize: int
    
    @return: The flux mean between given wavelengths
    @rtype: float
    
    '''
    
    fsel = selectArray(flux,wave,wmin,wmax)
    if fsel.size <= minsize:
        return None
    if limits == (None,None):
        return mean(fsel)
    else:
        return tmean(fsel,limits=limits)
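# Hedged usage sketch (not part of the original snippet): assumes numpy is
# imported as np and that selectArray/mean/tmean are available as in the module
# this function comes from.
wave = np.linspace(1.0, 10.0, 200)
flux = 1.0 + 0.01 * np.random.default_rng(1).standard_normal(200)
m = getMean(flux, wave=wave, wmin=2.0, wmax=8.0)   # plain (unclipped) mean over [2, 8]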
Пример #58
0
def getLineScoreStats(df,lineScoreCol,histScoreCol,binNumber=50):
	'''Return a Dataframe of line score stats for each bin. Relevant
	one is probably the mean.'''
	D = {}
	binnedScores = binLineScore(df,lineScoreCol,histScoreCol,binNumber)
	for bin in binnedScores:
		L = binnedScores[bin]
		if len(L) <= 1:
			D[bin] = {"mean": L[0], "var": 0, "stanDev.": 0}
			continue
		mean = stats.tmean(L)
		var = stats.tvar(L)
		stanD = stats.tstd(L)
		D[bin] = {"mean":mean,"var":var,"stanDev.": stanD}
	return pd.DataFrame(D).T
Пример #59
0
	def fit(self, X, y):
		self.business_winner_bias = {}
		business_review_votes = defaultdict(list)
		for review in self.data.training_reviews.values():
			business_review_votes[review['business_id']].append(review['votes']['useful'])
		for business_id, review_votes in business_review_votes.iteritems():
			median = cmedian(review_votes)
			mean = tmean(review_votes)
			if len(review_votes) > 0 and mean != 0:
				bias = median / mean
			else:
				bias = 1
			self.business_winner_bias[business_id] = bias
			
		return self