Example #1
from statistics import mean, median, mode, variance

def run(data):
    f = open("analyzer.log", 'a+')
    c = costs(data)
    total = total_cost(data)
    f.write("\n############# COST #############\n")
    f.write("Total Cost : {0}\n".format(total))
    f.write("Total Cost Mean: {0}\n".format(mean(c)))
    f.write("Total Cost Median: {0}\n".format(median(c)))
    f.write("Total Cost Mode: {0}\n".format(mode(c)))
    f.write("Total Cost Variance: {0}\n".format(variance(c)))

    cost_action = action(data)
    f.write("Cost by Action: \n")
    for k, v in cost_action.items():
        f.write("\t{0} -> {1} units\n".format(k, v))

    f.write("Percentage Cost by Action: \n")
    for k, v in cost_action.items():
        f.write("\t{0} -> {1} %\n".format(k, round(((v * 100.) / total), 2)))

    f.write("Cost Variance by Action: \n")
    for k, v in cost_action.items():
        c_action = costs_action(data, k)
        if len(c_action) > 1:
            f.write("\t{0} -> {1} units\n".format(k, round(variance(c_action), 2)))
        else:
            f.write("\t{0} -> {1} units\n".format(k, round(c_action[0], 2)))

    key_max, max_value = max_action_value(cost_action)
    f.write("Most Expensive Action by value: {0} -> {1}\n".format(key_max[0], cost_action.get(key_max[0])))

    key_max, max_value = max_action_percentage(cost_action, total)
    f.write("Most Expensive Action by percentage: {0} -> {1} %\n".format(key_max, round(max_value, 2)))

    f.close()
import math
import statistics as stats

def model_analysis(x, x_matrix, y, line, y_hat, b):
    n = len(x) # number of samples
    s_x = stats.stdev(x) # standard deviation of x values
    s_y = stats.stdev(y) # standard deviation of y values
    s2_x = stats.variance(x) # variance of x values
    s2_y = stats.variance(y) # variance of y values
    s_xy = b * s2_x # covariance of x and y implied by the slope
    
    mad_temp = 0
    SSE = 0
    for i in range(len(y)):
        temp = abs(y[i] - y_hat[i])
        mad_temp += temp
        SSE += temp**2 # sum of squares for error
    MAD = mad_temp / n    
    s_err = math.sqrt(SSE / (n - 2)) # standard error of estimate
    s_b = s_err / math.sqrt((n - 1) * s2_x)
    
    r = s_xy / (s_x * s_y) # sample coefficient of correlation
    R_2 = line.score(x_matrix, y) # coefficient of determination 
    R_2calc = s_xy**2 / (s2_x * s2_y)
    t = b / s_b # t-value for slope assuming true slope = 0
    
    f1.write('\nSkew = ' + str(b) + '\n')
    f1.write('Coefficient of correlation (r) = ' + str(r) + '\n')
    #f1.write('Coefficient of determination (R^2) via scikit = ' + str(R_2) + '\n')
    f1.write('Coefficient of determination (R^2) calculate = ' + str(R_2calc) + '\n')
    f1.write('Test statistic for clock skew (t) = ' + str(t) + '\n')
    f1.write('Mean Absolute Deviation (MAD) = ' + str(MAD) + '\n')
    f1.write('Sum of Squares for Forecast Error (SSE) = ' + str(SSE) + '\n')
    
    return
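A minimal driver sketch for model_analysis (not from the original source): the sample data, the scikit-learn fit, and the module-level f1 log handle are all assumptions for illustration.

from sklearn.linear_model import LinearRegression

x = [1.0, 2.0, 3.0, 4.0, 5.0]
y = [2.1, 3.9, 6.2, 8.1, 9.8]
x_matrix = [[v] for v in x]           # sklearn expects a 2-D design matrix
line = LinearRegression().fit(x_matrix, y)
b = float(line.coef_[0])              # fitted slope (the "skew" in the log output)
y_hat = line.predict(x_matrix)

f1 = open("model_analysis.log", "w")  # model_analysis writes to this global handle
model_analysis(x, x_matrix, y, line, y_hat, b)
f1.close()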
import statistics
import matplotlib.pyplot as plt

def main():

    statistics_file = open("statistics.txt", "w")

    stats = {}

    final_statistics = {}

    entropy_kills = []
    entropy_dies = []

    initialize_server()

    stats = simulate_population()

    print(stats)

    for key, val in stats.items():
        entropy_kills += val[0]
        entropy_dies += val[1]

    print(str(entropy_kills))
    statistics_file.write(str(entropy_kills) + "\n")

    print(str(entropy_dies))
    statistics_file.write(str(entropy_dies) + "\n")


    final_statistics.update({"mean entropy kill " : statistics.mean(entropy_kills)})
    final_statistics.update({"variance entropy kill " : statistics.variance(entropy_kills)})
    final_statistics.update({"mean entropy_dies " : statistics.mean(entropy_dies) })
    final_statistics.update({"variance entropy_dies " : statistics.variance(entropy_dies) })

    for key, val in final_statistics.items():
        print(str(key) + " : " + str(val))
        statistics_file.write(str(key) + " : " + str(val) + "\n")

    plt.figure(1)

    plt.subplot(211)

    plt.xlabel('id bot')

    plt.ylabel('kill')

    plt.boxplot(entropy_kills, labels=list('K'))

    plt.subplot(212)

    plt.xlabel('id bot')

    plt.ylabel('dies')

    plt.boxplot(entropy_dies, labels=list('D'))

    plt.show()

    statistics_file.close()
Example #4
import statistics

def countXYZ(data):
	x = []
	y = []
	z = []
	for tmp in data:
		# Each sample packs three 12-bit hex fields (3 hex digits each);
		# scale each 0..4095 reading to its physical range.
		x.append(int(tmp[0:3], 16) * 12.0 / 4096.0)
		y.append(int(tmp[3:6], 16) * 14.0 / 4096.0)
		z.append(int(tmp[6:9], 16) * 36.0 / 4096.0)
	return [6.0 - sum(x) / len(x), 7.0 - sum(y) / len(y), sum(z) / len(z),
		statistics.variance(x), statistics.variance(y), statistics.variance(z)]
Example #5
def caculation(data):
    trt_1 = data[data['trt'] == 1]
    trt_0 = data[data['trt'] == 0]
    
    medi = statistics.median(trt_1['y']) - statistics.median(trt_0['y'])
    mean = statistics.mean(trt_1['y']) - statistics.mean(trt_0['y'])
    peop = len(trt_1) + len(trt_0)
    vari = statistics.variance(trt_1['y']) + statistics.variance(trt_0['y'])
    z_stat, p_val = stats.ranksums(trt_0['y'], trt_1['y']) 
    return [medi, mean, peop, p_val]
Example #6
    def adjRating(self, ratings, VERBOSE=False):
        new_ratings = {}
        change = False
        max_change = 0.0
        for team in self.teams:
            # Set up arrays for ORating and DRating
            ODiff = []
            DDiff = []
            ct = self.teams[team]

            if VERBOSE:
                print("%s" % ct.name)
                print(" Home Games:")
            for game in ct.home_games:
                ODiff.append(game.hs + ratings[game.at.name][1])
                DDiff.append(game.aws - ratings[game.at.name][0])
                if VERBOSE:
                    print("  %s: "
                          "ODiff Entry=%.2f, "
                          "DDiff Entry=%.2f" % (game, ODiff[len(ODiff)-1],
                                              DDiff[len(DDiff)-1]))

            if VERBOSE:
                print(" Away Games:")
            for game in ct.away_games:
                ODiff.append(game.aws + ratings[game.ht.name][1])
                DDiff.append(game.hs - ratings[game.ht.name][0])
                if VERBOSE:
                    print("  %s: "
                          "ODiff Entry=%.2f, "
                          "DDiff Entry=%.2f" % (game, ODiff[len(ODiff)-1],
                                              DDiff[len(DDiff)-1]))

            temp_AdjO = (sum(ODiff)/float(len(ODiff))) - self.average
            temp_AdjD = self.average - (sum(DDiff)/float(len(DDiff)))
            ct.AOV = statistics.variance(ODiff)/len(ODiff)
            ct.ADV = statistics.variance(DDiff)/len(DDiff)
            new_ratings.update({ct.name: [temp_AdjO, temp_AdjD]})
            ch_th_O = abs((temp_AdjO-ratings[ct.name][0])/ratings[ct.name][0])
            ch_th_D = abs((temp_AdjD-ratings[ct.name][1])/ratings[ct.name][1])

            max_change = max(ch_th_D, ch_th_O, max_change)
            if ch_th_O > .0025 or ch_th_D > .0025:
                change = True
            if VERBOSE:
                print(" Team Stats")
                print("  AdjPointsScored: %s" % ODiff)
                print("  AdjPointsAllowed: %s" % DDiff)
                print("  AdjO=%.2f, AdjD=%.2f, "
                      "ChO=%.4f, ChD=%.4f" %
                      (temp_AdjO, temp_AdjD, ch_th_O, ch_th_D))
                print("  AOV=%.4f, ADV=%.4f" % (ct.AOV, ct.ADV))
        # print("%.4f" % max_change)
        return change, new_ratings
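A hedged sketch of the fixed-point iteration adjRating appears designed for; the league object and the league-average seed ratings are assumptions, not part of the original class.

ratings = {name: [league.average, league.average] for name in league.teams}
changed = True
while changed:  # stops once no rating moves by more than the 0.25% threshold above
    changed, ratings = league.adjRating(ratings, VERBOSE=False)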
Example #7
    def get_statistics(self):
        """
        Returns various statistics about the benchmark run.
        :return: See description.
        """
        ret_val = {}
        not_applicable = 'N/A'

        if len(self.rollovers) > 1:  # Skip the last rollover...it could've been smaller than chunk_size
            rollover_times = []
            last_time = self.started
            for i in range(len(self.rollovers) - 1):
                rollover_times.append(
                    (self.rollovers[i] - last_time).total_seconds()
                )
                last_time = self.rollovers[i]

            ret_val['rollover_mean'] = self.num_to_seconds(mean(rollover_times))
            ret_val['rollover_stdev'] = self.num_to_seconds(stdev(rollover_times))
            ret_val['rollover_variance'] = self.num_to_seconds(variance(rollover_times))
        else:
            ret_val['rollover_mean'] = not_applicable
            ret_val['rollover_stdev'] = not_applicable
            ret_val['rollover_variance'] = not_applicable

        if self.initial_mem_usage is None:
            ret_val['initial_mem_usage'] = not_applicable
        else:
            ret_val['initial_mem_usage'] = self.num_to_megabytes(self.initial_mem_usage)

        if len(self.resources) > 0:
            cpu_util_list = [x[1] for x in self.resources]
            ret_val['cpu_util_mean'] = self.num_to_percent(mean(cpu_util_list))
            ret_val['cpu_util_stdev'] = self.num_to_percent(stdev(cpu_util_list))
            if len(cpu_util_list) > 1:
                ret_val['cpu_util_variance'] = self.num_to_percent(variance(cpu_util_list))
            else:
                ret_val['cpu_util_variance'] = not_applicable

            mem_usage_list = [x[2] for x in self.resources]
            ret_val['mem_usage_mean'] = self.num_to_megabytes(mean(mem_usage_list))
        else:
            ret_val['cpu_util_mean'] = not_applicable
            ret_val['cpu_util_stdev'] = not_applicable
            ret_val['cpu_util_variance'] = not_applicable

            ret_val['mem_usage_mean'] = not_applicable

        return ret_val
Example #8
def DemoStatistic(dataParam):

	#calculate mean
	mean = statistics.mean(dataParam)

	#calculate median
	median = statistics.median(dataParam)

	#calculate standard deviation (stdev, not variance, for a 3-sigma test)
	stdv = statistics.stdev(dataParam)

	#count values outside the 3-sigma range
	noiseCount = 0
	for value in dataParam:
		if value < (-3 * stdv) + mean or value > (3 * stdv) + mean:
			noiseCount += 1

	print("-----------------Simple------------------------")
	print("Data length: %d" % (len(dataParam)))
	print("Values outside 3 sigma: %d" % (noiseCount))
	print("Mean: %.7f" % (mean))
	print("Median: %.7f" % (median))
	print("Standard deviation: %.7f" % (stdv))
	print("------------------------------------------------")
Example #9
def analyze(graphs):
    """summary stats for the graphs:
    >>> graphs = [{'win': ['a', 'b', 'philosophy']}, {'win': ['c', 'd', 'philosophy']}, {'fail': ['e', 'f', 'g', 'h', 'i', 'j', 'k']}]
    >>> analyze(graphs)  # doctest: +SKIP (the function also prints to stdout)
    {'min': 2, 'max': 2, 'mean': 2.0, 'median': 2.0, 'var': 0.0}
    """
    win_path_lengths = []
    fail_path_lengths = []

    for graph in graphs:
        if graph.get('win'):
            win_path_lengths.append(len(graph['win']) - 1)
        if graph.get('fail'):
            fail_path_lengths.append(len(graph['fail']) - 1)

    # stats
    win_perc = len(win_path_lengths) / (len(win_path_lengths) + len(fail_path_lengths))
    min_path_length = min(win_path_lengths)
    max_path_length = max(win_path_lengths)
    mean_path_length = mean(win_path_lengths)
    median_path_length = median(win_path_lengths)
    var_path_length = variance(win_path_lengths)

    print('Cache is enabled by default, turning it off will affect the distributions')
    print('Percentage of pages leading to Philosophy: {}'.format(win_perc))
    print('Distribution of paths leading to Philosophy: min {}, max {}, mean {}, median {}, var {}'.format(
           min_path_length, max_path_length, mean_path_length, median_path_length, var_path_length))

    return dict(min=min_path_length,
                max=max_path_length,
                mean=mean_path_length,
                median=median_path_length,
                var=var_path_length)
Example #10
def discard_spurious_lines(lines, expected):
    """ Discards the discordant line(s).

    Keeps the subset of `expected` lines that minimizes the
    variance of the distances between consecutive lines
    and discards the rest.

    The hypothesis is that, when more lines than expected
    were detected, the variance of the distances between
    lines is minimized when the spurious lines are left
    out of the selected set.

    Be aware of the number of possible combinations before
    calling this function. Having just one or two extra lines
    should generally be fine.

    """
    best_variance = math.inf
    for combination in itertools.combinations(lines, expected):
        diffs = [b[0] - a[0]
                 for a, b in zip(combination[:-1], combination[1:])]
        variance = statistics.variance(diffs)
        if variance < best_variance:
            best_combination = combination
            best_variance = variance
    return best_combination
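A small usage sketch with made-up data (assumes math, itertools and statistics are imported, as the function requires): five detected lines sorted by offset, one spurious.

lines = [(0.0,), (5.1,), (7.3,), (10.0,), (15.2,)]
print(discard_spurious_lines(lines, expected=4))
# ((0.0,), (5.1,), (10.0,), (15.2,))  -- the most evenly spaced subset; 7.3 is dropped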
def makeCharactOutOfEndDifferencesList(listdiffs, type=STAT_MEAN):
    """
    type=-1, median
    type=0, mean (STAT_MEAN)
    type=1, variance
    type=2, sigma (standard deviation)
    type=3, skew test
    type=4, kurtosis test
    """
    import copy
    import math
    import statistics
    import scipy.stats as scps

    lst = copy.deepcopy(listdiffs)

    func = {
        -1: statistics.median,
        0: statistics.mean,
        1: statistics.variance,
        2: lambda x: math.sqrt(statistics.variance(x)),
        3: scps.skewtest,
        4: scps.kurtosistest,
    }

    for j in range(len(lst)):
        for k in range(len(lst[j])):
            lst[j][k] = func[type](lst[j][k])

    return lst
Example #12
 def compute(self, ydata):
     y = ydata.T
     var_y = statistics.variance(y)
     self.summary = self.summary+var_y
     self.var_y.append(var_y)
     return { 'summary': self.summary,
              'var_y': self.var_y }
Example #13
 def compute(self, ydata):
     y = np.swapaxes(ydata,0,1).T
     var_y = sum([statistics.variance(y[w])/self.meta.nwalkers for w in range(self.meta.nwalkers)])
     self.summary = self.summary+var_y
     self.var_y.append(var_y)
     return { 'summary': self.summary,
              'var_y': self.var_y }
Example #14
    def async_update(self):
        """Get the latest data and updates the states."""
        if self._max_age is not None:
            self._purge_old()

        if not self.is_binary:
            try:  # require only one data point
                self.mean = round(statistics.mean(self.states), 2)
                self.median = round(statistics.median(self.states), 2)
            except statistics.StatisticsError as err:
                _LOGGER.error(err)
                self.mean = self.median = STATE_UNKNOWN

            try:  # require at least two data points
                self.stdev = round(statistics.stdev(self.states), 2)
                self.variance = round(statistics.variance(self.states), 2)
            except statistics.StatisticsError as err:
                _LOGGER.error(err)
                self.stdev = self.variance = STATE_UNKNOWN

            if self.states:
                self.count = len(self.states)
                self.total = round(sum(self.states), 2)
                self.min = min(self.states)
                self.max = max(self.states)
                self.change = self.states[-1] - self.states[0]
                self.average_change = self.change
                if len(self.states) > 1:
                    self.average_change /= len(self.states) - 1
                if self._max_age is not None:
                    self.max_age = max(self.ages)
                    self.min_age = min(self.ages)
            else:
                self.min = self.max = self.total = STATE_UNKNOWN
                self.average_change = self.change = STATE_UNKNOWN
Example #15
def getVariances(optimalParameters):
    params = optimalParameters[0].keys()
    variances = {}
    for parameter in params:
        allValues = [x[parameter] for x in optimalParameters]
        variances[parameter] = statistics.variance(allValues)
    return variances
Example #16
    def test_parametric_variates(self):
        """
        Verify the correctness of the random variates generation.
        :return: None
        """
        for variate in Variate:
            params = self.varparams[variate]
            sample = list()
            for i in range(self.samsize):
                rndvalue = Variate[variate.name].vargen.generate(u=self.rndgen, **params)
                sample.append(rndvalue)

            expected_mean = self.check_mean[variate](**params)
            actual_mean = mean(sample)
            print("{}: expected mean {}, got {}".format(variate.name, expected_mean, actual_mean))

            if self.makeAssertion:
                self.assertLessEqual(abs(expected_mean - actual_mean) / expected_mean,
                                     self.err * expected_mean,
                                     "Mean error for variate {}: expected {} got {}"
                                     .format(variate.name, expected_mean, actual_mean))

            expected_variance = self.check_variance[variate](**params)
            actual_variance = variance(sample)
            print("{}: expected variance {}, got {}".format(variate.name, expected_variance, actual_variance))

            if self.makeAssertion:
                self.assertLessEqual(abs(expected_variance - actual_variance) / expected_variance,
                                     self.err * expected_variance,
                                     "Variance error for variate {}: expected {} got {}"
                                     .format(variate.name, expected_variance, actual_variance))
Example #17
    def post_trigger_run(self, trigger: RawTrigger, main_plugin: MainPlugin, *args, **kwargs) -> None:
        """
        Collects the benchmark results and saves them in a file
        :param trigger: the trigger instance that is run
        :param main_plugin: the main plugin under which we run
        :param args: additional arguments
        :param kwargs: additional keyword arguments
        """
        if len(trigger.returned_information) == 1:
            mean = trigger.returned_information[0]
            stdev = 0
            variance = 0
        else:
            mean = statistics.mean(trigger.returned_information)
            stdev = statistics.stdev(trigger.returned_information)
            variance = statistics.variance(trigger.returned_information)

        if not os.path.exists(os.path.dirname(self.benchmark_log)):
            os.makedirs(os.path.dirname(self.benchmark_log))

        with open(self.benchmark_log, "a") as logs:
            logs.write("{name}, {plugin}, {slice_size}, {mean}, {stdev}, {variance}, {total_numbers}\n".format(
                name=trigger.conf.get("name"),
                plugin=main_plugin.__class__.__name__,
                slice_size=kwargs.get("number", None),
                mean=mean,
                stdev=stdev,
                variance=variance,
                total_numbers=" ".join([str(data) for data in trigger.returned_information])))
Example #18
def get_stats(arr):
    min_ = min(arr)
    max_ = max(arr)
    range_ = max_ - min_
    mean_ = statistics.mean(arr)
    median_ = statistics.median(arr)
    amp_ = max_ - mean_
    try:
        stdev_ = statistics.stdev(arr)
        var_ = statistics.variance(arr)
    except statistics.StatisticsError:  # fewer than two data points
        stdev_ = 0
        var_ = 0
    rms_ = rms(arr)

    return [min_, max_, range_, mean_, median_, amp_, stdev_, var_, rms_]
Example #19
def stats_of_tab(fname):
    # Build a list of per-row statistics dicts, keyed by title from the spreadsheet
    tbl = []
    with open(fname, "r") as f:
        next(f)  # Skip the first line; it's just an index
        for line in f:
            cols = line.strip().split()
            title = cols[0]
            # Collect statistics on each row
            data = [int(x) for x in cols[1:]]
            obj = {}
            obj["key"] = title
            obj["num-samples"] = len(data)
            obj["mean"] = statistics.mean(data)
            obj["median"] = statistics.median(data)
            obj["min"] = min(data)
            obj["max"] = max(data)
            obj["range"] = obj["max"] - obj["min"]
            obj["std"] = statistics.stdev(data)
            obj["variance"] = statistics.variance(data)
            ci_offset = (Z95 * obj["std"]) / (math.sqrt(obj["num-samples"]))
            obj["confidence-interval"] = [obj["mean"] - ci_offset,
                                          obj["mean"] + ci_offset]
            tbl.append(obj)
    return tbl
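The confidence interval above is the usual normal approximation mean ± Z * s / sqrt(n); a self-contained check with the assumed critical value Z95 = 1.96:

import math
import statistics

Z95 = 1.96  # assumed: two-sided 95% critical value of the standard normal
data = [10, 12, 11, 13, 9, 12]
m = statistics.mean(data)
half = Z95 * statistics.stdev(data) / math.sqrt(len(data))
print([m - half, m + half])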
Example #20
 def get(self):
     values = sorted(self.reservoir.values)
     count = len(values)
     # instead of failing return empty / subset so that json2insert & co
     # don't fail
     if count == 0:
         return dict(n=0)
     elif count == 1:
         return dict(min=values[0], max=values[0], mean=values[0], n=count)
     percentiles = [percentile(values, p) for p in self.plevels]
     min_ = values[0]
     max_ = values[-1]
     stdev = statistics.stdev(values)
     return dict(
         min=min_,
         max=max_,
         mean=statistics.mean(values),
         median=statistics.median(values),
         variance=statistics.variance(values),
         error_margin=error_margin(95, stdev, self.reservoir.count),
         stdev=stdev,
         # replace . with _ so that the output can be inserted into crate
         # crate doesn't allow dots in column names
         percentile={str(i[0]).replace('.', '_'): i[1] for i in
                     zip(self.plevels, percentiles)},
         n=self.reservoir.count,
         samples=self.reservoir.values
     )
Example #21
    def getVar(self):
        if(self.index == 1):
            return 0
        elif(self.index < self.N):
            return statistics.variance(self.window[0:self.index]) # Make return 0?

        return self.variance
    def get_weight_variance(self, *args, **kwargs):
        Weight = apps.get_model('ddm_core', 'Weight')

        weights = Weight.objects.filter(criterion=self, *args, **kwargs).values_list('value', flat=True)
        try:
            return statistics.variance(weights)
        except statistics.StatisticsError:
            return 0
Example #23
def computeFScore(protList,dataTCSs,dataCCSs,combTCSsCCSs):
    final_Prot={}
    topProteins={}
    for i in range(77):
        protein=protList[i]
        mean_TCSs=dataTCSs[protein].mean()
        mean_CCSs=dataCCSs[protein].mean()
        combo_mean=combTCSsCCSs[protein].mean()
        numeratorFScore=((mean_TCSs-combo_mean)**2)+((mean_CCSs-combo_mean)**2)
        denominatorFScore=(stat.variance(dataTCSs[protein]))+(stat.variance(dataCCSs[protein]))
        #denominatorFScore=(dataTCSs[protein].var(ddof=True))+(dataCCSs[protein].var(ddof=True))
        FScore=numeratorFScore/denominatorFScore
        final_Prot[protein]=FScore
    
    sortedDict=sorted(final_Prot.items(), key=lambda x:x[1],reverse=True)
    topProteins=sortedDict[:5]
    return topProteins
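For reference, the quantity computed above matches the two-class Fisher score used in feature selection, F = ((mean_TCSs - combo_mean)^2 + (mean_CCSs - combo_mean)^2) / (var_TCSs + var_CCSs): between-class separation divided by within-class spread, so a larger F means the protein separates the two score sets more cleanly.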
Example #24
def print_mean_var(sticker_prices):
    stock_variances = []
    for ticker in sticker_prices:
        open_prices = sticker_prices[ticker]
        if open_prices:
            stock_variances.append((ticker, statistics.mean(open_prices), statistics.variance(open_prices)))
    data = sorted(stock_variances, key=operator.itemgetter(2))
    print('\n'.join([str(a) for a in data]))
    def get_score_variance(self, *args, **kwargs):
        Score = apps.get_model('ddm_core', 'Score')

        scores = Score.objects.filter(criterion=self, *args, **kwargs).values_list('value', flat=True)
        try:
            return statistics.variance(scores)
        except statistics.StatisticsError:
            return 0
Example #26
def standardize(x):
    if x.shape[0] == 0:
        return
    for i in range(1, x.shape[1]):  # column 0 is presumably the bias term
        variance = stat.variance(x[:, i])
        mean = stat.mean(x[:, i])
        if variance == 0:
            continue  # skip constant columns instead of aborting the loop
        x[:, i] = (x[:, i] - mean)/variance
def GARCH11_logL(param, r):
    omega, alpha, beta = param
    n = len(r)
    s = np.ones(n)*0.01
    s[2] = st.variance(r[0:3])
    for i in range(3, n):
        s[i] = omega + alpha*r[i-1]**2 + beta*(s[i-1])  # GARCH(1,1) model
    logL = -((-np.log(s) - r**2/s).sum())
    return logL
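GARCH11_logL returns the negative of the Gaussian log-likelihood (up to additive constants), so the parameters can be fitted by minimizing it directly. A sketch with assumed starting values and bounds (`returns` is an assumed array of demeaned returns):

import numpy as np
from scipy.optimize import minimize

r = np.asarray(returns)
res = minimize(GARCH11_logL, x0=[1e-6, 0.1, 0.8], args=(r,),
               bounds=[(1e-12, None), (0.0, 1.0), (0.0, 1.0)])
omega, alpha, beta = res.x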
def print_stats(times, tag):
    print("Num. samples: %s" % len(times))
    mean = statistics.mean(times)
    print("Average %s: %.2f" % (tag, mean))
    print("Median %s: %s" % (tag, statistics.median(times)))
    print("Min %s: %s" % (tag, min(times)))
    print("Max %s: %s" % (tag, max(times)))
    delta = Z * (statistics.stdev(times) / math.sqrt(len(times)))  # Z: assumed normal critical value, e.g. 1.96 for 95%
    print("95%% confidence: %.2f -- %.2f" % (mean - delta, mean + delta))
Example #29
    def is_interesting(self, name):
        occ = list(name.occurrences.values())
        # Replace zero counts with 1 so the ratios below stay finite
        for i in range(len(occ)):
            if occ[i] == 0:
                occ[i] = 1

        mean = np.mean(occ)
        # Coefficient-of-variation style score: stdev / mean
        uniqueness = math.sqrt(variance(occ, mean)) / mean

        if uniqueness > 0.20 and mean > self.span:
            if uniqueness > 1.0:
                print("POPULARITY: very high potential")
            elif uniqueness > 0.90:
                print("POPULARITY: high potential")
            elif uniqueness > 0.50:
                print("POPULARITY: probably average or high")
            elif uniqueness > 0.39:
                print("Movie could be popular")
            else:
                print("Movie name irrelevant to popularity")
            print("Variance Level: ", uniqueness)
            return True
        return False
Example #30
    async def async_update(self):
        """Get the latest data and updates the states."""
        _LOGGER.debug("%s: updating statistics.", self.entity_id)
        if self._max_age is not None:
            self._purge_old()

        self.count = len(self.states)

        if not self.is_binary:
            try:  # require only one data point
                self.mean = round(statistics.mean(self.states),
                                  self._precision)
                self.median = round(statistics.median(self.states),
                                    self._precision)
            except statistics.StatisticsError as err:
                _LOGGER.debug("%s: %s", self.entity_id, err)
                self.mean = self.median = STATE_UNKNOWN

            try:  # require at least two data points
                self.stdev = round(statistics.stdev(self.states),
                                   self._precision)
                self.variance = round(statistics.variance(self.states),
                                      self._precision)
            except statistics.StatisticsError as err:
                _LOGGER.debug("%s: %s", self.entity_id, err)
                self.stdev = self.variance = STATE_UNKNOWN

            if self.states:
                self.total = round(sum(self.states), self._precision)
                self.min = round(min(self.states), self._precision)
                self.max = round(max(self.states), self._precision)

                self.min_age = self.ages[0]
                self.max_age = self.ages[-1]

                self.change = self.states[-1] - self.states[0]
                self.average_change = self.change
                self.change_rate = 0

                if len(self.states) > 1:
                    self.average_change /= len(self.states) - 1

                    time_diff = (self.max_age - self.min_age).total_seconds()
                    if time_diff > 0:
                        self.change_rate = self.average_change / time_diff

                self.change = round(self.change, self._precision)
                self.average_change = round(self.average_change,
                                            self._precision)
                self.change_rate = round(self.change_rate, self._precision)

            else:
                self.total = self.min = self.max = STATE_UNKNOWN
                self.min_age = self.max_age = dt_util.utcnow()
                self.change = self.average_change = STATE_UNKNOWN
                self.change_rate = STATE_UNKNOWN
Example #31
 def variance(self):
     'Return the variance of DataStruct.price'
     return self.price.groupby(
         level=1).apply(lambda x: statistics.variance(x))
import statistics
import math

agesData = [10, 13, 14, 12, 11, 10, 11, 10, 15]

print(statistics.mean(agesData))
print(statistics.mode(agesData))
print(statistics.median(agesData))
print(statistics.variance(agesData))
print(statistics.stdev(agesData))
print(math.sqrt(statistics.variance(agesData)))
Example #33
percent = [
    1.984763432, 0.922213312, 3.327987169, 4.190056135, 5.493183641,
    1.864474739, 10.60545309, 2.425821973, 2.726543705, 8.740978348,
    6.174819567
]

percent.sort()
print(percent)

# this ends with error
# print(median(percent))
# print(median_low(percent))
# print(median_high(percent))

# this succeeds
import statistics

print(statistics.fmean(percent))
print(statistics.harmonic_mean(percent))
print(statistics.variance(percent))

print('===========')
print(statistics.median(percent))
print(statistics.median_low(percent))
print(statistics.median_high(percent))

# this succeeds
from statistics import *

print(median(percent))
print(median_low(percent))
print(median_high(percent))
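An aside on the optional second argument of statistics.variance, used in a few of these examples: it is a precomputed mean (xbar), not a rounding precision, so passing anything other than the true mean skews the result.

import statistics

data = [1.0, 2.0, 3.0, 4.0]
xbar = statistics.mean(data)
assert statistics.variance(data, xbar) == statistics.variance(data)
print(statistics.variance(data, 2.0))  # 2.0, not the correct 1.666..., because 2.0 is not the mean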
Example #34
print('The predicted values are:', Yhat)
print('The true values are:', Ytrue[1:])

# ### 4- Calculate the residuals, which are the differences between the predicted values and the true values, and display them as:
# #### The residuals for this estimate are: ______________

# In[44]:

res = Ytrue[2:] - Yhat[1:]
print('The residuals for this estimate are: ', res)

# In[45]:

print('The mean of the residuals for this estimate is: ', statistics.mean(res))
print('\nThe variance of the residuals for this estimate is: ',
      statistics.variance(res))

plt.hist(res)
plt.title('Residuals Distribution', pad=40)
plt.xlabel('Residual value')
plt.ylabel('Number of Observations')
plt.figure()
plt.show()

# ### 5- Calculate the sum of squares of the residuals. This norm is called "Sum Square Error" or simply "SSE". Display the message as follows:
# #### The sum square error for this estimate is: _________

# In[47]:

sse = 0
SSE = []
Example #35
def main():
    #Read the chosen day
    display(Image(filename='calendario.png')
            )  #Put the calendar image in the same folder as the Python file
    diaok = False
    while not diaok:
        dia = int(input("Digite o número do dia desejado (1-30): "))
        if dia in range(1, 31):
            diaok = True
        else:
            print("Input inválido, escolha outro valor de dia")
            print("================================================")

    data = padroniza_dia(
        dia)  #Normalize the day value to a two-digit string

    #Read the chosen time-grouping interval
    agrupamentook = False
    while not agrupamentook:
        print(
            "Agrupamentos disponíveis: \n1Min \n2Min \n3Min \n4Min \n5Min \n")
        agrupamento = input("Digite o agrupamento temporal desejado: ")
        if agrupamento == "1Min" or agrupamento == "2Min" or agrupamento == "3Min" or agrupamento == "4Min" or agrupamento == "5Min":
            agrupamentook = True
        else:
            print("Input inválido, escolha outro valor de agrupamento")
            print("================================================")

    #Read the radar chosen for forecasting
    radares = [10426, 10433, 10482, 10484, 10492, 10500, 10521, 10531]
    radarok = False
    while not radarok:
        print("Radares disponíveis:", radares, sep='\n')
        radar = int(input("Qual o radar a ser previsto?: "))
        if radar in radares:
            radarok = True
        else:
            print("Input inválido, escolha outro valor de agrupamento")
            print("================================================")

    #File read - change this path to match the data location on your machine
    path = "C:\\Users\\walmart\\Documents\\USP\\TCC\\Dados\\Dados Agrupados\\Grouped by frequency"
    dados = pd.read_csv(path + "\\" + str(dia) + "\\" + str(dia) + "_group_" +
                        str(agrupamento) + ".csv")
    dados.Data = pd.to_datetime(
        dados.Data)  #Convert the Data column to datetime format
    dados = dados[dados['Número Agrupado'] == radar].reset_index(
        drop=True)  #Keep only the radar we are testing

    #Read the cutoff time between train/test
    horaok = False
    while not horaok:
        hora = input(
            "Formato do horário esperado: 00:00:00 \n Qual o horário de corte entre treino/teste?: "
        )
        if len(hora) == 8 and hora[2] == ":" and hora[5] == ":" and int(
                hora[0] + hora[1]) in range(
                    0, 25) and int(hora[3] + hora[4]) in range(
                        0, 60) and int(hora[6] + hora[7]) in range(0, 60):
            horaok = True
        else:
            print("Input inválido, digite novamente o valor da hora")
            print("================================================")

    divisao = dados.loc[dados['Data'] == '2018-03-' + data + ' ' +
                        str(hora)].index[0]
    start = divisao - 100  #How many previous steps to use for the forecast
    # Split the chosen radar's dataframe into train and validation
    treino = dados.loc[start:divisao]
    teste = dados.loc[divisao:]

    #CHECK WHETHER THE SERIES IS STATIONARY
    diferenciacao = dados.Quantidade
    estacionaria = adfuller(diferenciacao)
    print('ADF Statistic: %f' % estacionaria[0],
          '| p-value: %f' % estacionaria[1])
    if estacionaria[1] < 0.05:
        print("A série do radar %s é estacionária, pois p-value < 0.05" %
              radar)
        pronto = True
        d = 0
    else:
        print("A série do radar %s não é estacionária, pois p-value > 0.05" %
              radar)
        pronto = False
        d = 0
        while not pronto:
            diferenciacao = diferenciacao.diff().dropna()
            d = d + 1
            estacionaria = adfuller(diferenciacao)
            print('ADF Statistic: %f' % estacionaria[0],
                  '| p-value: %f' % estacionaria[1])
            if estacionaria[1] < 0.05:
                pronto = True
    print("A ordem de diferenciação é %d" % d)

    plt.rcParams.update({'figure.figsize': (9, 7), 'figure.dpi': 120})
    fig, axis = plt.subplots(2, 1, sharex=False)
    axis[0].plot(dados.Quantidade)
    axis[0].set_title('Série Original')
    axis[1].plot(diferenciacao)
    axis[1].set_title('Série Estacionária')
    plt.show()

    pronto = False
    while not pronto:
        #AUTOREGRESSION ORDER - PACF ON STATIONARY DATA
        plt.rcParams.update({'figure.figsize': (9, 7), 'figure.dpi': 120})
        fig, axis = plt.subplots(2, 1, sharex=False)
        axis[0].set(ylim=(0, 1.05))
        plot_pacf(diferenciacao, ax=axis[0], lags=100)

        #MOVING-AVERAGE ORDER - ACF ON STATIONARY DATA
        axis[1].set(ylim=(0, 1.2))
        plot_acf(diferenciacao, ax=axis[1], lags=100)
        plt.show()

        p = int(input("Qual o valor de p adequado?: "))
        q = int(input("Qual o valor de q desejado?: "))

        #MODEL CONSTRUCTION
        steps = int(
            input("Quantos steps a frente das %s h deseja prever? " % hora))
        teste = teste[:steps +
                      1]  #Runs from the (given) cutoff time up to 15+1 steps

        #The forecast runs from cutoff+1 step, for 15 steps
        data_prev = dados.loc[divisao + 1:]
        data_prev = data_prev[:steps]

        #Instantiate the model and fit it on the training data
        model = ARIMA(treino['Quantidade'], order=(p, d, q))
        model_fit = model.fit(disp=-1)

        #Run the forecast
        fc, se, conf = model_fit.forecast(steps, alpha=0.05)  #95% confidence

        #Forecast values take the index of data_prev
        fc_series = pd.Series(fc, index=data_prev.index)
        lower_series = pd.Series(conf[:, 0], index=data_prev.Data)
        upper_series = pd.Series(conf[:, 1], index=data_prev.Data)

        #Plot the model's forecast:
        fig, ax = plt.subplots(figsize=(12, 5))
        ax.plot(treino['Data'],
                treino['Quantidade'],
                label='Observações de fato, usadas para treino')
        ax.plot(teste['Data'],
                teste['Quantidade'],
                label='Comportamento de fato, após %s h' % hora)
        ax.plot(data_prev['Data'], fc_series, label='ARIMA')
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
        plt.fill_between(lower_series.index,
                         lower_series,
                         upper_series,
                         color='k',
                         alpha=0.15)
        plt.title('Local: Radar %d' % radar)
        plt.legend(loc='upper left', fontsize=10)
        plt.show()

        #TABLE OF FORECAST DATA
        dados_prev = pd.Series(fc, index=data_prev.Data)
        dados_prev = dados_prev.to_frame()
        dados_prev.columns = ['Quantidade']
        dados_prev['Data'] = dados_prev.index
        dados_prev = dados_prev[['Data', 'Quantidade']]
        dados_prev = dados_prev.reset_index(drop=True)
        #print("Forecast:")
        #print(dados_prev)
        #print("")

        #TABLE OF ACTUAL DATA
        dados_reais = teste[['Data', 'Quantidade']]
        dados_reais = dados_reais.reset_index(drop=True)
        dados_reais = dados_reais[1:]
        dados_reais = dados_reais.reset_index(drop=True)
        #print("Actual:")
        #print(dados_reais)
        #print("")

        #ERROR CALCULATION
        erros = list()
        for i in range(0, 15):
            erro = dados_reais.loc[i,
                                   'Quantidade'] - dados_prev.loc[i,
                                                                  'Quantidade']
            erros.append(abs(erro))

        erros = pd.Series(erros, index=data_prev.Data)
        erros = erros.to_frame()
        erros.columns = ['Erro']
        erros['Data'] = erros.index
        erros = erros[['Data', 'Erro']]
        erros = erros.reset_index(drop=True)
        #print(erros)

        var_erro = statistics.variance(erros.Erro)
        print("")
        print("A variância do erro dessa simulação foi de: %f" % var_erro)
        print("================================================")
        print(model_fit.summary())
        print("================================================")

        forecast_accuracy(fc, dados_reais.Quantidade)

        #DETERMINE WHICH MODEL WOULD BE IDEAL FOR THE FORECAST
        modell = pm.auto_arima(dados.Quantidade,
                               start_p=0,
                               start_q=0,
                               test='adf',
                               max_p=p,
                               max_q=q,
                               m=1,
                               d=d,
                               seasonal=False,
                               start_P=0,
                               D=0,
                               trace=True,
                               error_action='ignore',
                               suppress_warnings=True,
                               stepwise=True)
        print(modell.summary())

        ok = input("O modelo está adequado? (S/N) ")
        if ok == "S":
            pronto = True
            print("Modelo definido para aplicação em outros horários")
            print("================================================")
        else:
            print("Escolha outras ordens de modelo")
            print("================================================")

    #SIMULATION OF OTHER VALUES AFTER THE MODEL IS DEFINED

    pronto = False
    while not pronto:
        horaok = False
        while not horaok:
            hora = input(
                "Formato do horário esperado: 00:00:00 \n Qual o horário de início da previsão?: "
            )
            if len(hora) == 8 and hora[2] == ":" and hora[5] == ":" and int(
                    hora[0] + hora[1]) in range(
                        0, 25) and int(hora[3] + hora[4]) in range(
                            0, 60) and int(hora[6] + hora[7]) in range(0, 60):
                horaok = True
                print("================================================")
            else:
                print("Input inválido, digite novamente o valor da hora")
                print("================================================")

        divisao = dados.loc[dados['Data'] == '2018-03-' + data + ' ' +
                            str(hora)].index[0]
        start = divisao - 100
        treino = dados.loc[start:divisao]
        teste = dados.loc[divisao:]

        steps = int(
            input("Quantos steps a frente das %s h deseja prever?" % hora))
        teste = teste[:steps + 1]
        data_prev = dados.loc[divisao + 1:]
        data_prev = data_prev[:steps]

        #Run the forecast
        fc, se, conf = model_fit.forecast(steps, alpha=0.05)  #95% confidence

        #Forecast values take the index of data_prev
        fc_series = pd.Series(fc, index=data_prev.index)
        lower_series = pd.Series(conf[:, 0], index=data_prev.Data)
        upper_series = pd.Series(conf[:, 1], index=data_prev.Data)

        #Plot the model's forecast:
        fig, ax = plt.subplots(figsize=(12, 5))
        ax.plot(treino['Data'],
                treino['Quantidade'],
                label='Observações de fato, usadas para treino')
        ax.plot(teste['Data'],
                teste['Quantidade'],
                label='Comportamento de fato, após %s h' % hora)
        ax.plot(data_prev['Data'], fc_series, label='ARIMA')
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
        plt.fill_between(lower_series.index,
                         lower_series,
                         upper_series,
                         color='k',
                         alpha=0.15)
        plt.title('Local: Radar %d' % radar)
        plt.legend(loc='upper left', fontsize=10)
        plt.show()

        ok = input("Deseja fazer outra previsão? (S/N): ")
        if ok == "N":
            print("Simulação encerrada")
            pronto = True
        else:
            print("Iniciando outra simulação")
            print("================================================")
#statistics_basic

import statistics
#from statistics import mean as m

example_list = [5, 7, 2, 15, 12, 10, 8, 9, 14, 11, 12]

x = statistics.mean(example_list)
print(x)
#print(m(example_list))

y = statistics.mode(example_list)
print(y)

z = statistics.median(example_list)
print(z)

m = statistics.stdev(example_list)
print(m)

n = statistics.variance(example_list)
print(n)

print()
print()

Example #37
ecdf = sm.distributions.ECDF(rnd)
x = np.linspace(min(rnd), max(rnd))
F = ecdf(x)
plt.step(x, F, label=str(N))
plt.legend(loc='upper left')
plt.xlabel('$x$', fontsize=14)
plt.ylabel('$P$', fontsize=14)
plt.axis([-0.5, mu + 9, 0, 1.4])
plt.grid(True)

#Theoretical cumulative distribution curve

x = np.linspace(0, xmean + 200, 10000)
f = 1 - np.e**(-(xmean**-1) * x)
plt.plot(x, f)
plt.show()

print()
print('Sample mean:')
print(sum(rnd) / len(rnd))
print()
import statistics

print('Sample variance:')
print(statistics.variance(rnd))
print()
print('Sample standard deviation:')
print(statistics.stdev(rnd))
Example #38
# =============================================================================
# Q1
# =============================================================================

import statistics as st
x = [3, 1.5, 4.5, 6.75, 2.25, 5.75, 2.25]
print(st.mean(x))
print(st.harmonic_mean(x))
print(st.median(x))
print(st.median_low(x))
print(st.median_high(x))
print(st.median_grouped(x))
print(st.mode(x))
print(st.pstdev(x))
print(st.pvariance(x))
print(st.stdev(x))
print(st.variance(x))

# =============================================================================
# Q2
# =============================================================================
import random
print(random.random())
print(random.randrange(10))
print(random.choice(['ali', 'khalid', 'hussam']))
print(random.sample(range(1000), 10))
print(random.choice('orange academy'))
items = [1, 5, 8, 9, 2, 4]
random.shuffle(items)
print(items)
print(random.randint(20, 30))
print(random.randrange(1000, 2111, 5))
import statistics as st  # assumed alias; this fragment does not show its imports

def variancia(lista):
    return st.variance(lista)
#10-4-Error Output and Redirection:
import sys
sys.stderr.write('Error, file not found\n')
sys.stdout.write('hi\n')
#print('waiting user to enter a line...',end="");entr = sys.stdin.readline();print(entr)
#log messages are sent to a file or to sys.stderr:
import logging
logging.debug('Debugging information')

#10-6
#Random Choice
import random
print(random.choice(['apple', 'pear', 'banana']))

#statistics:
import statistics
data = [2.75, 1.75, 1.25, 0.25, 0.5, 1.25, 3.5]
print("data: ", data)
print('mean: ', statistics.mean(data))
print('median: ', statistics.median(data))
print('stdev: ', statistics.stdev(data))
print('variance: ', statistics.variance(data))

#10-10- Measure execution time:
from timeit import Timer
print("code executing time is: ", Timer('i = 0').timeit())

#15-Floating Point Arithmetic: Issues and Limitations:
#0.1 is actually 0.1000000000000000055511151231257827021181583404541015625
print(0.1 + 0.1 + 0.1 == 0.3)  #False
# Records the workload
for line in workload:
    w.write(line)

# Finds where creates and removes are located
creates = [i for i in range(10)]
removes = []
for i in range(len(workload)):
    line = workload[i]
    if line.startswith('create'): creates.append(i + 10)
    if line.startswith('remove'): removes.append(i + 10)

from pprint import pprint
print('{} creates'.format(len(creates)))
pprint(creates, compact=True)
print('{} removes'.format(len(removes)))
pprint(removes)

# Key Statistics
print('{} keys in total'.format(len(keys)))
sorted_popularity = sorted(keys.values(), reverse=True)
top5 = sorted_popularity[:5]
bottom5 = sorted_popularity[-5:]
mean = statistics.mean(sorted_popularity)
variance = statistics.variance(sorted_popularity, mean)
print("mean key popularity = {}; variance of key popularity = {}".format(mean, variance))
print("top5 = {}".format(top5))
print("bottom5 = {}".format(bottom5))

# Finish writing the file
w.close()
import numpy as np
import matplotlib.pyplot as plt
import math


import pandas as pd
import statistics

df = pd.read_csv('sbi_data.csv', usecols = ['Date', 'Close'], nrows = 66)

U = []
for i in range(65):
    U.append(math.log(df.Close[i + 1] / df.Close[i]))  # daily log returns

sigma_sq = statistics.variance(U)
sigma = math.sqrt(sigma_sq)

mean = statistics.mean(U)

mu = sigma_sq/2 + mean
print("Mu = {}, Sigma_Sq = {}, Sigma = {}".format(mu, sigma_sq, sigma))
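A one-line check of the drift estimate above, under the standard lognormal-price assumption: if the daily log returns u_i = ln(S_{i+1}/S_i) are N(mu - sigma^2/2, sigma^2), then E[u] = mu - sigma^2/2, so mu = mean(U) + sigma_sq/2, which is exactly what the code computes.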

lambda_ = [0.01, 0.05, 0.1, 0.2]

# Algorithm I: Using Poisson Distribution
# (Simulating at Fixed Dates)
for i in range(4):
    X = {}
    t = 0
    X[0] = math.log(df.Close[65])
    
Example #43
# Act
stdev = statistics.stdev(data, mu)
end = timeit.default_timer()
# Assert
print("stdev           = ", stdev)
print(u"      time [\u00B5s] = ", (end - start) * 1000000)
assert (abs(stdev - 1.08108) < 0.00001)
#==================================================================================

#==================================================================================
# Arrange
data = [2.75, 1.75, 1.25, 0.25, 0.5, 1.25, 3.5]
print("data          = ", data)
start = timeit.default_timer()
# Act
variance = statistics.variance(data)
end = timeit.default_timer()
# Assert
print("variance        = ", variance)
print(u"      time [\u00B5s] = ", (end - start) * 1000000)
assert (abs(variance - 1.37202) < 0.00001)
#==================================================================================

#==================================================================================
# Arrange
mu = statistics.mean(data)
start = timeit.default_timer()
# Act
variance = statistics.variance(data, mu)
end = timeit.default_timer()
# Assert
Example #44
def test_statistics(values):
    s = online_stats.Statistics(values)

    assert s.mean() == statistics.mean(values)
    assert s.variance() == statistics.variance(values)
    assert s.stdev() == statistics.stdev(values)
Example #45
import math
import random
import statistics

print(math.sqrt(16))

print(random.randint(10,15))

num = [1,5,10]

print("Mean: ",statistics.mean(num))
print("Median: ",statistics.median(num))
print("Mode: ", statistics.mode(num))


#-1-------------------------

print("Sample variance of the data: ", statistics.variance(num))
Example #46
def main():

    print("Validating Connected IoT Devices!")
    DM.dm_engine()

    DM.block_all_ips()

    # Importing the dataset
    dataset = pd.read_csv('/home/pi/Software/IoT-HASS/CICIDS2017_Sample.csv')

    X = dataset.iloc[:, :-1].values
    y = dataset.iloc[:, 78].values

    # Splitting the dataset into the Training set and Test set
    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.20,
                                                        random_state=0)

    ############## Start of Feature Scaling ###################
    from sklearn.preprocessing import StandardScaler
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    # Fitting Decision Tree Classification to the Training set
    from sklearn.tree import DecisionTreeClassifier
    classifier = DecisionTreeClassifier(criterion='entropy', random_state=0)
    classifier.fit(X_train, y_train)

    # Feature Selection
    from sklearn.feature_selection import SelectKBest, SelectPercentile, chi2

    KBestSelector = SelectKBest(k=5)
    KBestSelector = KBestSelector.fit(X_train, y_train)
    X_train_FS = KBestSelector.transform(X_train)

    names = dataset.iloc[:, :-1].columns.values[KBestSelector.get_support()]
    scores = KBestSelector.scores_[KBestSelector.get_support()]
    names_scores = list(zip(names, scores))
    ns_df = pd.DataFrame(data=names_scores, columns=['Feat_Name', 'F_Score'])
    ns_df_sorted = ns_df.sort_values(['F_Score', 'Feat_Name'])
    #print(ns_df_sorted)

    # Fit the model with the new reduced features
    classifier.fit(X_train_FS, y_train)

    # Predicting the Test set results
    X_test_FS = KBestSelector.transform(X_test)
    y_pred = classifier.predict(X_test_FS)

    conn = socket.socket(socket.AF_PACKET, socket.SOCK_RAW, socket.ntohs(3))

    # define array variables to hold time and statistics
    TimeBetBwdPkts = 0
    NumBwdPkts = 0
    NumIdleFlow = 0
    prev_fin_flag = 0
    flow_idle_start_time = datetime.datetime.now()
    flow_idle_end_time = datetime.datetime.now()
    AllTimesBetBwdPkts = []
    AllflowIdleTimes = []
    AllPacketLengths = []

    max_biat = 0
    mean_biat = 0
    std_biat = 0
    pkt_len_varience = 0
    std_idle = 0

    while True:
        raw_data, addr = conn.recvfrom(65535)
        dest_mac, src_mac, eth_proto, data = unpack_ethernet_frame(raw_data)

        # get packet length or size
        packet_length = len(raw_data)
        AllPacketLengths.append(packet_length)

        # IPv4
        if eth_proto == 8:
            (version, header_length, ttl, proto, src, target,
             data) = ipv4_packet_header(data)

            # TCP packet
            if proto == 6:
                (src_port, dest_port, sequence, acknowledgement, flag_urg,
                 flag_ack, flag_psh, flag_rst, flag_syn, flag_fin,
                 data) = unpack_tcp_segment(data)

                # capture packet flow
                # we will identify each flow by determining when src and dst ip change

                # first capture the original src and dst IPs
                prev_src_ip = src
                prev_target_ip = target

                if flag_fin == '1' and prev_fin_flag == '0':
                    flow_idle_start_time = datetime.datetime.now()
                    NumIdleFlow = NumIdleFlow + 1
                elif flag_fin == '0' and prev_fin_flag == '1':
                    flow_idle_end_time = datetime.datetime.now()
                else:
                    flow_idle_start_time = datetime.datetime.now()
                    flow_idle_end_time = datetime.datetime.now()

                prev_fin_flag = flag_fin

                flowIdleTime = (flow_idle_end_time -
                                flow_idle_start_time).microseconds

                AllflowIdleTimes.append(flowIdleTime)

                LastTimeBwdPktSeen = datetime.datetime.now()

                if (NumBwdPkts == 1):
                    TimeBetBwdPkts = 0
                elif (NumBwdPkts > 1):
                    TimeBetBwdPkts = (datetime.datetime.now() -
                                      LastTimeBwdPktSeen).microseconds
                else:
                    TimeBetBwdPkts = 0

                NumBwdPkts = NumBwdPkts + 1
                AllTimesBetBwdPkts.append(TimeBetBwdPkts)

            # get statistics values for backwards packets
            if sum(AllTimesBetBwdPkts) == 0:
                mean_biat = 0
                max_biat = 0
                std_biat = 0
            else:
                mean_biat = stats.mean(AllTimesBetBwdPkts)
                max_biat = max(AllTimesBetBwdPkts)
                std_biat = stats.stdev(AllTimesBetBwdPkts)

            if (sum(AllflowIdleTimes) > 0 and len(AllflowIdleTimes) > 1):
                std_idle = stats.stdev(AllflowIdleTimes)

            else:
                std_idle = 0

            if (sum(AllPacketLengths) > 0 and len(AllPacketLengths) > 1):
                pkt_len_varience = stats.variance(AllPacketLengths)
            else:
                pkt_len_varience = 0

            # Invoking iot_hass() function
            iot_hass(mean_biat, std_biat, max_biat, pkt_len_varience, std_idle,
                     src, target, classifier, dest_mac, src_mac, raw_data)
Example #47
""" LV-MaxSonar data
	PW: This pin outputs a pulse width representation of range.
	The distance can be calculated using the scale factor of 147 µs per inch.
	Range is (0.88, 37.5) in ms
"""

# begin
print(
    'Reading from GPIO pin BCM{0}. Press ^Z, ^C, or use another signal to exit.\n'
    .format(input_pin))
time.sleep(1.5)

# continuously measure the input pin (albeit still too slow, so it's effectively undersampling)
while not do_exit:
    # read from input
    x = int(GPIO.input(input_pin))  # 1 or 0

    # apply IIR low pass filter (undersampling, so it requires an average)
    acc += k * (x - acc)
    distance = translate(acc, 0, 1, 0.88, 37.5) / 0.147 * 2.51 / 100.0
    past_measurements.append(distance)
    if (len(past_measurements) > 10):
        past_measurements.pop(0)

    print('PWM: {0:.2f}\t\tDistance: {1:.2f}m\t\tVariance: {2:.2f}'.format(
        acc, distance, variance(past_measurements)))

    time.sleep(0.05)

GPIO.cleanup()
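The translate() helper is not shown in this snippet; a plausible linear-map implementation (an assumption, not the original code) would be:

def translate(value, in_min, in_max, out_min, out_max):
    # Linearly map value from [in_min, in_max] onto [out_min, out_max]
    return out_min + (value - in_min) * (out_max - out_min) / (in_max - in_min)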
Example #48
 def variance(self):
     return stats.variance(self.values())
Example #49
def variance(data):
    return statistics.variance(data)
Example #50
from statistics import variance, stdev
import numpy as np

coffee = np.array([202, 177, 121, 148, 89, 121, 137, 158])

#compute the variance
cf_var = variance(coffee)
print("Simple Variance :", round(cf_var, 2))
Example #51
        print("Episode: " + str(e) + " Score: " + str(score) +
              " Max height: " + str(max_h),
              end="\r",
              flush=False)

    iter += 1
    # Plot graph of rewards per episode
    #plt.plot(np.arange(epochs),episode_rewards[epochs*(iter-1):])
    #plt.show()
    print("Mean reward: ", stats.mean(episode_rewards[epochs * (iter - 1):]))
    mean_rewards.append(stats.mean(episode_rewards[epochs * (iter - 1):]))
    print("Std Deviation: ",
          stats.stdev(episode_rewards[epochs * (iter - 1):]))
    mean_stddevs.append(stats.stdev(episode_rewards[epochs * (iter - 1):]))
    print("Variance: ",
          stats.variance(episode_rewards[epochs * (iter - 1):]) / epochs)
    mean_variances.append(
        stats.variance(episode_rewards[epochs * (iter - 1):]) / epochs)
env.close()

plt.plot(np.arange(epochs * iter), episode_rewards)
plt.show()
print("Overall mean reward: ", stats.mean(episode_rewards))
print("Overall std deviation: ", stats.stdev(episode_rewards))
print("Overall variance: ", stats.variance(episode_rewards) / (epochs * iter))
plt.figure(figsize=(10, 5))
plt.plot(l_rate, mean_rewards)
#plt.plot(l_rate,mean_stddevs)
#plt.plot(l_rate,mean_variances)
plt.show()
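
The "/ epochs" divisions above implement the standard-error idea: the variance of a sample mean is the sample variance divided by the sample size. As a standalone sketch (the function name is ours):

import statistics

def variance_of_mean(samples):
    # Var(mean) = s^2 / n, the squared standard error of the mean
    return statistics.variance(samples) / len(samples)

print(variance_of_mean([1.0, 2.0, 3.0, 4.0]))   # (5/3) / 4 ~= 0.4167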
Example #52
0
mean_Bare_Nuclei                     = stat.mean(data[:,6])
mean_Bland_Chromatin                 = stat.mean(data[:,7])
mean_Normal_Nucleoli                 = stat.mean(data[:,8])
mean_Mitoses                         = stat.mean(data[:,9])

stdev_Clump_Thickness                = stat.stdev(data[:,1])
stdev_Uniformity_of_Cell_Size        = stat.stdev(data[:,2])
stdev_Uniformity_of_Cell_Shape       = stat.stdev(data[:,3])
stdev_Marginal_Adhesion              = stat.stdev(data[:,4])
stdev_Single_Epithelial_Cell_Size    = stat.stdev(data[:,5])
stdev_Bare_Nuclei                    = stat.stdev(data[:,6])
stdev_Bland_Chromatin                = stat.stdev(data[:,7])
stdev_Normal_Nucleoli                = stat.stdev(data[:,8])
stdev_Mitoses                        = stat.stdev(data[:,9])

variance_Clump_Thickness             = stat.variance(data[:,1])
variance_Uniformity_of_Cell_Size     = stat.variance(data[:,2])
variance_Uniformity_of_Cell_Shape    = stat.variance(data[:,3])
variance_Marginal_Adhesion           = stat.variance(data[:,4])
variance_Single_Epithelial_Cell_Size = stat.variance(data[:,5])
variance_Bare_Nuclei                 = stat.variance(data[:,6])
variance_Bland_Chromatin             = stat.variance(data[:,7])
variance_Normal_Nucleoli             = stat.variance(data[:,8])
variance_Mitoses                     = stat.variance(data[:,9])

skew_Clump_Thickness                 = skew(data[:,1])
skew_Uniformity_of_Cell_Size         = skew(data[:,2])
skew_Uniformity_of_Cell_Shape        = skew(data[:,3])
skew_Marginal_Adhesion               = skew(data[:,4])
skew_Single_Epithelial_Cell_Size     = skew(data[:,5])
skew_Bare_Nuclei                     = skew(data[:,6])
Example #53
0
        # Get Gutiérrez de Polini (comprehensibility) score

        L = num_letters
        P = num_words
        F = num_sentences

        GP_com = mf.gutierres_polini_comprehension(L, P, F)

        # Get mean number of letters per word

        # text is assumed to be an iterable of words
        let_per_word = [len(x) for x in text]
        x_hat = sum(let_per_word) / len(let_per_word)

        # Get variance of number of letters per word

        variance = statistics.variance(let_per_word)

        # Get Muñoz-Muñoz (readability)

        n = num_words

        MM_read = mf.munoz_munoz_read(n, x_hat, variance)

        # Get sentences per hundred words

        hun_sentences = mf.get_sentences(' '.join(hun_words))

        # Get syllables per hundred words
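
For reference, the standard Gutiérrez de Polini comprehensibility formula, which mf.gutierres_polini_comprehension presumably implements (a sketch under that assumption):

def gutierrez_de_polini(num_letters, num_words, num_sentences):
    # C = 95.2 - 9.7 * (letters/words) - 0.35 * (words/sentences);
    # higher scores indicate easier Spanish text
    return (95.2 - 9.7 * (num_letters / num_words)
                 - 0.35 * (num_words / num_sentences))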
Example #54
0
import statistics

data = [0, 1, 2, 3, 4, 5, 6]

print(statistics.mean(data))
print(statistics.variance(data))

from urllib.request import urlopen
with urlopen("http://tycho.usno.navy.mil/cgi-bin/timer.pl") as response:
    for line in response:
        line = line.decode("utf-8")
        if "EST" in line or "EDT" in line:
            print(line)

from datetime import date

now = date.today()
birthday = date(1910, 5, 10)
age = now - birthday
print(age.days)
print(age.days // 365)  # rough age in years (ignores leap days)
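
Integer-dividing days by 365 drifts over long spans because it ignores leap days; a more precise sketch using calendar fields (the helper name is ours):

from datetime import date

def age_in_years(birthday, today=None):
    today = today or date.today()
    # subtract one if this year's birthday has not happened yet
    before_birthday = (today.month, today.day) < (birthday.month, birthday.day)
    return today.year - birthday.year - before_birthday

print(age_in_years(date(1910, 5, 10)))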
Example #55
0
                log(max_val - observed_val) - log(val))

    with Pool(processes=processes) as pool:
        empirical_dist = sorted(
            pool.map(
                partial(draw_sample, population=population, k=len(regions)),
                range(permutations)))
    pval = sum(val >= observed_val for val in empirical_dist) / permutations
    empirical_mean = mean(empirical_dist)
    if empirical_mean == 0:
        raise RuntimeError(
            'The mean of the empirical distribution appears to be zero. '
            'Increasing the number of permutations might solve this problem.')
    fold_change = observed_val / empirical_mean
    if parametric:
        empirical_var = variance(empirical_dist)
        a = empirical_mean**2 / empirical_var
        scale = empirical_var / empirical_mean
        mean_pp = gamma.cdf(empirical_mean, a, scale=scale)
        if mean_pp <= conf / 2:
            empirical_conf_lower = 0
            empirical_conf_upper = gamma.ppf(conf, a, scale=scale)
        elif mean_pp >= 1 - conf / 2:
            empirical_conf_lower = gamma.ppf((1 - conf) / 2, a, scale=scale)
            empirical_conf_upper = gamma.ppf(1 - (1 - conf) / 2,
                                             a,
                                             scale=scale)
        else:
            empirical_conf_lower = gamma.ppf(mean_pp - conf / 2,
                                             a,
                                             scale=scale)
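
The parametric branch fits a gamma distribution by moment matching: a gamma with shape a and scale s has mean a*s and variance a*s**2, so a = mean**2/var and s = var/mean, exactly as computed above. A self-contained check (a sketch):

from scipy.stats import gamma

def gamma_from_moments(mu, var):
    # shape and scale such that mean = a*scale and variance = a*scale**2
    a = mu ** 2 / var
    scale = var / mu
    return a, scale

a, scale = gamma_from_moments(4.0, 2.0)
dist = gamma(a, scale=scale)
print(dist.mean(), dist.var())   # 4.0 2.0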
Example #56
0
 ["Mode", statistics.mode(results_drawn)],
 ["Modes", statistics.multimode(results_drawn)],
 # (2C) Percentiles
 ["Median", statistics.median(results_drawn)],
 [
     "Percentiles",
     statistics.quantiles(results_drawn, n=4, method='inclusive')
 ],  # inclusive, exclusive
 [
     "Interquartile range",
     scipy.stats.iqr(results_drawn, interpolation='midpoint')
 ],  # {‘linear’, ‘lower’, ‘higher’, ‘midpoint’, ‘nearest’}   
 # (3) Measure of dispersion
 # (3A) Sample
 # PICK ONE OF THESE
 ["Sample variance", statistics.variance(results_drawn)],
 ["Sample standard deviation",
  statistics.stdev(results_drawn)],
 # (3B) Population
 # PICK ONE OF THESE
 ["Population variance",
  statistics.pvariance(results_drawn)],
 ["Population standard deviation",
  statistics.pstdev(results_drawn)],
 # (4) Distortion of symmetry
 # (4A) Skewness
 ["Skewness", scipy.stats.skew(results_drawn)],
 # (4B) Kurtosis
 ["Kurtosis", scipy.stats.kurtosis(results_drawn)],
 # (5) Confidence interval
 [
Example #57
0
 fft_column_x = get_fft_values(raw_train_data[:9000, 0], 1.5 / len(column_x), len(column_x), 100)
 fft_column_y = get_fft_values(raw_train_data[:9000, 1], 1.5 / len(column_x), len(column_x), 100)
 fft_column_z = get_fft_values(raw_train_data[:9000, 2], 1.5 / len(column_x), len(column_x), 100)
 
 #raw stats
 feature_for_one_csv.append(stats.mean(column_x))
 # stats.harmonic_mean(column_x)   # skipped: fails on negative values (a possible fix: add a constant offset)
 feature_for_one_csv.append(stats.median(column_x))
 feature_for_one_csv.append(stats.median_low(column_x))
 feature_for_one_csv.append(stats.median_high(column_x))
 feature_for_one_csv.append(stats.median_grouped(column_x))
 # stats.mode(column_x)   # skipped: there are 4 equally common values
 feature_for_one_csv.append(stats.pstdev(column_x))
 feature_for_one_csv.append(stats.pvariance(column_x))
 feature_for_one_csv.append(stats.stdev(column_x))
 feature_for_one_csv.append(stats.variance(column_x))
 
 feature_for_one_csv.append(stats.mean(column_y))
 # stats.harmonic_mean(column_y)
 feature_for_one_csv.append(stats.median(column_y))
 feature_for_one_csv.append(stats.median_low(column_y))
 feature_for_one_csv.append(stats.median_high(column_y))
 feature_for_one_csv.append(stats.median_grouped(column_y))
 # stats.mode(column_y)
 feature_for_one_csv.append(stats.pstdev(column_y))
 feature_for_one_csv.append(stats.pvariance(column_y))
 feature_for_one_csv.append(stats.stdev(column_y))
 feature_for_one_csv.append(stats.variance(column_y))
 
 feature_for_one_csv.append(stats.mean(column_z))
 # stats.harmonic_mean(column_z)
Example #58
0
triplesPorPartido = []


for line in f:

    # each line is assumed to hold five whitespace-separated integer columns
    values = line.split()
    puntosPorPartido.append(int(values[0]))
    faltasPorPartido.append(int(values[1]))
    rebotesPorPartido.append(int(values[2]))
    tirosLibresPorPartido.append(int(values[3]))
    triplesPorPartido.append(int(values[4]))
    



print("---- PUNTOS POR PARTIDO ----")
print("Media: " + str(stats.mean(puntosPorPartido)) + " || " + "Moda: " + str(stats.mode(puntosPorPartido)) + " || " + "Máximo: " + str(max(puntosPorPartido)) + " || " + "Mínimo: " + str(min(puntosPorPartido)) + " || " + "Varianza: " + str(stats.variance(puntosPorPartido)))



print("---- FALTAS POR PARTIDO ----")
print("Media: " + str(stats.mean(faltasPorPartido)) + " || " + "Moda: " + str(stats.mode(faltasPorPartido)) + " || " + "Máximo: " + str(max(faltasPorPartido)) + " || " + "Mínimo: " + str(min(faltasPorPartido)) + " || " + "Varianza: " + str(stats.variance(faltasPorPartido)))

print("---- REBOTES POR PARTIDO ----")
print("Media: " + str(stats.mean(rebotesPorPartido)) + " || " + "Moda: " + str(stats.mode(rebotesPorPartido)) + " || " + "Máximo: " + str(max(rebotesPorPartido)) + " || " + "Mínimo: " + str(min(rebotesPorPartido)) + " || " + "Varianza: " + str(stats.variance(rebotesPorPartido))) 

print("---- TIROS LIBRES POR PARTIDO ----")
print("Media: " + str(stats.mean(tirosLibresPorPartido)) + " || " + "Moda: " + str(stats.mode(tirosLibresPorPartido)) + " || " + "Máximo: " + str(max(tirosLibresPorPartido)) + " || " + "Mínimo: " + str(min(tirosLibresPorPartido)) + " || " + "Varianza: " + str(stats.variance(tirosLibresPorPartido))) 

print("---- TRIPLES POR PARTIDO ----")
print("Media: " + str(stats.mean(triplesPorPartido)) + " || " + "Moda: " + str(stats.mode(triplesPorPartido)) + " || " + "Máximo: " + str(max(triplesPorPartido)) + " || " + "Mínimo: " + str(min(triplesPorPartido)) + " || " + "Varianza: " + str(stats.variance(triplesPorPartido))) 
Example #59
0
from statistics import mean, median, mode, variance

print(variance([1,1,1,2,2]))

#print(mode([1,1,1,2,2]))

#print(median([1,1,1,2,2]))

#print(mean([1,2,2,2,1,3,4,1,5]))
Example #60
0
"""
Created on Tue Jun  9 11:32:35 2020

@author: gabecagnazzi
"""


#Exercise 5.28: Intro to Data Science survey response statistics

import numpy as np
import statistics

responses = [1, 2, 5, 4, 3, 5, 2, 1, 3, 3, 1, 4, 3, 3, 3, 2, 3, 3, 2, 5]

u_elements, count_elements = np.unique(responses, return_counts=True)

print("Frequency of responses: ")

for i in range(len(u_elements)):
    print("value", u_elements[i], "appears", count_elements[i], "times")
    
print("\n\nThe statistics are: ")
print("Min: ", min(responses))
print("Max: ", max(responses))
print("Range: ", max(responses) - min(responses))
print("Median: ", statistics.median(responses))
print("Mode: ", statistics.mode(responses))
print("Variance: ", statistics.variance(responses))
print("Standard Deviation: ", statistics.stdev(responses))