def run(data):
    with open("analyzer.log", 'a+') as f:
        c = costs(data)
        total = total_cost(data)
        f.write("\n############# COST #############\n")
        f.write("Total Cost : {0}\n".format(total))
        f.write("Total Cost Mean: {0}\n".format(mean(c)))
        f.write("Total Cost Median: {0}\n".format(median(c)))
        f.write("Total Cost Mode: {0}\n".format(mode(c)))
        f.write("Total Cost Variance: {0}\n".format(variance(c)))
        cost_action = action(data)
        f.write("Cost by Action: \n")
        for k, v in cost_action.items():
            f.write("\t{0} -> {1} units\n".format(k, v))
        f.write("Percentage Cost by Action: \n")
        for k, v in cost_action.items():
            f.write("\t{0} -> {1} %\n".format(k, round(((v * 100.) / total), 2)))
        f.write("Cost Variance by Action: \n")
        for k, v in cost_action.items():
            c_action = costs_action(data, k)
            if len(c_action) > 1:
                f.write("\t{0} -> {1} units\n".format(k, round(variance(c_action), 2)))
            else:
                f.write("\t{0} -> {1} units\n".format(k, round(c_action[0], 2)))
        key_max, max_value = max_action_value(cost_action)
        f.write("Most Expensive Action by value: {0} -> {1}\n".format(key_max[0], cost_action.get(key_max[0])))
        key_max, max_value = max_action_percentage(cost_action, total)
        f.write("Most Expensive Action by percentage: {0} -> {1} %\n".format(key_max, round(max_value, 2)))
def model_analysis(x, x_matrix, y, line, y_hat, b):
    n = len(x)                # number of samples
    s_x = stats.stdev(x)      # standard deviation of x values
    s_y = stats.stdev(y)      # standard deviation of y values
    s2_x = stats.variance(x)  # variance of x values
    s2_y = stats.variance(y)  # variance of y values
    s_xy = b * s2_x           # covariance of x and y, via the slope b
    mad_temp = 0
    SSE = 0
    for i in range(len(y)):
        temp = abs(y[i] - y_hat[i])
        mad_temp += temp
        SSE += temp**2        # sum of squares for error
    MAD = mad_temp / n
    s_err = math.sqrt(SSE / (n - 2))           # standard error of estimate
    s_b = s_err / math.sqrt((n - 1) * s2_x)
    r = s_xy / (s_x * s_y)                     # sample coefficient of correlation
    R_2 = line.score(x_matrix, y)              # coefficient of determination
    R_2calc = s_xy**2 / (s2_x * s2_y)
    t = b / s_b                                # t-value for slope assuming true slope = 0
    f1.write('\nSkew = ' + str(b) + '\n')
    f1.write('Coefficient of correlation (r) = ' + str(r) + '\n')
    #f1.write('Coefficient of determination (R^2) via scikit = ' + str(R_2) + '\n')
    f1.write('Coefficient of determination (R^2) calculated = ' + str(R_2calc) + '\n')
    f1.write('Test statistic for clock skew (t) = ' + str(t) + '\n')
    f1.write('Mean Absolute Deviation (MAD) = ' + str(MAD) + '\n')
    f1.write('Sum of Squares for Forecast Error (SSE) = ' + str(SSE) + '\n')
    return
def main():
    statistics_file = open("statics.txt", "w")
    stats = {}
    final_statistics = {}
    entropy_kills = []
    entropy_dies = []
    initialize_server()
    stats = simulate_population()
    print(stats)
    for key, val in stats.items():
        entropy_kills += val[0]
        entropy_dies += val[1]
    print(str(entropy_kills))
    statistics_file.write(str(entropy_kills) + "\n")
    print(str(entropy_dies))
    statistics_file.write(str(entropy_dies) + "\n")
    final_statistics.update({"mean entropy kill ": statistics.mean(entropy_kills)})
    final_statistics.update({"variance entropy kill ": statistics.variance(entropy_kills)})
    final_statistics.update({"mean entropy_dies ": statistics.mean(entropy_dies)})
    final_statistics.update({"variance entropy_dies ": statistics.variance(entropy_dies)})
    for key, val in final_statistics.items():
        print(str(key) + " : " + str(val))
        statistics_file.write(str(key) + " : " + str(val) + "\n")
    plt.figure(1)
    plt.subplot(211)
    plt.xlabel('id bot')
    plt.ylabel('kill')
    plt.boxplot(entropy_kills, labels=list('K'))
    plt.subplot(212)
    plt.xlabel('id bot')
    plt.ylabel('dies')
    plt.boxplot(entropy_dies, labels=list('D'))
    plt.show()
    statistics_file.close()
def countXYZ(data):
    x = []
    y = []
    z = []
    for tmp in data:
        # each sample is a 9-char hex string: 3 hex digits (12 bits, 0..4095)
        # per axis, scaled to physical units
        x.append(float(int(tmp[0:3], 16)) * 12.0 / 4096.0)
        y.append(float(int(tmp[3:6], 16)) * 14.0 / 4096.0)
        z.append(float(int(tmp[6:9], 16)) * 36.0 / 4096.0)
    return [6.0 - sum(x) / len(x), 7.0 - sum(y) / len(y), sum(z) / len(z),
            statistics.variance(x), statistics.variance(y), statistics.variance(z)]
def calculation(data):
    trt_1 = data[data['trt'] == 1]
    trt_0 = data[data['trt'] == 0]
    medi = statistics.median(trt_1['y']) - statistics.median(trt_0['y'])
    mean = statistics.mean(trt_1['y']) - statistics.mean(trt_0['y'])
    peop = len(trt_1) + len(trt_0)
    vari = statistics.variance(trt_1['y']) + statistics.variance(trt_0['y'])
    z_stat, p_val = stats.ranksums(trt_0['y'], trt_1['y'])
    return [medi, mean, peop, p_val]
def adjRating(self, ratings, VERBOSE=False):
    new_ratings = {}
    change = False
    max_change = 0.0
    for team in self.teams:
        # Set up arrays for ORating and DRating
        ODiff = []
        DDiff = []
        ct = self.teams[team]
        if VERBOSE:
            print("%s" % ct.name)
            print("  Home Games:")
        for game in ct.home_games:
            ODiff.append(game.hs + ratings[game.at.name][1])
            DDiff.append(game.aws - ratings[game.at.name][0])
            if VERBOSE:
                print("    %s: "
                      "ODiff Entry=%.2f, "
                      "DDiff Entry=%.2f" % (game, ODiff[-1], DDiff[-1]))
        if VERBOSE:
            print("  Away Games:")
        for game in ct.away_games:
            ODiff.append(game.aws + ratings[game.ht.name][1])
            DDiff.append(game.hs - ratings[game.ht.name][0])
            if VERBOSE:
                print("    %s: "
                      "ODiff Entry=%.2f, "
                      "DDiff Entry=%.2f" % (game, ODiff[-1], DDiff[-1]))
        temp_AdjO = (sum(ODiff) / float(len(ODiff))) - self.average
        temp_AdjD = self.average - (sum(DDiff) / float(len(DDiff)))
        ct.AOV = statistics.variance(ODiff) / len(ODiff)
        ct.ADV = statistics.variance(DDiff) / len(DDiff)
        new_ratings.update({ct.name: [temp_AdjO, temp_AdjD]})
        ch_th_O = abs((temp_AdjO - ratings[ct.name][0]) / ratings[ct.name][0])
        ch_th_D = abs((temp_AdjD - ratings[ct.name][1]) / ratings[ct.name][1])
        max_change = max(ch_th_D, ch_th_O, max_change)
        if ch_th_O > .0025 or ch_th_D > .0025:
            change = True
        if VERBOSE:
            print("  Team Stats")
            print("    AdjPointsScored: %s" % ODiff)
            print("    AdjPointsAllowed: %s" % DDiff)
            print("    AdjO=%.2f, AdjD=%.2f, "
                  "ChO=%.4f, ChD=%.4f" % (temp_AdjO, temp_AdjD, ch_th_O, ch_th_D))
            print("    AOV=%.4f, ADV=%.4f" % (ct.AOV, ct.ADV))
    # print("%.4f" % max_change)
    return change, new_ratings
def get_statistics(self):
    """
    Returns various statistics about the benchmark run.

    :return: See description.
    """
    ret_val = {}
    not_applicable = 'N/A'
    if len(self.rollovers) > 1:
        # Skip the last rollover...it could've been smaller than chunk_size
        rollover_times = []
        last_time = self.started
        for i in range(len(self.rollovers) - 1):
            rollover_times.append(
                (self.rollovers[i] - last_time).total_seconds()
            )
            last_time = self.rollovers[i]
        ret_val['rollover_mean'] = self.num_to_seconds(mean(rollover_times))
        ret_val['rollover_stdev'] = self.num_to_seconds(stdev(rollover_times))
        ret_val['rollover_variance'] = self.num_to_seconds(variance(rollover_times))
    else:
        ret_val['rollover_mean'] = not_applicable
        ret_val['rollover_stdev'] = not_applicable
        ret_val['rollover_variance'] = not_applicable
    if self.initial_mem_usage is None:
        ret_val['initial_mem_usage'] = not_applicable
    else:
        ret_val['initial_mem_usage'] = self.num_to_megabytes(self.initial_mem_usage)
    if len(self.resources) > 0:
        cpu_util_list = [x[1] for x in self.resources]
        ret_val['cpu_util_mean'] = self.num_to_percent(mean(cpu_util_list))
        ret_val['cpu_util_stdev'] = self.num_to_percent(stdev(cpu_util_list))
        if len(cpu_util_list) > 1:
            ret_val['cpu_util_variance'] = self.num_to_percent(variance(cpu_util_list))
        else:
            ret_val['cpu_util_variance'] = not_applicable
        mem_usage_list = [x[2] for x in self.resources]
        ret_val['mem_usage_mean'] = self.num_to_megabytes(mean(mem_usage_list))
    else:
        ret_val['cpu_util_mean'] = not_applicable
        ret_val['cpu_util_stdev'] = not_applicable
        ret_val['cpu_util_variance'] = not_applicable
        ret_val['mem_usage_mean'] = not_applicable
    return ret_val
def DemoStatistic(dataParam):
    # calculate mean
    mean = statistics.mean(dataParam)
    # calculate median
    median = statistics.median(dataParam)
    # calculate standard deviation
    stdv = statistics.stdev(dataParam)
    # count values outside the 3-sigma range
    noiseCount = 0
    for value in dataParam:
        if value < (-3 * stdv) + mean or value > (3 * stdv) + mean:
            # print(" %.4f" % (value))
            noiseCount += 1
    print("-----------------Simple------------------------")
    print("Data length: %d" % (len(dataParam)))
    print("Values outside 3sigma: %d" % (noiseCount))
    print("Mean: %.7f" % (mean))
    print("Median: %.7f" % (median))
    print("Standard deviation: %.7f" % (stdv))
    print("------------------------------------------------")
def analyze(graphs):
    """summary stats for the graphs:

    >>> graphs = [{'win': ['a', 'b', 'philosophy']}, {'win': ['c', 'd', 'philosophy']}, {'fail': ['e', 'f', 'g', 'h', 'i', 'j', 'k']}]
    >>> analyze(graphs)
    {'min': 2, 'max': 2, 'mean': 2.0, 'median': 2.0, 'var': 0.0}
    """
    win_path_lengths = []
    fail_path_lengths = []
    for graph in graphs:
        if graph.get('win'):
            win_path_lengths.append(len(graph['win']) - 1)
        if graph.get('fail'):
            fail_path_lengths.append(len(graph['fail']) - 1)
    # stats
    win_perc = sum(win_path_lengths) / sum([sum(win_path_lengths), sum(fail_path_lengths)])
    min_path_length = min(win_path_lengths)
    max_path_length = max(win_path_lengths)
    mean_path_length = mean(win_path_lengths)
    median_path_length = median(win_path_lengths)
    var_path_length = variance(win_path_lengths)
    print('Cache is enabled by default, turning it off will affect the distributions')
    print('Percentage of pages leading to Philosophy: {}'.format(win_perc))
    print('Distribution of paths leading to Philosophy: min {}, max {}, mean {}, median {}, var {}'.format(
        min_path_length, max_path_length, mean_path_length, median_path_length, var_path_length))
    return dict(min=min_path_length, max=max_path_length, mean=mean_path_length,
                median=median_path_length, var=var_path_length)
def discard_spurious_lines(lines, expected):
    """
    Discards the discordant line(s).

    Discards the line or lines that minimize the variance of the distances
    between the remaining set of lines (of size expected). The hypothesis is
    that, when more lines than expected were detected, the variance of the
    distances between lines is minimized when the spurious lines are left out
    of the selected set.

    Be aware of the number of possible combinations before calling this
    function. Having just one or two extra lines should generally be ok.
    """
    best_variance = math.inf
    for combination in itertools.combinations(lines, expected):
        diffs = [b[0] - a[0] for a, b in zip(combination[:-1], combination[1:])]
        variance = statistics.variance(diffs)
        if variance < best_variance:
            best_combination = combination
            best_variance = variance
    return best_combination
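# A hedged usage sketch for discard_spurious_lines above. The line format
# (tuples whose first element is a coordinate) and the sample values are
# illustrative assumptions, not taken from the original code base.
import itertools
import math
import statistics

detected = [(10,), (20,), (22,), (30,), (40,)]   # (22,) plays the spurious line
kept = discard_spurious_lines(detected, expected=4)
print(kept)  # ((10,), (20,), (30,), (40,)) -- the evenly spaced subset wins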
def makeCharactOutOfEndDifferencesList(listdiffs, type=STAT_MEAN):
    """
    type = -1: median, 0: mean, 1: variance, 2: sigma, 3: skew test, 4: kurtosis test
    """
    import copy
    import math
    import statistics
    import scipy.stats as scps
    lst = copy.deepcopy(listdiffs)
    func = {
        -1: statistics.median,
        0: statistics.mean,
        1: statistics.variance,
        2: lambda x: math.sqrt(statistics.variance(x)),
        3: scps.skewtest,
        4: scps.kurtosistest,
    }
    for j in range(len(lst)):
        for k in range(len(lst[j])):
            lst[j][k] = func[type](lst[j][k])
    return lst
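# A hedged usage sketch: the nested shape (rows of cells, each cell a list of
# numeric differences) is inferred from the double loop above, and STAT_MEAN
# is assumed to be the constant 0; both are assumptions for illustration.
STAT_MEAN = 0
diffs = [[[1.0, 2.0, 3.0], [2.0, 4.0, 6.0]],
         [[0.5, 1.5, 2.5], [1.0, 1.0, 1.0]]]
print(makeCharactOutOfEndDifferencesList(diffs, type=1))
# [[1.0, 4.0], [1.0, 0.0]] -- per-cell sample variances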
def compute(self, ydata):
    y = ydata.T
    var_y = statistics.variance(y)
    self.summary = self.summary + var_y
    self.var_y.append(var_y)
    return {
        'summary': self.summary,
        'var_y': self.var_y
    }
def compute(self, ydata):
    y = np.swapaxes(ydata, 0, 1).T
    var_y = sum([statistics.variance(y[w]) / self.meta.nwalkers
                 for w in range(self.meta.nwalkers)])
    self.summary = self.summary + var_y
    self.var_y.append(var_y)
    return {
        'summary': self.summary,
        'var_y': self.var_y
    }
def async_update(self):
    """Get the latest data and update the states."""
    if self._max_age is not None:
        self._purge_old()

    if not self.is_binary:
        try:  # require only one data point
            self.mean = round(statistics.mean(self.states), 2)
            self.median = round(statistics.median(self.states), 2)
        except statistics.StatisticsError as err:
            _LOGGER.error(err)
            self.mean = self.median = STATE_UNKNOWN

        try:  # require at least two data points
            self.stdev = round(statistics.stdev(self.states), 2)
            self.variance = round(statistics.variance(self.states), 2)
        except statistics.StatisticsError as err:
            _LOGGER.error(err)
            self.stdev = self.variance = STATE_UNKNOWN

        if self.states:
            self.count = len(self.states)
            self.total = round(sum(self.states), 2)
            self.min = min(self.states)
            self.max = max(self.states)
            self.change = self.states[-1] - self.states[0]
            self.average_change = self.change
            if len(self.states) > 1:
                self.average_change /= len(self.states) - 1
            if self._max_age is not None:
                self.max_age = max(self.ages)
                self.min_age = min(self.ages)
        else:
            self.min = self.max = self.total = STATE_UNKNOWN
            self.average_change = self.change = STATE_UNKNOWN
def getVariances(optimalParameters):
    params = optimalParameters[0].keys()
    variances = {}
    for parameter in params:
        allValues = [x[parameter] for x in optimalParameters]
        variances[parameter] = statistics.variance(allValues)
    return variances
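# A hedged usage sketch for getVariances; the parameter names and values are
# made up for illustration.
runs = [{'lr': 0.1, 'momentum': 0.9},
        {'lr': 0.2, 'momentum': 0.8},
        {'lr': 0.3, 'momentum': 0.7}]
print(getVariances(runs))  # variance of each parameter across runs, ~0.01 each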
def test_parametric_variates(self):
    """
    Verify the correctness of the random variates generation.
    :return: None
    """
    for variate in Variate:
        params = self.varparams[variate]
        sample = list()
        for i in range(self.samsize):
            rndvalue = Variate[variate.name].vargen.generate(u=self.rndgen, **params)
            sample.append(rndvalue)

        expected_mean = self.check_mean[variate](**params)
        actual_mean = mean(sample)
        print("{}: expected mean {}, got {}".format(variate.name, expected_mean, actual_mean))
        if self.makeAssertion:
            self.assertLessEqual(abs(expected_mean - actual_mean) / expected_mean,
                                 self.err * expected_mean,
                                 "Mean error for variate {}: expected {} got {}"
                                 .format(variate.name, expected_mean, actual_mean))

        expected_variance = self.check_variance[variate](**params)
        actual_variance = variance(sample)
        print("{}: expected variance {}, got {}".format(variate.name, expected_variance, actual_variance))
        if self.makeAssertion:
            self.assertLessEqual(abs(expected_variance - actual_variance) / expected_variance,
                                 self.err * expected_variance,
                                 "Variance error for variate {}: expected {} got {}"
                                 .format(variate.name, expected_variance, actual_variance))
def post_trigger_run(self, trigger: RawTrigger, main_plugin: MainPlugin, *args, **kwargs) -> None:
    """
    Collects the benchmark results and saves them in a file

    :param trigger: the trigger instance that is run
    :param main_plugin: the main plugin under which we run
    :param args: additional arguments
    :param kwargs: additional keyword arguments
    """
    if len(trigger.returned_information) == 1:
        mean = trigger.returned_information[0]
        stdev = 0
        variance = 0
    else:
        mean = statistics.mean(trigger.returned_information)
        stdev = statistics.stdev(trigger.returned_information)
        variance = statistics.variance(trigger.returned_information)

    if not os.path.exists(os.path.dirname(self.benchmark_log)):
        os.makedirs(os.path.dirname(self.benchmark_log))

    with open(self.benchmark_log, "a") as logs:
        logs.write("{name}, {plugin}, {slice_size}, {mean}, {stdev}, {variance}, {total_numbers}\n".format(
            name=trigger.conf.get("name"),
            plugin=main_plugin.__class__.__name__,
            slice_size=kwargs.get("number", None),
            mean=mean,
            stdev=stdev,
            variance=variance,
            total_numbers=" ".join([str(data) for data in trigger.returned_information])))
def get_stats(arr):
    min_ = min(arr)
    max_ = max(arr)
    range_ = max_ - min_
    mean_ = statistics.mean(arr)
    median_ = statistics.median(arr)
    amp_ = max_ - mean_
    try:
        stdev_ = statistics.stdev(arr)
        var_ = statistics.variance(arr)
    except statistics.StatisticsError:  # fewer than two data points
        stdev_ = 0
        var_ = 0
    rms_ = rms(arr)
    return [min_, max_, range_, mean_, median_, amp_, stdev_, var_, rms_]
def stats_of_tab(fname):
    # Create a list of "statistics" objects, keyed by title from the spreadsheet
    tbl = []
    with open(fname, "r") as f:
        next(f)  # Ignore the first line, it's just an index
        for line in f:
            cols = line.strip().split()
            title = cols[0]
            # Collect statistics on each row
            data = [int(x) for x in cols[1:]]
            obj = {}
            obj["key"] = title
            obj["num-samples"] = len(data)
            obj["mean"] = statistics.mean(data)
            obj["median"] = statistics.median(data)
            obj["min"] = min(data)
            obj["max"] = max(data)
            obj["range"] = obj["max"] - obj["min"]
            obj["std"] = statistics.stdev(data)
            obj["variance"] = statistics.variance(data)
            ci_offset = (Z95 * obj["std"]) / (math.sqrt(obj["num-samples"]))
            obj["confidence-interval"] = [obj["mean"] - ci_offset, obj["mean"] + ci_offset]
            tbl.append(obj)
    return tbl
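# Z95 is used above but not defined in this excerpt. For a two-sided 95%
# normal confidence interval the critical value is conventionally 1.96;
# treating it as such is an assumption here:
Z95 = 1.96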
def get(self):
    values = sorted(self.reservoir.values)
    count = len(values)
    # instead of failing, return an empty / subset dict so that json2insert & co
    # don't fail
    if count == 0:
        return dict(n=0)
    elif count == 1:
        return dict(min=values[0], max=values[0], mean=values[0], n=count)
    percentiles = [percentile(values, p) for p in self.plevels]
    min_ = values[0]
    max_ = values[-1]
    stdev = statistics.stdev(values)
    return dict(
        min=min_,
        max=max_,
        mean=statistics.mean(values),
        median=statistics.median(values),
        variance=statistics.variance(values),
        error_margin=error_margin(95, stdev, self.reservoir.count),
        stdev=stdev,
        # replace . with _ so that the output can be inserted into crate;
        # crate doesn't allow dots in column names
        percentile={str(i[0]).replace('.', '_'): i[1]
                    for i in zip(self.plevels, percentiles)},
        n=self.reservoir.count,
        samples=self.reservoir.values
    )
def getVar(self):
    if self.index == 1:
        return 0
    elif self.index < self.N:
        return statistics.variance(self.window[0:self.index])  # Make return 0?
    return self.variance
def get_weight_variance(self, *args, **kwargs):
    Weight = apps.get_model('ddm_core', 'Weight')
    weights = Weight.objects.filter(criterion=self, *args, **kwargs).values_list('value', flat=True)
    try:
        return statistics.variance(weights)
    except statistics.StatisticsError:
        return 0
def computeFScore(protList, dataTCSs, dataCCSs, combTCSsCCSs):
    final_Prot = {}
    topProteins = {}
    for i in range(77):
        protein = protList[i]
        mean_TCSs = dataTCSs[protList[i]].mean()
        mean_CCSs = dataCCSs[protList[i]].mean()
        combo_mean = combTCSsCCSs[protList[i]].mean()
        numeratorFScore = ((mean_TCSs - combo_mean)**2) + ((mean_CCSs - combo_mean)**2)
        denominatorFScore = stat.variance(dataTCSs[protList[i]]) + stat.variance(dataCCSs[protList[i]])
        # denominatorFScore = dataTCSs[protList[i]].var(ddof=True) + dataCCSs[protList[i]].var(ddof=True)
        FScore = numeratorFScore / denominatorFScore
        final_Prot[protein] = FScore
    sortedDict = sorted(final_Prot.items(), key=lambda x: x[1], reverse=True)
    topProteins = sortedDict[:5]
    return topProteins
def print_mean_var(sticker_prices):
    stock_variances = []
    for ticker in sticker_prices:
        open_prices = sticker_prices[ticker]
        if open_prices:
            stock_variances.append((ticker,
                                    statistics.mean(open_prices),
                                    statistics.variance(open_prices)))
    data = sorted(stock_variances, key=operator.itemgetter(2))
    print('\n'.join([str(a) for a in data]))
def get_score_variance(self, *args, **kwargs):
    Score = apps.get_model('ddm_core', 'Score')
    scores = Score.objects.filter(criterion=self, *args, **kwargs).values_list('value', flat=True)
    try:
        return statistics.variance(scores)
    except statistics.StatisticsError:
        return 0
def standardize(x):
    if x.shape[0] == 0:
        return
    for i in range(1, x.shape[1]):
        variance = stat.variance(x[:, i])
        mean = stat.mean(x[:, i])
        if variance == 0:
            continue  # skip constant columns instead of aborting the loop
        x[:, i] = (x[:, i] - mean) / variance ** 0.5  # divide by the standard deviation
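# A hedged usage sketch for standardize: column 0 is deliberately skipped by
# the loop (commonly an intercept column -- an assumption here); the matrix
# below is illustrative.
import numpy as np
import statistics as stat  # alias assumed by the function above

X = np.array([[1.0, 2.0, 10.0],
              [1.0, 4.0, 20.0],
              [1.0, 6.0, 30.0]])
standardize(X)
print(X)  # columns 1..n now have zero mean and unit sample variance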
def GARCH11_logL(param, r):
    """Negative log-likelihood of a GARCH(1,1) model (up to a constant)."""
    omega, alpha, beta = param
    n = len(r)
    s = np.ones(n) * 0.01
    s[2] = st.variance(r[0:3])
    for i in range(3, n):
        s[i] = omega + alpha * r[i-1]**2 + beta * s[i-1]  # GARCH(1,1) recursion
    logL = -((-np.log(s) - r**2 / s).sum())
    return logL
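# A hedged fitting sketch: GARCH11_logL above returns a negative
# log-likelihood, so it can be minimized directly. The synthetic returns, the
# starting point, and the use of scipy.optimize.fmin are illustrative
# assumptions.
import numpy as np
from scipy.optimize import fmin

r = np.random.default_rng(0).normal(0.0, 0.01, size=500)
omega, alpha, beta = fmin(GARCH11_logL, x0=np.array([1e-5, 0.05, 0.90]),
                          args=(r,), disp=False)
print(omega, alpha, beta)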
def print_stats(times, tag):
    print("Num. samples: %s" % len(times))
    mean = statistics.mean(times)
    print("Average %s: %.2f" % (tag, mean))
    print("Median %s: %s" % (tag, statistics.median(times)))
    print("Min %s: %s" % (tag, min(times)))
    print("Max %s: %s" % (tag, max(times)))
    # Z is the normal critical value for the confidence level (1.96 for 95%)
    delta = Z * (math.sqrt(statistics.variance(times)) / math.sqrt(len(times)))
    print("95%% confidence: %.2f -- %.2f" % (mean - delta, mean + delta))
def is_interesting(self, name):
    occ = list(name.occurrences.values())
    # replace zero counts with 1 so the ratios below stay finite
    for i in range(len(occ)):
        if occ[i] == 0:
            occ[i] = 1
    mean = np.mean(occ)
    stdev = np.std(occ)
    var = math.sqrt(variance(occ, mean))   # sample standard deviation
    var2 = variance(occ, mean)             # sample variance
    unique = var2 / np.mean(occ)
    # normalized_values = (occ - np.mean(occ)) / np.std(occ)
    # variance = np.std(normalized_values) / np.mean(normalized_values)
    uniqueness = var / np.mean(occ)        # coefficient of variation
    if uniqueness > 0.20 and np.mean(occ) > self.span:
        if uniqueness > 1.0 and np.mean(occ) > self.span:
            print("POPULARITY: very high potential")
            print("Variance Level: ", uniqueness)
            return True
        elif 0.90 < uniqueness < 1.0 and np.mean(occ) > self.span:
            print("POPULARITY: high potential")
            print("Variance Level: ", uniqueness)
            return True
        elif 0.50 < uniqueness < 0.80 and np.mean(occ) > self.span:
            print("POPULARITY: probably average or high")
            print("Variance Level: ", uniqueness)
            return True
        elif 0.39 < uniqueness < 0.50 and np.mean(occ) > self.span:
            print("Movie could be popular")
            print("Variance Level: ", uniqueness)
            return True
        elif uniqueness < 0.39 and np.mean(occ) > self.span:
            print("Movie name irrelevant to popularity")
            print("Variance Level: ", uniqueness)
            return True
        else:
            return False
async def async_update(self):
    """Get the latest data and update the states."""
    _LOGGER.debug("%s: updating statistics.", self.entity_id)
    if self._max_age is not None:
        self._purge_old()

    self.count = len(self.states)

    if not self.is_binary:
        try:  # require only one data point
            self.mean = round(statistics.mean(self.states), self._precision)
            self.median = round(statistics.median(self.states), self._precision)
        except statistics.StatisticsError as err:
            _LOGGER.debug("%s: %s", self.entity_id, err)
            self.mean = self.median = STATE_UNKNOWN

        try:  # require at least two data points
            self.stdev = round(statistics.stdev(self.states), self._precision)
            self.variance = round(statistics.variance(self.states), self._precision)
        except statistics.StatisticsError as err:
            _LOGGER.debug("%s: %s", self.entity_id, err)
            self.stdev = self.variance = STATE_UNKNOWN

        if self.states:
            self.total = round(sum(self.states), self._precision)
            self.min = round(min(self.states), self._precision)
            self.max = round(max(self.states), self._precision)
            self.min_age = self.ages[0]
            self.max_age = self.ages[-1]
            self.change = self.states[-1] - self.states[0]
            self.average_change = self.change
            self.change_rate = 0
            if len(self.states) > 1:
                self.average_change /= len(self.states) - 1
                time_diff = (self.max_age - self.min_age).total_seconds()
                if time_diff > 0:
                    self.change_rate = self.average_change / time_diff
            self.change = round(self.change, self._precision)
            self.average_change = round(self.average_change, self._precision)
            self.change_rate = round(self.change_rate, self._precision)
        else:
            self.total = self.min = self.max = STATE_UNKNOWN
            self.min_age = self.max_age = dt_util.utcnow()
            self.change = self.average_change = STATE_UNKNOWN
            self.change_rate = STATE_UNKNOWN
def variance(self):
    """Return the variance of DataStruct.price."""
    return self.price.groupby(level=1).apply(lambda x: statistics.variance(x))
import statistics
import math

agesData = [10, 13, 14, 12, 11, 10, 11, 10, 15]

print(statistics.mean(agesData))
print(statistics.mode(agesData))
print(statistics.median(agesData))
print(statistics.variance(agesData))
print(statistics.stdev(agesData))
print(math.sqrt(statistics.variance(agesData)))  # equals statistics.stdev(agesData)
percent = [
    1.984763432, 0.922213312, 3.327987169, 4.190056135, 5.493183641,
    1.864474739, 10.60545309, 2.425821973, 2.726543705, 8.740978348,
    6.174819567
]

percent.sort()
print(percent)

# this ends with an error (median & co. have not been imported yet)
# print(median(percent))
# print(median_low(percent))
# print(median_high(percent))

# this succeeds
import statistics
print(statistics.fmean(percent))
print(statistics.harmonic_mean(percent))
print(statistics.variance(percent, 2))  # second argument is the precomputed mean (xbar)
print('===========')
print(statistics.median(percent))
print(statistics.median_low(percent))
print(statistics.median_high(percent))

# this also succeeds
from statistics import *
print(median(percent))
print(median_low(percent))
print(median_high(percent))
print('The predicted values are:', Yhat)
print('The true values are:', Ytrue[1:])


# ### 4- Calculate the residuals, which are the differences between predicted values and true values, and display them as:
# #### The residuals for this estimate are: ______________

# In[44]:


res = Ytrue[2:] - Yhat[1:]
print('The residuals for this estimate are: ', res)


# In[45]:


print('The mean of the residuals for this estimate is: ', statistics.mean(res))
print('\nThe variance of the residuals for this estimate is: ', statistics.variance(res))

plt.hist(res)
plt.title('Residuals Distribution', pad=40)
plt.xlabel('Residual value')
plt.ylabel('Number of Observations')
plt.figure()
plt.show()


# ### 5- Calculate the sum of squares of the residuals. This norm is called the "Sum Square Error", or simply "SSE". Display the message as follows:
# #### The sum square error for this estimate is: _________

# In[47]:


sse = 0
SSE = []
def main():
    # Get the chosen day
    display(Image(filename='calendario.png'))  # place the calendar image in the same folder as the Python file
    diaok = False
    while not diaok:
        dia = int(input("Enter the desired day number (1-30): "))
        if dia in range(1, 31):
            diaok = True
        else:
            print("Invalid input, choose another day value")
            print("================================================")
    data = padroniza_dia(dia)  # normalizes the day value to a two-digit string

    # Get the chosen time grouping
    agrupamentook = False
    while not agrupamentook:
        print("Available groupings: \n1Min \n2Min \n3Min \n4Min \n5Min \n")
        agrupamento = input("Enter the desired time grouping: ")
        if agrupamento in ("1Min", "2Min", "3Min", "4Min", "5Min"):
            agrupamentook = True
        else:
            print("Invalid input, choose another grouping value")
            print("================================================")

    # Get the radar chosen for forecasting
    radares = [10426, 10433, 10482, 10484, 10492, 10500, 10521, 10531]
    radarok = False
    while not radarok:
        print("Available radars:", radares, sep='\n')
        radar = int(input("Which radar should be forecast?: "))
        if radar in radares:
            radarok = True
        else:
            print("Invalid input, choose another radar value")
            print("================================================")

    # Read the file - change according to the data location on your computer
    path = "C:\\Users\\walmart\\Documents\\USP\\TCC\\Dados\\Dados Agrupados\\Grouped by frequency"
    dados = pd.read_csv(path + "\\" + str(dia) + "\\" + str(dia) + "_group_" + str(agrupamento) + ".csv")
    dados.Data = pd.to_datetime(dados.Data)  # converts the Data column to datetime
    dados = dados[dados['Número Agrupado'] == radar].reset_index(drop=True)  # keeps only the radar under test

    # Get the train/test cutoff time
    horaok = False
    while not horaok:
        hora = input("Expected time format: 00:00:00 \n What is the train/test cutoff time?: ")
        if (len(hora) == 8 and hora[2] == ":" and hora[5] == ":"
                and int(hora[0] + hora[1]) in range(0, 25)
                and int(hora[3] + hora[4]) in range(0, 60)
                and int(hora[6] + hora[7]) in range(0, 60)):
            horaok = True
        else:
            print("Invalid input, enter the time value again")
            print("================================================")

    divisao = dados.loc[dados['Data'] == '2018-03-' + data + ' ' + str(hora)].index[0]
    start = divisao - 100  # how many previous steps to use for the forecast

    # Split the chosen radar's dataframe into training and validation sets
    treino = dados.loc[start:divisao]
    teste = dados.loc[divisao:]

    # CHECK WHETHER THE SERIES IS STATIONARY
    diferenciacao = dados.Quantidade
    estacionaria = adfuller(diferenciacao)
    print('ADF Statistic: %f' % estacionaria[0], '| p-value: %f' % estacionaria[1])
    if estacionaria[1] < 0.05:
        print("The series for radar %s is stationary, since p-value < 0.05" % radar)
        pronto = True
        d = 0
    else:
        print("The series for radar %s is not stationary, since p-value > 0.05" % radar)
        pronto = False
        d = 0
        while not pronto:
            diferenciacao = diferenciacao.diff().dropna()
            d = d + 1
            estacionaria = adfuller(diferenciacao)
            print('ADF Statistic: %f' % estacionaria[0], '| p-value: %f' % estacionaria[1])
            if estacionaria[1] < 0.05:
                pronto = True
        print("The differencing order is %d" % d)

    plt.rcParams.update({'figure.figsize': (9, 7), 'figure.dpi': 120})
    fig, axis = plt.subplots(2, 1, sharex=False)
    axis[0].plot(dados.Quantidade)
    axis[0].set_title('Original Series')
    axis[1].plot(diferenciacao)
    axis[1].set_title('Stationary Series')
    plt.show()

    pronto = False
    while not pronto:
        # AUTOREGRESSION ORDER - PACF ON THE STATIONARY DATA
        plt.rcParams.update({'figure.figsize': (9, 7), 'figure.dpi': 120})
        fig, axis = plt.subplots(2, 1, sharex=False)
        axis[0].set(ylim=(0, 1.05))
        plot_pacf(diferenciacao, ax=axis[0], lags=100)
        # MOVING AVERAGE ORDER - ACF ON THE STATIONARY DATA
        axis[1].set(ylim=(0, 1.2))
        plot_acf(diferenciacao, ax=axis[1], lags=100)
        plt.show()
        p = int(input("What is the appropriate value of p?: "))
        q = int(input("What is the desired value of q?: "))

        # MODEL BUILDING
        steps = int(input("How many steps beyond %s h do you want to forecast? " % hora))
        teste = teste[:steps + 1]  # runs from the (given) cutoff time to steps+1 steps
        # the forecast runs from the cutoff time + 1 step, for `steps` steps
        data_prev = dados.loc[divisao + 1:]
        data_prev = data_prev[:steps]

        # Instantiate the model and fit it on the training data
        model = ARIMA(treino['Quantidade'], order=(p, d, q))
        model_fit = model.fit(disp=-1)

        # Run the forecast
        fc, se, conf = model_fit.forecast(steps, alpha=0.05)  # 95% confidence

        # Forecast values take data_prev's index
        fc_series = pd.Series(fc, index=data_prev.index)
        lower_series = pd.Series(conf[:, 0], index=data_prev.Data)
        upper_series = pd.Series(conf[:, 1], index=data_prev.Data)

        # Plot the model's forecast:
        fig, ax = plt.subplots(figsize=(12, 5))
        ax.plot(treino['Data'], treino['Quantidade'], label='Actual observations, used for training')
        ax.plot(teste['Data'], teste['Quantidade'], label='Actual behavior after %s h' % hora)
        ax.plot(data_prev['Data'], fc_series, label='ARIMA')
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
        plt.fill_between(lower_series.index, lower_series, upper_series, color='k', alpha=0.15)
        plt.title('Location: Radar %d' % radar)
        plt.legend(loc='upper left', fontsize=10)
        plt.show()

        # TABLE OF FORECAST VALUES
        dados_prev = pd.Series(fc, index=data_prev.Data)
        dados_prev = dados_prev.to_frame()
        dados_prev.columns = ['Quantidade']
        dados_prev['Data'] = dados_prev.index
        dados_prev = dados_prev[['Data', 'Quantidade']]
        dados_prev = dados_prev.reset_index(drop=True)
        # print("Forecast:")
        # print(dados_prev)
        # print("")

        # TABLE OF ACTUAL VALUES
        dados_reais = teste[['Data', 'Quantidade']]
        dados_reais = dados_reais.reset_index(drop=True)
        dados_reais = dados_reais[1:]
        dados_reais = dados_reais.reset_index(drop=True)
        # print("Actual:")
        # print(dados_reais)
        # print("")

        # ERROR COMPUTATION
        erros = list()
        for i in range(0, 15):
            erro = dados_reais.loc[i, 'Quantidade'] - dados_prev.loc[i, 'Quantidade']
            erros.append(abs(erro))
        erros = pd.Series(erros, index=data_prev.Data)
        erros = erros.to_frame()
        erros.columns = ['Erro']
        erros['Data'] = erros.index
        erros = erros[['Data', 'Erro']]
        erros = erros.reset_index(drop=True)
        # print(erros)
        var_erro = statistics.variance(erros.Erro)
        print("")
        print("The error variance for this simulation was: %f" % var_erro)
        print("================================================")
        print(model_fit.summary())
        print("================================================")
        forecast_accuracy(fc, dados_reais.Quantidade)

        # DETERMINE THE IDEAL FORECASTING MODEL
        modell = pm.auto_arima(dados.Quantidade, start_p=0, start_q=0, test='adf',
                               max_p=p, max_q=q, m=1, d=d, seasonal=False,
                               start_P=0, D=0, trace=True, error_action='ignore',
                               suppress_warnings=True, stepwise=True)
        print(modell.summary())
        ok = input("Is the model adequate? (S/N) ")
        if ok == "S":
            pronto = True
            print("Model defined for use at other times")
            print("================================================")
        else:
            print("Choose other model orders")
            print("================================================")

    # SIMULATION OF OTHER VALUES AFTER THE MODEL IS DEFINED
    pronto = False
    while not pronto:
        horaok = False
        while not horaok:
            hora = input("Expected time format: 00:00:00 \n What is the forecast start time?: ")
            if (len(hora) == 8 and hora[2] == ":" and hora[5] == ":"
                    and int(hora[0] + hora[1]) in range(0, 25)
                    and int(hora[3] + hora[4]) in range(0, 60)
                    and int(hora[6] + hora[7]) in range(0, 60)):
                horaok = True
                print("================================================")
            else:
                print("Invalid input, enter the time value again")
                print("================================================")
        divisao = dados.loc[dados['Data'] == '2018-03-' + data + ' ' + str(hora)].index[0]
        start = divisao - 100
        treino = dados.loc[start:divisao]
        teste = dados.loc[divisao:]
        steps = int(input("How many steps beyond %s h do you want to forecast?" % hora))
        teste = teste[:steps + 1]
        data_prev = dados.loc[divisao + 1:]
        data_prev = data_prev[:steps]

        # Run the forecast
        fc, se, conf = model_fit.forecast(steps, alpha=0.05)  # 95% confidence

        # Forecast values take data_prev's index
        fc_series = pd.Series(fc, index=data_prev.index)
        lower_series = pd.Series(conf[:, 0], index=data_prev.Data)
        upper_series = pd.Series(conf[:, 1], index=data_prev.Data)

        # Plot the model's forecast:
        fig, ax = plt.subplots(figsize=(12, 5))
        ax.plot(treino['Data'], treino['Quantidade'], label='Actual observations, used for training')
        ax.plot(teste['Data'], teste['Quantidade'], label='Actual behavior after %s h' % hora)
        ax.plot(data_prev['Data'], fc_series, label='ARIMA')
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
        plt.fill_between(lower_series.index, lower_series, upper_series, color='k', alpha=0.15)
        plt.title('Location: Radar %d' % radar)
        plt.legend(loc='upper left', fontsize=10)
        plt.show()

        ok = input("Do you want to run another forecast? (S/N): ")
        if ok == "N":
            print("Simulation ended")
            pronto = True
        else:
            print("Starting another simulation")
            print("================================================")
# statistics_basic
import statistics
# from statistics import mean as m

example_list = [5, 7, 2, 15, 12, 10, 8, 9, 14, 11, 12]

x = statistics.mean(example_list)
print(x)
# print(m(example_list))

y = statistics.mode(example_list)
print(y)

z = statistics.median(example_list)
print(z)

m = statistics.stdev(example_list)
print(m)

n = statistics.variance(example_list)
print(n)

print()
print()
##############################################
#
ecdf = sm.distributions.ECDF(rnd)
x = np.linspace(min(rnd), max(rnd))
F = ecdf(x)
plt.step(x, F, label=str(N))
plt.legend(loc='upper left')
plt.xlabel('$x$', fontsize=14)
plt.ylabel('$P$', fontsize=14)
plt.axis([-0.5, mu + 9, 0, 1.4])
plt.grid(True)

# Theoretical cumulative distribution
x = np.linspace(0, xmean + 200, 10000)
f = 1 - np.e**(-(xmean**-1) * x)
plt.plot(x, f)
plt.show()

print()
print('Sample mean:')
print(sum(rnd) / len(rnd))
print()

import statistics

print('Sample variance:')
print(statistics.variance(rnd))
print()
print('Sample standard deviation:')
print(statistics.stdev(rnd))
# =============================================================================
# Q1
# =============================================================================
import statistics as st

x = [3, 1.5, 4.5, 6.75, 2.25, 5.75, 2.25]
print(st.mean(x))
print(st.harmonic_mean(x))
print(st.median(x))
print(st.median_low(x))
print(st.median_high(x))
print(st.median_grouped(x))
print(st.mode(x))
print(st.pstdev(x))
print(st.pvariance(x))
print(st.stdev(x))
print(st.variance(x))

# =============================================================================
# Q2
# =============================================================================
import random

print(random.random())
print(random.randrange(10))
print(random.choice(['ali', 'khalid', 'hussam']))
print(random.sample(range(1000), 10))
print(random.choice('orange academy'))

items = [1, 5, 8, 9, 2, 4]
random.shuffle(items)
print(items)

print(random.randint(20, 30))
print(random.randrange(1000, 2111, 5))
def variancia(lista):
    return st.variance(lista)
# 10-4 - Error Output and Redirection:
import sys
sys.stderr.write('Error, file not found\n')
sys.stdout.write('hi\n')
# print('waiting for user to enter a line...', end=""); entr = sys.stdin.readline(); print(entr)

# log messages are sent to a file or to sys.stderr:
import logging
logging.debug('Debugging information')

# 10-6
# Random Choice
import random
print(random.choice(['apple', 'pear', 'banana']))

# statistics:
import statistics
data = [2.75, 1.75, 1.25, 0.25, 0.5, 1.25, 3.5]
print("data: ", data)
print('mean: ', statistics.mean(data))
print('median: ', statistics.median(data))
print('stdev: ', statistics.stdev(data))
print('variance: ', statistics.variance(data))

# 10-10 - Measure execution time:
from timeit import Timer
print("code execution time is: ", Timer('i = 0').timeit())

# 15 - Floating point arithmetic: issues and limitations:
# 0.1 is actually 0.1000000000000000055511151231257827021181583404541015625
print(0.1 + 0.1 + 0.1 == 0.3)  # False
# Records the workload
for line in workload:
    w.write(line)

# Finds where creates and removes are located
creates = [i for i in range(10)]
removes = []
for i in range(len(workload)):
    line = workload[i]
    if line.startswith('create'):
        creates.append(i + 10)
    if line.startswith('remove'):
        removes.append(i + 10)

from pprint import pprint
print('{} creates'.format(len(creates)))
pprint(creates, compact=True)
print('{} removes'.format(len(removes)))
pprint(removes)

# Key Statistics
print('{} keys in total'.format(len(keys)))
sorted_popularity = sorted(keys.values(), reverse=True)
top5 = sorted_popularity[:5]
bottom5 = sorted_popularity[-5:]
mean = statistics.mean(sorted_popularity)
variance = statistics.variance(sorted_popularity, mean)
print("mean key popularity = {}; variance of key popularity = {}".format(mean, variance))
print("top5 = {}".format(top5))
print("bottom5 = {}".format(bottom5))

# Finish writing the file
w.close()
import numpy as np
import matplotlib.pyplot as plt
import math
import pandas as pd
import statistics

df = pd.read_csv('sbi_data.csv', usecols=['Date', 'Close'], nrows=66)

# log-returns between consecutive closes
U = []
for i in range(65):
    U.append(math.log(df.Close[i + 1] / df.Close[i]))

sigma_sq = statistics.variance(U)
sigma = math.sqrt(sigma_sq)
mean = statistics.mean(U)
mu = sigma_sq / 2 + mean
print("Mu = {}, Sigma_Sq = {}, Sigma = {}".format(mu, sigma_sq, sigma))

lambda_ = [0.01, 0.05, 0.1, 0.2]

# Algorithm I: Using Poisson Distribution
# (Simulating at Fixed Dates)
for i in range(4):
    X = {}
    t = 0
    X[0] = math.log(df.Close[65])
# Act
stdev = statistics.stdev(data, mu)
end = timeit.default_timer()

# Assert
print("stdev = ", stdev)
print(u" time [\u00B5s] = ", (end - start) * 1000000)  # seconds -> microseconds
assert (abs(stdev - 1.08108) < 0.00001)

#==================================================================================

#==================================================================================
# Arrange
data = [2.75, 1.75, 1.25, 0.25, 0.5, 1.25, 3.5]
print("data = ", data)
start = timeit.default_timer()

# Act
variance = statistics.variance(data)
end = timeit.default_timer()

# Assert
print("variance = ", variance)
print(u" time [\u00B5s] = ", (end - start) * 1000000)  # seconds -> microseconds
assert (abs(variance - 1.37202) < 0.00001)

#==================================================================================

#==================================================================================
# Arrange
mu = statistics.mean(data)
start = timeit.default_timer()

# Act
variance = statistics.variance(data, mu)
end = timeit.default_timer()

# Assert
def test_statistics(values):
    s = online_stats.Statistics(values)
    assert s.mean() == statistics.mean(values)
    assert s.variance() == statistics.variance(values)
    assert s.stdev() == statistics.stdev(values)
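# online_stats itself is not shown in this excerpt; a minimal Welford-style
# sketch of what such an online Statistics class might look like (an
# assumption, not the actual library):
class WelfordStatistics:
    def __init__(self, values=()):
        self.n, self.mu, self.m2 = 0, 0.0, 0.0
        for v in values:
            self.push(v)

    def push(self, v):
        self.n += 1
        delta = v - self.mu
        self.mu += delta / self.n
        self.m2 += delta * (v - self.mu)  # second factor uses the updated mean

    def mean(self):
        return self.mu

    def variance(self):
        return self.m2 / (self.n - 1)  # sample variance, requires n >= 2

    def stdev(self):
        return self.variance() ** 0.5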
import math
import random
import statistics

print(math.sqrt(16))
print(random.randint(10, 15))

num = [1, 5, 10]
print("Mean: ", statistics.mean(num))
print("Median: ", statistics.median(num))
print("Mode: ", statistics.mode(num))

#-1-------------------------
print("Sample variance of the data: ", statistics.variance(num))
def main():
    print("Validating Connected IoT Devices!")
    DM.dm_engine()
    DM.block_all_ips()

    # Importing the dataset
    dataset = pd.read_csv('/home/pi/Software/IoT-HASS/CICIDS2017_Sample.csv')
    X = dataset.iloc[:, :-1].values
    y = dataset.iloc[:, 78].values

    # Splitting the dataset into the Training set and Test set
    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=0)

    ############## Start of Feature Scaling ###################
    from sklearn.preprocessing import StandardScaler
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    # Fitting Decision Tree Classification to the Training set
    from sklearn.tree import DecisionTreeClassifier
    classifier = DecisionTreeClassifier(criterion='entropy', random_state=0)
    classifier.fit(X_train, y_train)

    # Feature Selection
    from sklearn.feature_selection import SelectKBest, SelectPercentile, chi2
    KBestSelector = SelectKBest(k=5)
    KBestSelector = KBestSelector.fit(X_train, y_train)
    X_train_FS = KBestSelector.transform(X_train)
    names = dataset.iloc[:, :-1].columns.values[KBestSelector.get_support()]
    scores = KBestSelector.scores_[KBestSelector.get_support()]
    names_scores = list(zip(names, scores))
    ns_df = pd.DataFrame(data=names_scores, columns=['Feat_Name', 'F_Score'])
    ns_df_sorted = ns_df.sort_values(['F_Score', 'Feat_Name'])
    #print(ns_df_sorted)

    # Fit the model with the new reduced features
    classifier.fit(X_train_FS, y_train)

    # Predicting the Test set results
    X_test_FS = KBestSelector.transform(X_test)
    y_pred = classifier.predict(X_test_FS)

    conn = socket.socket(socket.AF_PACKET, socket.SOCK_RAW, socket.ntohs(3))

    # define array variables to hold time and statistics
    TimeBetBwdPkts = 0
    NumBwdPkts = 0
    NumIdleFlow = 0
    prev_fin_flag = 0
    flow_idle_start_time = datetime.datetime.now()
    flow_idle_end_time = datetime.datetime.now()
    AllTimesBetBwdPkts = []
    AllflowIdleTimes = []
    AllPacketLengths = []
    max_biat = 0
    mean_biat = 0
    std_biat = 0
    pkt_len_variance = 0
    std_idle = 0

    while True:
        raw_data, addr = conn.recvfrom(65535)
        dest_mac, src_mac, eth_proto, data = unpack_ethernet_frame(raw_data)

        # get packet length or size
        packet_length = len(raw_data)
        AllPacketLengths.append(packet_length)

        # IPv4
        if eth_proto == 8:
            (version, header_length, ttl, proto, src, target, data) = ipv4_packet_header(data)

            # TCP packet
            if proto == 6:
                (src_port, dest_port, sequence, acknowledgement, flag_urg, flag_ack,
                 flag_psh, flag_rst, flag_syn, flag_fin, data) = unpack_tcp_segment(data)

                # capture packet flow
                # we will identify each flow by determining when src and dst ip change
                # first capture the original src and dst IPs
                prev_src_ip = src
                prev_target_ip = target

                if flag_fin == '1' and prev_fin_flag == '0':
                    flow_idle_start_time = datetime.datetime.now()
                    NumIdleFlow = NumIdleFlow + 1
                elif flag_fin == '0' and prev_fin_flag == '1':
                    flow_idle_end_time = datetime.datetime.now()
                else:
                    flow_idle_start_time = datetime.datetime.now()
                    flow_idle_end_time = datetime.datetime.now()

                prev_fin_flag = flag_fin
                flowIdleTime = (flow_idle_end_time - flow_idle_start_time).microseconds
                AllflowIdleTimes.append(flowIdleTime)

                LastTimeBwdPktSeen = datetime.datetime.now()
                if (NumBwdPkts == 1):
                    TimeBetBwdPkts = 0
                elif (NumBwdPkts > 1):
                    TimeBetBwdPkts = (datetime.datetime.now() - LastTimeBwdPktSeen).microseconds
                else:
                    TimeBetBwdPkts = 0
                NumBwdPkts = NumBwdPkts + 1
                AllTimesBetBwdPkts.append(TimeBetBwdPkts)

                # get statistics values for backwards packets
                if sum(AllTimesBetBwdPkts) == 0:
                    mean_biat = 0
                    max_biat = 0
                    std_biat = 0
                else:
                    mean_biat = stats.mean(AllTimesBetBwdPkts)
                    max_biat = max(AllTimesBetBwdPkts)
                    std_biat = stats.stdev(AllTimesBetBwdPkts)

                if (sum(AllflowIdleTimes) > 0 and len(AllflowIdleTimes) > 1):
                    std_idle = stats.stdev(AllflowIdleTimes)
                else:
                    std_idle = 0

                if (sum(AllPacketLengths) > 0 and len(AllPacketLengths) > 1):
                    pkt_len_variance = stats.variance(AllPacketLengths)
                else:
                    pkt_len_variance = 0

                # Invoking iot_hass() function
                iot_hass(mean_biat, std_biat, max_biat, pkt_len_variance, std_idle,
                         src, target, classifier, dest_mac, src_mac, raw_data)
""" LV-MaxSonar data PW: This pin outputs a pulse width representation of range. The distance can be calculated using the scale factor of 147uS per inch. Range is (0.88, 37.5) in mS """ # begin print( 'Reading from GPIO pin BCM{0}. Press ^Z, ^C, or use another signal to exit.\n' .format(input_pin)) time.sleep(1.5) # continuously measure the input pin (albeit still too slow, so it's effectively undersampling) while not do_exit: # read from input x = int(GPIO.input(input_pin)) # 1 or 0 # apply IIR low pass filter (undersampling, so it requires an average) acc += k * (x - acc) distance = translate(acc, 0, 1, 0.88, 37.5) / 0.147 * 2.51 / 100.0 past_measurements.append(distance) if (len(past_measurements) > 10): past_measurements.pop(0) print('PWM: {0:.2f}\t\tDistance: {1:.2f}m\t\tVariance: {2:.2f}'.format( acc, distance, variance(past_measurements))) time.sleep(0.05) GPIO.cleanup()
def variance(self):
    return stats.variance(self.values())
def variance(data):
    return statistics.variance(data)
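# A hedged companion note: statistics.variance raises StatisticsError when
# given fewer than two data points, so thin wrappers like the one above are
# often guarded (the guard below is a suggestion, not part of the original):
import statistics

def safe_variance(data):
    try:
        return statistics.variance(data)
    except statistics.StatisticsError:  # fewer than two data points
        return 0.0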
from statistics import variance, stdev
import numpy as np

coffee = np.array([202, 177, 121, 148, 89, 121, 137, 158])

# compute the sample variance
cf_var = variance(coffee)
print("Simple Variance :", round(cf_var, 2))
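# A hedged cross-check: statistics.variance above is the *sample* variance
# (n - 1 denominator); statistics.pvariance divides by n instead.
from statistics import pvariance
print("Population Variance :", round(pvariance(coffee), 2))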
print("Episode: " + str(e) + " Score: " + str(score) + " Max height: " + str(max_h), end="\r", flush=False) iter += 1 # Plot graph of rewards per episode #plt.plot(np.arange(epochs),episode_rewards[epochs*(iter-1):]) #plt.show() print("Mean reward: ", stats.mean(episode_rewards[epochs * (iter - 1):])) mean_rewards.append(stats.mean(episode_rewards[epochs * (iter - 1):])) print("Std Deviation: ", stats.stdev(episode_rewards[epochs * (iter - 1):])) mean_stddevs.append(stats.stdev(episode_rewards[epochs * (iter - 1):])) print("Variance: ", stats.variance(episode_rewards[epochs * (iter - 1):]) / epochs) mean_variances.append( stats.variance(episode_rewards[epochs * (iter - 1):]) / epochs) env.close() plt.plot(np.arange(epochs * iter), episode_rewards) plt.show() print("Overall mean reward: ", stats.mean(episode_rewards)) print("Overall std deviation: ", stats.stdev(episode_rewards)) print("Overall variance: ", stats.variance(episode_rewards) / (epochs * iter)) plt.figure(figsize=(10, 5)) plt.plot(l_rate, mean_rewards) #plt.plot(l_rate,mean_stddevs) #plt.plot(l_rate,mean_variances) plt.show()
mean_Bare_Nuclei = stat.mean(data[:, 6])
mean_Bland_Chromatin = stat.mean(data[:, 7])
mean_Normal_Nucleoli = stat.mean(data[:, 8])
mean_Mitoses = stat.mean(data[:, 9])

stdev_Clump_Thickness = stat.stdev(data[:, 1])
stdev_Uniformity_of_Cell_Size = stat.stdev(data[:, 2])
stdev_Uniformity_of_Cell_Shape = stat.stdev(data[:, 3])
stdev_Marginal_Adhesion = stat.stdev(data[:, 4])
stdev_Single_Epithelial_Cell_Size = stat.stdev(data[:, 5])
stdev_Bare_Nuclei = stat.stdev(data[:, 6])
stdev_Bland_Chromatin = stat.stdev(data[:, 7])
stdev_Normal_Nucleoli = stat.stdev(data[:, 8])
stdev_Mitoses = stat.stdev(data[:, 9])

variance_Clump_Thickness = stat.variance(data[:, 1])
variance_Uniformity_of_Cell_Size = stat.variance(data[:, 2])
variance_Uniformity_of_Cell_Shape = stat.variance(data[:, 3])
variance_Marginal_Adhesion = stat.variance(data[:, 4])
variance_Single_Epithelial_Cell_Size = stat.variance(data[:, 5])
variance_Bare_Nuclei = stat.variance(data[:, 6])
variance_Bland_Chromatin = stat.variance(data[:, 7])
variance_Normal_Nucleoli = stat.variance(data[:, 8])
variance_Mitoses = stat.variance(data[:, 9])

skew_Clump_Thickness = skew(data[:, 1])
skew_Uniformity_of_Cell_Size = skew(data[:, 2])
skew_Uniformity_of_Cell_Shape = skew(data[:, 3])
skew_Marginal_Adhesion = skew(data[:, 4])
skew_Single_Epithelial_Cell_Size = skew(data[:, 5])
skew_Bare_Nuclei = skew(data[:, 6])
# Get Gutiérrez de Polini (comprehensibility)
L = num_letters
P = num_words
F = num_sentences
GP_com = mf.gutierres_polini_comprehension(L, P, F)

# Get mean number of letters per word
let_per_word = [len(list(x)) for x in text]
x_hat = sum(let_per_word) / len(let_per_word)

# Get variance of number of letters per word
variance = statistics.variance(let_per_word)

# Get Muñoz-Muñoz (readability)
n = num_words
x_hat = x_hat
variance = variance
MM_read = mf.munoz_munoz_read(n, x_hat, variance)

# Get sentences per hundred words
hun_sentences = mf.get_sentences(' '.join(hun_words))

# Get syllables per hundred words
import statistics

data = [0, 1, 2, 3, 4, 5, 6]
print(statistics.mean(data))
print(statistics.variance(data))

from urllib.request import urlopen
with urlopen("http://tycho.usno.navy.mil/cgi-bin/timer.pl") as response:
    for line in response:
        line = line.decode("utf-8")
        if "EST" in line or "EDT" in line:
            print(line)

from datetime import date
now = date.today()
birthday = date(1910, 5, 10)
age = now - birthday
print(age.days)
print(int(age.days) // 365)
print(age.days // 365)
            log(max_val - observed_val) - log(val))

    with Pool(processes=processes) as pool:
        empirical_dist = sorted(
            pool.map(
                partial(draw_sample, population=population, k=len(regions)),
                range(permutations)))

    pval = sum(val >= observed_val for val in empirical_dist) / permutations
    empirical_mean = mean(empirical_dist)
    if empirical_mean == 0:
        raise RuntimeError(
            'The mean of the empirical distribution appears to be zero. '
            'Increasing the number of permutations MIGHT solve this problem.')
    fold_change = observed_val / empirical_mean
    if parametric:
        empirical_var = variance(empirical_dist)
        # fit a gamma distribution by the method of moments
        a = empirical_mean**2 / empirical_var
        scale = empirical_var / empirical_mean
        mean_pp = gamma.cdf(empirical_mean, a, scale=scale)
        if mean_pp <= conf / 2:
            empirical_conf_lower = 0
            empirical_conf_upper = gamma.ppf(conf, a, scale=scale)
        elif mean_pp >= 1 - conf / 2:
            empirical_conf_lower = gamma.ppf((1 - conf) / 2, a, scale=scale)
            empirical_conf_upper = gamma.ppf(1 - (1 - conf) / 2, a, scale=scale)
        else:
            empirical_conf_lower = gamma.ppf(mean_pp - conf / 2, a, scale=scale)
["Mode", statistics.mode(results_drawn)], ["Modes", statistics.multimode(results_drawn)], # (2C) Percentiles ["Median", statistics.median(results_drawn)], [ "Percentiles", statistics.quantiles(results_drawn, n=4, method='inclusive') ], # inclusive, exclusive [ "Interquartile range", scipy.stats.iqr(results_drawn, interpolation='midpoint') ], # {‘linear’, ‘lower’, ‘higher’, ‘midpoint’, ‘nearest’} # (3) Measure of dispersion # (3A) Sample # PICK ONE OF THESE ["Sample variance", statistics.variance(results_drawn)], ["Sample standard deviation", statistics.stdev(results_drawn)], # (3B) Population # PICK ONE OF THESE ["Population variance", statistics.pvariance(results_drawn)], ["Population standard deviation", statistics.pstdev(results_drawn)], # (4) Distortion of symmetry # (4A) Skewness ["Skewness", scipy.stats.skew(results_drawn)], # (4B) Kurtosis ["Kurtosis", scipy.stats.kurtosis(results_drawn)], # (5) Confidence interval [
fft_column_x = get_fft_values(raw_train_data[:9000, 0], 1.5 / len(column_x), len(column_x), 100)
fft_column_y = get_fft_values(raw_train_data[:9000, 1], 1.5 / len(column_x), len(column_x), 100)
fft_column_z = get_fft_values(raw_train_data[:9000, 2], 1.5 / len(column_x), len(column_x), 100)

# raw stats
feature_for_one_csv.append(stats.mean(column_x))
# stats.harmonic_mean(column_x)  # there are negative values it cannot handle (possible fix: add a constant offset)
feature_for_one_csv.append(stats.median(column_x))
feature_for_one_csv.append(stats.median_low(column_x))
feature_for_one_csv.append(stats.median_high(column_x))
feature_for_one_csv.append(stats.median_grouped(column_x))
# stats.mode(column_x)  # there are 4 equally common values
feature_for_one_csv.append(stats.pstdev(column_x))
feature_for_one_csv.append(stats.pvariance(column_x))
feature_for_one_csv.append(stats.stdev(column_x))
feature_for_one_csv.append(stats.variance(column_x))

feature_for_one_csv.append(stats.mean(column_y))
# stats.harmonic_mean(column_y)
feature_for_one_csv.append(stats.median(column_y))
feature_for_one_csv.append(stats.median_low(column_y))
feature_for_one_csv.append(stats.median_high(column_y))
feature_for_one_csv.append(stats.median_grouped(column_y))
# stats.mode(column_y)
feature_for_one_csv.append(stats.pstdev(column_y))
feature_for_one_csv.append(stats.pvariance(column_y))
feature_for_one_csv.append(stats.stdev(column_y))
feature_for_one_csv.append(stats.variance(column_y))

feature_for_one_csv.append(stats.mean(column_z))
# stats.harmonic_mean(column_z)
triplesPorPartido = []
for line in f:
    puntosPorPartido.append(int(line[0]))
    faltasPorPartido.append(int(line[1]))
    rebotesPorPartido.append(int(line[2]))
    tirosLibresPorPartido.append(int(line[3]))
    triplesPorPartido.append(int(line[4]))

print("---- POINTS PER GAME ----")
print("Mean: " + str(stats.mean(puntosPorPartido)) + " || " +
      "Mode: " + str(stats.mode(puntosPorPartido)) + " || " +
      "Max: " + str(max(puntosPorPartido)) + " || " +
      "Min: " + str(min(puntosPorPartido)) + " || " +
      "Variance: " + str(stats.variance(puntosPorPartido)))
print("---- FOULS PER GAME ----")
print("Mean: " + str(stats.mean(faltasPorPartido)) + " || " +
      "Mode: " + str(stats.mode(faltasPorPartido)) + " || " +
      "Max: " + str(max(faltasPorPartido)) + " || " +
      "Min: " + str(min(faltasPorPartido)) + " || " +
      "Variance: " + str(stats.variance(faltasPorPartido)))
print("---- REBOUNDS PER GAME ----")
print("Mean: " + str(stats.mean(rebotesPorPartido)) + " || " +
      "Mode: " + str(stats.mode(rebotesPorPartido)) + " || " +
      "Max: " + str(max(rebotesPorPartido)) + " || " +
      "Min: " + str(min(rebotesPorPartido)) + " || " +
      "Variance: " + str(stats.variance(rebotesPorPartido)))
print("---- FREE THROWS PER GAME ----")
print("Mean: " + str(stats.mean(tirosLibresPorPartido)) + " || " +
      "Mode: " + str(stats.mode(tirosLibresPorPartido)) + " || " +
      "Max: " + str(max(tirosLibresPorPartido)) + " || " +
      "Min: " + str(min(tirosLibresPorPartido)) + " || " +
      "Variance: " + str(stats.variance(tirosLibresPorPartido)))
print("---- THREE-POINTERS PER GAME ----")
print("Mean: " + str(stats.mean(triplesPorPartido)) + " || " +
      "Mode: " + str(stats.mode(triplesPorPartido)) + " || " +
      "Max: " + str(max(triplesPorPartido)) + " || " +
      "Min: " + str(min(triplesPorPartido)) + " || " +
      "Variance: " + str(stats.variance(triplesPorPartido)))
from statistics import mean
from statistics import median
from statistics import mode
from statistics import variance

print(variance([1, 1, 1, 2, 2]))
#print(mode([1, 1, 1, 2, 2]))
#print(median([1, 1, 1, 2, 2]))
#print(mean([1, 2, 2, 2, 1, 3, 4, 1, 5]))
""" Created on Tue Jun 9 11:32:35 2020 @author: gabecagnazzi """ #Exercise 5.28: Intro to Data Science survey response statitistics import numpy as np import statistics responses = [1, 2, 5, 4, 3, 5, 2, 1, 3, 3, 1, 4, 3, 3, 3, 2, 3, 3, 2, 5] u_elements, count_elements = np.unique(responses, return_counts=True) print("Frequency of responses: ") for i in range(len(u_elements)): print("there are ", responses[i], "with values of ", count_elements[i]) print("\n\nThe statistics are: ") print("Min: ", min(responses)) print("Max: ", max(responses)) print("Range: ", max(responses) - min(responses)) print("Median: ", statistics.median(responses)) print("Mode: ", statistics.mode(responses)) print("Variance: ", statistics.variance(responses)) print("Standard Deviation: ", statistics.stdev(responses))