def gen_avg_pair_stream(iterable):
    """Yield one value per (left, right) pair from *iterable*.

    Rules per pair:
      * both values None      -> yield None
      * exactly one is None   -> yield the other value
      * both present          -> yield average((left, right))

    Fixes: comparisons against None now use ``is None`` (PEP 8) instead of
    ``== None``; the nested conditional expression is unrolled into
    explicit branches.
    """
    for left, right in iterable:
        if left is None and right is None:
            yield None
        elif right is None:
            yield left
        elif left is None:
            yield right
        else:
            yield average((left, right))
def sumarize_rdt_results(rundir) : results = {} # Compute average and confidence interval for each rdt file. for f in rdtfiles_l : k =f[0] # Step name fn=f[2] # RDT file name if summarize_results : try: rdtfn=os.path.join(rundir,fn) rdt_d=rdt.read(rdtfn) elapsed_list=rdt.get_column_values(rdt_d,"ELAPSED") try: av=stats.average(elapsed_list) except stats.StatsError, e: print "WARNING: Could not compute average for results at", fn, "(", e, ")" av=0.0 try: ci=stats.conf_int(elapsed_list, 95.0) except stats.StatsError, e: print "WARNING: Could not compute confidence interval for results at", fn, "(", e, ")" ci=0.0 except rdt.RdtError, e: print "WARNING: error when summarizing results for", fn, "(", e, ")" av=0.0 ci=0.0
def sumarize_rdt_results(rundir): results = {} # Compute average and confidence interval for each rdt file. for f in rdtfiles_l: k = f[0] # Step name fn = f[2] # RDT file name if summarize_results: try: rdtfn = os.path.join(rundir, fn) rdt_d = rdt.read(rdtfn) elapsed_list = rdt.get_column_values(rdt_d, "ELAPSED") try: av = stats.average(elapsed_list) except stats.StatsError, e: print "WARNING: Could not compute average for results at", fn, "(", e, ")" av = 0.0 try: ci = stats.conf_int(elapsed_list, 95.0) except stats.StatsError, e: print "WARNING: Could not compute confidence interval for results at", fn, "(", e, ")" ci = 0.0 except rdt.RdtError, e: print "WARNING: error when summarizing results for", fn, "(", e, ")" av = 0.0 ci = 0.0
def pearsons_product_moment(wavx, wavy):
    """Pearson's product-moment correlation coefficient of two sequences.

    https://en.wikipedia.org/wiki/Correlation_and_dependence

    :param wavx: first sequence of numbers (docstring previously named the
        params ``wav1``/``wav2``, which did not match the signature)
    :param wavy: second sequence, same length as *wavx*
    :return: r in [-1, 1]
    :raises ZeroDivisionError: if either sequence has zero variance
    """
    # Means computed inline; the original called stats.average, assumed to
    # be the arithmetic mean. math.fsum keeps float accumulation accurate.
    avg_x = math.fsum(wavx) / len(wavx)
    avg_y = math.fsum(wavy) / len(wavy)
    cov = 0.0
    var_x = 0.0
    var_y = 0.0
    # zip replaces the index loop; inputs are expected to be equal length.
    for x, y in zip(wavx, wavy):
        dx = x - avg_x
        dy = y - avg_y
        cov += dx * dy
        var_x += dx * dx  # was math.pow(..., 2)
        var_y += dy * dy
    return cov / math.sqrt(var_x * var_y)
def stats_forpaper(self,exp_ghosts): """Get the stats for the paper""" # Get the excludes excludes=self.find_excludes(options,exp_ghosts) # Loop over all ghosts for atom in ['N','H','HA']: values=[] absent=[] for tg in sorted(exp_ghosts.keys()): for residue in sorted(exp_ghosts[tg].keys()): if excludes.has_key(tg): if residue in excludes[tg]: continue # # Get the value # if exp_ghosts[tg][residue].has_key(atom): exp_value=exp_ghosts[tg][residue][atom] exp_error=errors[atom] if exp_ghosts[tg][residue].has_key(atom+'_error'): exp_error=exp_ghosts[tg][residue][atom+'_error'] # # Deal with ranges # if exp_value[0]=='q' and options.use_questionable: exp_value=exp_value[1:] # # Deal with ranges - we need to do this better # if len(exp_value.split(';'))==2: s=[] for val in exp_value.split(';'): if val[0] in ['<','>']: val=val[1:] s.append(float(val)) exp_error=abs(s[0]-s[1]) exp_value=float(sum(s))/len(s) if exp_value=='absent': absent.append(residue) else: values.append(abs(float(exp_value))) # # Calculate average # import stats avg=0.0 SD=0.0 if len(values)>0: avg=stats.average(values) SD=stats.stddev(values) print '%2s ghost titrations: %3d, avg: %5.2f (%5.2f), %3d absent ghost titrations' %(atom,len(values),avg,SD,len(absent)) return
def calculate_average(self,data):
    """Calculate the average ghost observed and the standard deviation.

    Walks the nested dict data[datatype][diel][TG][residue][nucleus],
    replacing each list of values in place with ``[avg, SD]``, and
    returns the (mutated) dict.

    NOTE(review): the 'Skipping' message has two format placeholders but
    is given only one argument — it would raise TypeError if the
    KeyError branch ever fires; confirm intent. Python 2 code.
    """
    for datatype in data.keys():
        for diel in data[datatype].keys():
            for TG in data[datatype][diel].keys():
                for residue in data[datatype][diel][TG].keys():
                    for nucleus in data[datatype][diel][TG][residue].keys():
                        try:
                            values=data[datatype][diel][TG][residue][nucleus]
                        except KeyError:
                            print 'Skipping %d for %s' %(diel)
                            continue
                        import stats
                        avg=stats.average(values)
                        SD=stats.stddev(values)
                        # Replace the raw values with their summary stats.
                        data[datatype][diel][TG][residue][nucleus]=[avg,SD]
    return data
def calc(x, conf):
    """Compute summary statistics for the sample *x*.

    :param x: sequence of numeric observations
    :param conf: confidence level (convertible to float) for the interval
    :return: tuple (average, geometric mean, variance, stddev,
             confidence interval, median); ``sd`` is the string 'error'
             and ``c`` is 'none' when the variance could not be computed.

    Fix: the local previously named ``sum`` shadowed the ``sum`` builtin;
    renamed to ``total``.
    """
    size = len(x)
    total = stats.sum(x)
    av = stats.average(total, size)
    gm = stats.gmean(x)
    v = stats.var(total, stats.sqsum(x), size)
    med = stats.median(x)
    # stats.var signals failure by returning the string 'error'.
    if v != 'error':
        sd = stats.stdv1(v)
        c = stats.conf(float(conf), sd, size)
    else:
        sd = 'error'
        c = 'none'
    return av, gm, v, sd, c, med
def compute_best_fit_line(data):
    """Compute the least-squares line of best fit for a list of y values.

    The x value of each point is its index in the list.
    http://hotmath.com/hotmath_help/topics/line-of-best-fit.html

    :param data: list of data points (y values); needs at least 2 points
    :return: (m, b), i.e. (slope, y-offset)
    :raises ValueError: if fewer than two points are supplied

    Fixes: the mean of the x values 0..n-1 is (n - 1) / 2, not
    len(data) / 2 — the old value biased the fit (e.g. [1, 3, 5, 7] gave
    slope 5/3 instead of 2). Also fixed the ``yoffet_b`` typo and replaced
    math.pow with squaring; the mean of y is computed inline (the original
    called stats.average, assumed to be the arithmetic mean).
    """
    n = len(data)
    if n < 2:
        raise ValueError("need at least two data points to fit a line")
    avg_x = (n - 1) / 2
    avg_y = sum(data) / n
    sxy = 0.0  # sum of (x - avg_x)(y - avg_y)
    sxx = 0.0  # sum of (x - avg_x)^2
    for x, y in enumerate(data):
        dx = x - avg_x
        sxy += dx * (y - avg_y)
        sxx += dx * dx
    slope_m = sxy / sxx
    yoffset_b = avg_y - slope_m * avg_x
    return slope_m, yoffset_b
def spectral_flux(pack1, pack2):
    """Calculate the average squared spectral-flux difference between two
    sample packs.

    The temporal location of pack1 should be before that of pack2; the
    packs are expected to produce equal-length spectra.

    :param pack1: (i-1)th sample pack
    :param pack2: (i)th sample pack
    :return: mean of the squared per-bin differences

    Fix: index loop over range(len(...)) replaced with a zip comprehension.
    """
    fft1 = into_freq_domain(pack1)
    fft2 = into_freq_domain(pack2)
    fluxes = [(b2 - b1) ** 2 for b1, b2 in zip(fft1, fft2)]
    return stats.average(fluxes)
def determine_bpm(pcm_data):
    """Estimate the tempo (BPM) of a PCM sample stream.

    Scans the signal in small sample packs, tracks a sliding window of
    pack energies, flags energy spikes against a locally adaptive
    threshold, and finally correlates the energy series with impulse
    trains for candidate BPMs (60-199).

    NOTE(review): the 44100 constants assume 44.1 kHz input — confirm.
    """
    #############################################
    # Maintenance of all calculated energies
    # and average energies:
    #############################################
    energy_spikes = []
    energy_averages = []
    energy_averages_SDs = []
    energies = []
    #############################################
    # Iteration over sample packs of song:
    #############################################
    # static parameters
    SAMPLE_PACK_SIZE = int(256 / 2)
    # ~1.3 seconds of history at 44.1 kHz, measured in sample packs.
    ENERGY_HISTORY_SIZE = int((44100 * 1.3) / SAMPLE_PACK_SIZE)
    # NOTE(review): THRESHOLD_C is never used; the threshold below
    # hard-codes 1.4 instead — confirm which constant is intended.
    THRESHOLD_C = 1.3
    # contains the last <ENERGY_HISTORY_SIZE> energies
    energy_history = deque(maxlen=ENERGY_HISTORY_SIZE)
    start = 0 # int(len(pcm_data) * 0.1)
    end = start + int(44100 * 120) # 120 seconds at 44.1 kHz (comment previously said 60)
    end = min(end, len(pcm_data)) # cuts off the extra samples at the end.
    nthIndex = range(start, end, SAMPLE_PACK_SIZE)
    for i in nthIndex:
        sample_pack = pcm_data[i:i + SAMPLE_PACK_SIZE]
        # Skip the final, short pack.
        if len(sample_pack) != SAMPLE_PACK_SIZE:
            continue
        # calculate the instance energy (squared) of this sample pack
        energy = stats.average(stats.squared(sample_pack))
        # append the instance energy to the right of the history list
        energy_history.append(energy)
        if len(energy_history) >= ENERGY_HISTORY_SIZE:
            # the history buffer is full so we can begin comparing average energy
            energies.append(energy)
            average_energy = stats.average(energy_history)
            average_energy_diff = stats.linear_dev(energy_history, average_energy)
            energy_averages.append(average_energy)
            energy_averages_SDs.append(average_energy_diff)
            # Adaptive spike threshold: scaled local mean plus a fraction
            # of the local deviation.
            determined_thresh = average_energy * 1.4 + 0.1 * average_energy_diff
            # print determined_C
            # check for energy spike
            if energy > determined_thresh:
                # we have an energy spike!
                energy_spikes.append(energy)
            else:
                # no spike
                energy_spikes.append(0)
    # Candidate tempos; each period is the beat interval expressed in
    # sample packs.
    BPMs = range(60, 200, 1)
    periods = [int((60.0 / bpm) * 44100 / SAMPLE_PACK_SIZE) for bpm in BPMs]
    period_i, correlations = compute_beat_with_impulse_trains(energies, periods)
    bpm = BPMs[period_i]
    return bpm
def register_sample (app, app_id, tag, day_of_the_week = None):
    """Rebuild the stored Event rows for (app, app_id, tag) from fresh data.

    Deactivates the user's existing events for *tag* (optionally filtered
    to *day_of_the_week*), recomputes a per-minute probability mass
    function over the 1440 minutes of a day from the app plugin's
    "<tag>_times" callable, detects high-probability intervals, and
    creates one Event per interval.

    NOTE(review): depends on module-level helpers ``minimum_pmf_mean``,
    ``minimum_time_between_event`` and ``fringe_time_for_event`` and on
    Django models ``User``/``Event``.
    """
    user = User.objects.filter(app = app, app_id = app_id).first()
    ## Clear all the events of that tag
    ## Then recreate them from the new data
    events = Event.objects.filter(user = user, tag = tag)
    if day_of_the_week is not None:
        events = events.filter(day_of_week = day_of_the_week)
    for event in events:
        event.is_active = False
        event.save()
    # Rebinds ``app`` from the app name to the imported plugin module.
    app = importlib.import_module(user.app + "." + user.app)
    event_times = getattr(app, "%s_times" % tag)(app_id, day_of_the_week = day_of_the_week)
    # Too few observations to build a usable distribution.
    if len(event_times) < 2:
        return
    if len(event_times[0]) < 2:
        return
    # Per-minute probability mass function over the day.
    pmf = stats.event_pmf(event_times, 1440)
    pmf_average = stats.average(pmf)
    if pmf_average < minimum_pmf_mean(tag):
        ## All weak probabilities. Only outlier events.
        return
    pmf_variance = stats.variance(pmf, average = pmf_average)
    # NOTE(review): pmf_std is computed but never used below.
    pmf_std = stats.standard_deviation(pmf, variance = pmf_variance)
    # Scan the day for intervals whose probability exceeds mean + variance.
    in_event = False
    event_start_minutes = []
    event_end_minutes = []
    event_probabilites = []  # NOTE(review): never populated or read.
    for minute in range(0,1440):
        if pmf[minute] > pmf_average + pmf_variance:
            if in_event is False:
                event_start_minutes.append(minute)
                in_event = True
        else:
            if in_event is True:
                event_end_minutes.append(minute)
                in_event = False
    if len(event_start_minutes) > len(event_end_minutes):
        ## Assume the last event started at night and ends in the morning
        event_start_minutes[0] = event_start_minutes[len(event_start_minutes) - 1]
        del event_start_minutes[len(event_start_minutes) - 1]
    ## If events are too close together, combine them.
    # NOTE(review): deleting from both lists while iterating over a range
    # computed up front can skip pairs or raise IndexError near the end of
    # the list — verify against real data.
    for index in range(0, len(event_end_minutes)):
        if index + 1 >= len(event_start_minutes):
            break
        event_end_time = event_end_minutes[index]
        next_event_start_time = event_start_minutes[index + 1]
        time_between_event = next_event_start_time - event_end_time
        if time_between_event < minimum_time_between_event(tag):
            del event_end_minutes[index]
            del event_start_minutes[index + 1]
    # Persist one Event per detected interval.
    for index in range(0, len(event_start_minutes)):
        start_minute = event_start_minutes[index]
        end_minute = event_end_minutes[index]
        if start_minute < end_minute:
            event_probability_set = pmf[start_minute:end_minute]
        else:
            # Interval wraps past midnight.
            # NOTE(review): the slice end 1439 drops minute 1439 — confirm
            # whether 1440 was intended.
            event_probability_set = pmf[start_minute:1439]
            event_probability_set.extend(pmf[0:end_minute])
        event_average_probablity = stats.average(event_probability_set)
        event_probability_variance = stats.variance(event_probability_set, average = event_average_probablity)
        # Fringe window around the event, wrapped onto the 1440-minute clock.
        fringe_start_time = start_minute - fringe_time_for_event(tag)
        if fringe_start_time < 0:
            fringe_start_time = 1440 + fringe_start_time
        fringe_end_time = end_minute + fringe_time_for_event(tag)
        if fringe_end_time > 1440:
            fringe_end_time = fringe_end_time - 1440
        if fringe_end_time > fringe_start_time:
            fringe_pmf = pmf[fringe_start_time:fringe_end_time]
        else:
            fringe_pmf = pmf[fringe_start_time:1439]
            fringe_pmf.extend(pmf[0:fringe_end_time])
        fringe_average_probability = stats.average(fringe_pmf)
        fringe_variance = stats.variance(fringe_pmf, average = fringe_average_probability)
        # Event times are stored as fractional hours.
        start_hour = float(start_minute)/60.0
        end_hour = float(end_minute)/60.0
        e = Event.objects.create(user = user, tag = tag, start_time = start_hour, end_time = end_hour, day_of_week = day_of_the_week, probability = event_average_probablity, probability_variance = event_probability_variance, fringe_probability = fringe_average_probability, fringe_variance = fringe_variance)
        e.save()
import matplotlib
import matplotlib.pyplot as plt

import rsg
import stats

# Signal-generation parameters.
HARMONICS = 8
FREQUENCY = 1200
N = 1024
# NOTE(review): COMPLEXITY_LIMIT is unused — possibly meant as the
# argument to rsg.complexity() below; confirm and either use or remove.
COMPLEXITY_LIMIT = 16384

# Generate N samples of the random signal and report its basic stats.
sigs = rsg.generate(HARMONICS, FREQUENCY, N)

print("Math Expectation", stats.average(sigs))
print("Math Dispersion", stats.dispersion(sigs))

fig, (ax1, ax2) = plt.subplots(2, 1)
plt.tight_layout(pad=4)

ax1.plot(sigs)
ax1.set(xlabel='time', ylabel='signal', title='Random generated signals')

# Fix: use N instead of the hard-coded 1024 so the sample count stays in
# sync with the generation above.
ax2.plot(rsg.complexity(N))
ax2.set(xlabel='number of intervals', ylabel='time',
        title='Complexity of algorithm')

fig.savefig("test.png")
plt.show()
# NOTE(review): truncated fragment (the final ``else:`` has no body here);
# Python 2 code. Depends on surrounding globals: s_desc, status,
# rdt_files, summarize_results.
print 'desc: ', s_desc
# Print results
if status != 0:
    print "Status [FAILED] (" + str(status) + ")"
else:
    print "Status [OK]"
print "Results summary ----------------------------"
# Summarize each RDT results file: average +- 95% CI of ELAPSED.
for rdt_id, v in rdt_files.iteritems():
    if summarize_results:
        try:
            fn = v[0]
            rdt_d = rdt.read(fn)
            elapsed_list = rdt.get_column_values(rdt_d, "ELAPSED")
            # Average; warn and use 0.0 on failure.
            try:
                av = stats.average(elapsed_list)
            except stats.StatsError, e:
                print "WARNING: Could not compute average for results at", fn, "(", e, ")"
                av = 0.0
            # 95% confidence interval; warn and use 0.0 on failure.
            try:
                ci = stats.conf_int(elapsed_list, 95.0)
            except stats.StatsError, e:
                print "WARNING: Could not compute confidence interval for results at", fn, "(", e, ")"
                ci = 0.0
        except rdt.RdtError, e:
            print "WARNING: error when summarizing results for", fn, "(", e, ")"
            av = 0.0
            ci = 0.0
        print '{0:15s} : {1:>16f} +- {2:<16f} : {3:s}'.format( rdt_id, av, ci, v[1])
    else:
def trapazoid_area(bottom, side1, side2):
    """Return the trapezoid area: the bottom times the mean of the two sides.

    (The public name keeps the original spelling so callers are unaffected.)
    """
    mean_side = average((side1, side2))
    return bottom * mean_side
# Python 2 script fragment. Depends on surrounding globals: s_desc,
# status, rdt_files, summarize_results.
print 'desc: ', s_desc
# Print results
if status != 0:
    print "Status [FAILED] ("+str(status)+")"
else :
    print "Status [OK]"
print "Results summary ----------------------------"
# Summarize each RDT results file: average +- 95% CI of ELAPSED.
for rdt_id,v in rdt_files.iteritems() :
    if summarize_results :
        try:
            fn=v[0]
            rdt_d=rdt.read(fn)
            elapsed_list=rdt.get_column_values(rdt_d,"ELAPSED")
            # Average; warn and use 0.0 on failure.
            try:
                av=stats.average(elapsed_list)
            except stats.StatsError, e:
                print "WARNING: Could not compute average for results at", fn, "(", e, ")"
                av=0.0
            # 95% confidence interval; warn and use 0.0 on failure.
            try:
                ci=stats.conf_int(elapsed_list, 95.0)
            except stats.StatsError, e:
                print "WARNING: Could not compute confidence interval for results at", fn, "(", e, ")"
                ci=0.0
        except rdt.RdtError, e:
            print "WARNING: error when summarizing results for", fn, "(", e, ")"
            av=0.0
            ci=0.0
        print '{0:15s} : {1:>16f} +- {2:<16f} : {3:s}'.format(rdt_id, av, ci, v[1])
    else:
        # Summaries disabled: just echo the file name and description.
        print '{0:15s} : {1:s} : {2:s}'.format(rdt_id,v[0],v[1])
def update(self):
    """Recompute the cached average-AQI fields and persist the entity.

    Calls stats.average for the 7-day, 30-day, and all-time (0) windows,
    in that order, then saves via self.put().
    """
    windows = (
        ('last_week_avg_aqi', 7),
        ('last_month_avg_aqi', 30),
        ('all_time_avg_aqi', 0),
    )
    for field, days in windows:
        setattr(self, field, stats.average(self, days))
    self.put()
def ave_over_interval(filename, start_row, end_row):
    """Average each of the two channels over a row interval of a file.

    :param filename: data file passed through to gen_points_over_interval
    :param start_row: first row of the interval
    :param end_row: last row of the interval
    :return: (chan1_ave, chan2_ave, average of the two channel averages)

    Fixes: ``!= None`` replaced with ``is not None`` (PEP 8) and the odd
    ``range(1 + 1)`` spelled as ``range(2)``.

    NOTE(review): the interval is re-read once per channel (two passes of
    gen_points_over_interval) — intentional streaming trade-off? confirm.
    """
    chan1_ave, chan2_ave = tuple(
        average(
            points[i]
            for points in gen_points_over_interval(filename, start_row, end_row)
            if points[i] is not None
        )
        for i in range(2)
    )
    return (chan1_ave, chan2_ave, average((chan1_ave, chan2_ave)))
# additional task for lab 1.1
import matplotlib
import matplotlib.pyplot as plt

import rsg
import stats

HARMONICS = 8
FREQUENCY = 1200
# Sample counts 2^1 .. 2^11 (comprehension replaces list(map(lambda ...))).
Ns = [2 ** exp for exp in range(1, 12)]

# Mean of a generated signal for each sample count.
Mxs = [stats.average(rsg.generate(HARMONICS, FREQUENCY, n)) for n in Ns]

fig, ax = plt.subplots()
ax.plot(Ns, Mxs, c="r")
# Fix: read the last element instead of Ns.pop(), which destructively
# removed it from the list just to set the axis limit.
ax.set_xlim(0, Ns[-1])
fig.savefig("example-mx.png")
plt.show()