def __evalFunction(self, toks):
    """Evaluate one parsed function call.

    ``toks[0]`` is a sequence whose first item is the function name
    (case-insensitive) and whose remaining items are the arguments.
    ``None`` arguments are discarded; when nothing is left, or the name is
    not recognised, ``0`` is returned.

    Supported names: SUM, AVE, MAX, MIN, COUNT, ALARMS (returns
    ``self.alarm_list`` itself), DISTANCE (summed distance between
    consecutive usable geopoints) and SQRT (of the first argument).
    """
    call = toks[0]
    name = call[0].upper()
    params = [item for item in call[1:] if item is not None]  # drop Nones
    if not params:
        return 0

    # Aggregates that map directly onto a builtin taking the argument list.
    builtin_aggregates = {
        'SUM': sum,
        'MAX': max,
        "MIN": min,
        "COUNT": len,
    }
    if name in builtin_aggregates:
        return builtin_aggregates[name](params)

    if name == 'AVE':
        from tools import average
        return average(params)

    if name == "ALARMS":
        return self.alarm_list

    if name == "DISTANCE":
        total = 0
        previous = None
        for raw in params:
            point = tools.safe_geopoint(raw)
            if not point:
                # Unusable points are skipped without breaking the chain.
                continue
            if previous:
                total += tools.calcLocDistance(previous, point)
            previous = point
        return total  # m

    if name == "SQRT":
        return math.sqrt(params[0])

    return 0
def __baseline(self, space):
    """Average the ratios of a sample drawn from ``space``.

    At most ``self.baselines`` items are drawn (fewer when ``space`` is
    smaller). Each drawn item must unpack into a ``(ratio, other)`` pair;
    only the ratio is averaged.
    """
    # min() returns its first argument on a tie, matching the original
    # branch that picked self.baselines unless len(space) was strictly less.
    draw_count = min(self.baselines, len(space))
    drawn = sample(space, draw_count)
    return average([ratio for ratio, _ignored in drawn])
def compute_accuracy(listup):
    '''Average the latest value taken from each entry of ``listup``.

    input  <-- a list of tuples, e.g. [(Bl1, 0.1), (Bl2, 0.3), ...]
               (such as self.past, self.progress or self.all)
    output --> a number indicating accuracy: the average of
               take_latest(entry) over all entries
    '''
    return average([take_latest(entry) for entry in listup])
def draw_graph(self):
    """Draw the progress pie chart and the correctness/extraction plots.

    Reads ``self.output``, ``self.name``, the ``allNum``/``progressNum``/
    ``tot`` counters, ``self.progress``, ``self.info`` and ``self.recent``.

    Side effects visible in this method:
      * prints the pie fractions, the pie title and a progress message;
      * moves the current 'understanding' aggregate in ``self.sinfo`` to
        'past' (0 when absent) and stores a new current value of
        ``100 - 10 * average(<extraction values>)``;
      * reverses ``self.recent`` in place.
    """
    output = self.output

    # Pie chart showing progress.
    labels = ['done', 'progress', 'waiting']
    fracs = [self.allNum - self.progressNum, self.progressNum, self.tot]
    emphasis = [1]
    print(fracs)
    tit = self.name + ' ' + 'para_quantity'
    print(tit)
    pie_chart(fracs, labels, tit, output, emphasis=emphasis, show=False)

    # Correctness / extraction per serial number in progress.
    correctness = []
    extraction = []
    xlabels = []
    for serial in self.progress:
        entry = self.info[serial]
        correctness.append(entry['correctness'])
        extraction.append(entry['extraction'][0])
        xlabels.append(serial)
    cordata = {self.name: correctness}
    extdata = {self.name: extraction}

    tit = self.name + ' ' + 'most_recent_correctness'
    print('computing understaning aggregate')
    aggregate = self.sinfo['aggregate']
    current = aggregate['current']
    if 'understanding' in current:
        aggregate['past']['understanding'] = current['understanding']
    else:
        aggregate['past']['understanding'] = 0
    current['understanding'] = 100 - 10 * average(extraction)

    # NOTE(review): the extraction plot is drawn even when the correctness
    # plot raises; the finally suite is assumed to cover only this second
    # plot call - confirm against the original layout.
    try:
        normal_plot(cordata, xlabels, tit, output, xaxis=None, yaxis=None)
    finally:
        tit = self.name + ' ' + 'most_recent_extraction'
        normal_plot(extdata, xlabels, tit, output, xaxis=None, yaxis=None)

    # Correctness / extraction per recent date that has recorded info.
    by_date = self.info['date']
    self.recent.reverse()
    dates = []
    date_correctness = []
    date_extraction = []
    for day in self.recent:
        if day not in by_date:
            continue
        dates.append(day)
        date_correctness.append(by_date[day]['correctness'][0])
        date_extraction.append(by_date[day]['extraction'][0])
    cordata = {self.name: date_correctness}
    extdata = {self.name: date_extraction}
    xlabels = dates

    tit = self.name + ' ' + 'dates_recent_correctness'
    normal_plot(cordata, xlabels, tit, output, xaxis=None, yaxis=None)
    tit = self.name + ' ' + 'dates_recent_extraction'
    normal_plot(extdata, xlabels, tit, output, xaxis=None, yaxis=None)
def pearsons_product_moment(wavx, wavy):
    """
    Pearson product-moment correlation coefficient of two sequences.

    https://en.wikipedia.org/wiki/Correlation_and_dependence

    :param wavx: first sequence of samples
    :param wavy: second sequence, read at the same indices as ``wavx``
    :return: sum of deviation products divided by the square root of the
             product of the two squared-deviation sums
    """
    mean_x = tools.average(wavx)
    mean_y = tools.average(wavy)

    cross_sum = 0
    sq_sum_x = 0
    sq_sum_y = 0
    for idx, sample_x in enumerate(wavx):
        sample_y = wavy[idx]
        cross_sum += (sample_x - mean_x) * (sample_y - mean_y)
        # Only the real part enters the squared deviations.
        sq_sum_x += math.pow(sample_x.real - mean_x, 2)
        sq_sum_y += math.pow(sample_y.real - mean_y, 2)

    return cross_sum / (math.sqrt(sq_sum_x * sq_sum_y))
def compute_best_fit_line(data):
    """
    Computes the least-squares line of best fit for a list of values.

    Assumes the x value of each item is its index in the list
    (0, 1, ..., n - 1).
    http://hotmath.com/hotmath_help/topics/line-of-best-fit.html

    :param data: list of data points; only the ``.real`` part of each
                 point is used for the y value
    :return: (m, b) or .. (slope, yoffset)
    :raises ZeroDivisionError: when the index deviations sum to zero,
                               e.g. for a single data point
    """
    # The mean of the indices 0..n-1 is (n - 1) / 2, not n / 2. The float
    # literal also avoids integer division under Python 2. Using the wrong
    # centre inflates the denominator and skews both slope and intercept.
    avg_x = (len(data) - 1) / 2.0
    avg_y = tools.average(data)

    xXyY = 0
    xX2 = 0
    for x, value in enumerate(data):
        dx = x - avg_x
        xXyY += dx * (value.real - avg_y)
        xX2 += math.pow(dx, 2)

    slope_m = xXyY / xX2
    yoffset_b = avg_y - slope_m * avg_x
    return slope_m, yoffset_b
def __evalFunction(self, toks):
    """Evaluate one parsed function call.

    ``toks[0]`` is a sequence whose first item is the function name
    (case-insensitive) and whose remaining items are the arguments.
    ``None`` arguments are discarded; when nothing is left, or the name is
    not recognised, ``0`` is returned.

    Supported names: SUM, AVE, MAX, MIN, COUNT, ALARMS, DISTANCE, SQRT.

    ALARMS([rule_id]) returns a copy of ``self.alarm_list``. When the first
    argument converts to a non-zero integer, the copy is restricted to
    alarms whose 'rule' key ID equals it.
    """
    val = toks[0]
    fnName = val[0].upper()
    args = [arg for arg in val[1:] if arg is not None]  # Filter nones
    if not args:
        # NOTE(review): this guard also makes ALARMS() without a rule_id
        # return 0, although the usage note calls rule_id optional.
        return 0
    if fnName == 'SUM':
        return sum(args)
    elif fnName == 'AVE':
        from tools import average
        return average(args)
    elif fnName == 'MAX':
        return max(args)
    elif fnName == "MIN":
        return min(args)
    elif fnName == "COUNT":
        return len(args)
    elif fnName == "ALARMS":
        # Usage: ALARMS([rule_id])
        # Returns list of alarms in processed batch, optionally filtered
        # by rule_id
        alarm_list = list(self.alarm_list)
        # Accept numeric values as well as digit strings; anything that
        # cannot be read as an integer means "no filter".
        try:
            rule_id = int(args[0])
        except (TypeError, ValueError):
            rule_id = 0
        if rule_id:
            # Lists have no .filter() method; build the filtered list.
            alarm_list = [
                al for al in alarm_list
                if tools.getKey(Alarm, 'rule', al, asID=True) == rule_id
            ]
        return alarm_list
    elif fnName == "DISTANCE":
        dist = 0
        last_gp = None
        for gp in args:
            gp = tools.safe_geopoint(gp)
            if last_gp and gp:
                dist += tools.calcLocDistance(last_gp, gp)
            if gp:
                # Only usable points advance the chain.
                last_gp = gp
        return dist  # m
    elif fnName == "SQRT":
        arg = args[0]
        return math.sqrt(arg)
    return 0
def __evalFunction(self, toks):
    """Evaluate one parsed function call.

    ``toks[0]`` is a sequence whose first item is the function name
    (case-insensitive) and whose remaining items are the arguments.
    ``None`` arguments are discarded; when nothing is left, or the name is
    not recognised, ``0`` is returned.

    Supported names: SUM, AVE, MAX, MIN, COUNT, ALARMS, DISTANCE, SQRT.

    ALARMS([rule_id]) returns a copy of ``self.alarm_list``. When the first
    argument converts to a non-zero integer, the copy is restricted to
    alarms whose 'rule' key ID equals it.
    """
    val = toks[0]
    fnName = val[0].upper()
    args = [arg for arg in val[1:] if arg is not None]  # Filter nones
    if not args:
        # NOTE(review): this guard also makes ALARMS() without a rule_id
        # return 0, although the usage note calls rule_id optional.
        return 0
    if fnName == 'SUM':
        return sum(args)
    elif fnName == 'AVE':
        from tools import average
        return average(args)
    elif fnName == 'MAX':
        return max(args)
    elif fnName == "MIN":
        return min(args)
    elif fnName == "COUNT":
        return len(args)
    elif fnName == "ALARMS":
        # Usage: ALARMS([rule_id])
        # Returns list of alarms in processed batch, optionally filtered
        # by rule_id
        alarm_list = list(self.alarm_list)
        # Accept numeric values as well as digit strings; anything that
        # cannot be read as an integer means "no filter".
        try:
            rule_id = int(args[0])
        except (TypeError, ValueError):
            rule_id = 0
        if rule_id:
            # Lists have no .filter() method; build the filtered list.
            alarm_list = [
                al for al in alarm_list
                if tools.getKey(Alarm, 'rule', al, asID=True) == rule_id
            ]
        return alarm_list
    elif fnName == "DISTANCE":
        dist = 0
        last_gp = None
        for gp in args:
            gp = tools.safe_geopoint(gp)
            if last_gp and gp:
                dist += tools.calcLocDistance(last_gp, gp)
            if gp:
                # Only usable points advance the chain.
                last_gp = gp
        return dist  # m
    elif fnName == "SQRT":
        arg = args[0]
        return math.sqrt(arg)
    return 0
def testAggregatedExpressionParsing(self):
    """Check aggregate expressions against a batch of ten records.

    Builds ten records 10 seconds apart with ``_ts``, ``x`` and ``y``
    columns, runs each expression of the volley through ExpressionParser
    and asserts the result equals the expected target. The wall-clock
    time of each evaluation is printed, flagged when above 100 ms.
    """
    from models import Record
    import numpy as np

    start_ms = tools.unixtime()
    ts_data = [
        long(start_ms + x) for x in range(0, 10 * 10 * 1000, 10 * 1000)
    ]  # 10 sec apart
    x_data = [4, 5, 6, 7, 5, 2, 1, 0, 1, 4]
    y_data = [0, 0, 1.0, 1.0, 1.0, 1, 0, 0, 0, 0]

    record_list = []
    for ts, x, y in zip(ts_data, x_data, y_data):
        r = Record()
        r.setColumnValue("_ts", ts)
        r.setColumnValue("x", x)
        r.setColumnValue("y", y)
        record_list.append(r)

    now_ms = tools.unixtime()

    volley = [
        ["DOT({_ts},{y})", np.dot(ts_data, y_data)],
        ["MAX({y})", max(y_data)],
        ["MIN({y})", 0],
        ["AVE({x})", tools.average(x_data)],
        ["COUNT({y})", 10],
        ["DOT(DELTA({_ts}), {y}) / 1000", 40]  # 40 secs
    ]

    for expr, target in volley:
        tick = datetime.now()
        ep = ExpressionParser(expr, verbose=True, run_ms=now_ms)
        result = ep.run(record_list=record_list)
        tock = datetime.now()
        diff = tock - tick
        # timedelta.microseconds is only the sub-second component, so use
        # the total duration to report runs of one second or more correctly.
        ms = diff.total_seconds() * 1000
        logmessage = "%s took %d ms" % (expr, ms)
        if ms > 100:
            logmessage += " <<<<<<<<<<<<<<<<<<<<<<<<<<< SLOW OP!"
        print(logmessage)
        self.assertEqual(result, target)
def calculate_mfcc(audio_filename):
    """
    Calculate MFCC features for the audio in a given file

    Args:
        audio_filename: file name of the audio

    Returns:
        feature_vectors: MFCC feature vector for the given audio file
                         (the transposed list of per-coefficient results
                         of ``average(..., 5)``)
    """
    fs, audio = wav.read(audio_filename)

    # Make stereo audio mono by averaging the first two channels.
    if len(audio.shape) == 2:
        # Convert to float before adding: summing two integer PCM channels
        # in their native dtype (presumably int16 for 16-bit files; verify
        # against the wav reader) can overflow before the division.
        left = audio[:, 0].astype(np.float64)
        right = audio[:, 1].astype(np.float64)
        audio = (left + right) / 2

    # Calculate MFCC feature with the window frame it was designed for
    input_vectors = mfcc(audio, winlen=0.02, winstep=0.01, samplerate=fs,
                         numcep=MFCC_INPUTS)

    # One pass of average(column, 5) per cepstral coefficient column.
    input_vectors = [average(input_vectors[:, i], 5)
                     for i in range(MFCC_INPUTS)]

    feature_vectors = np.transpose(input_vectors)

    return feature_vectors
def main(args):
    """Estimate the tempo of a .wav song and save a BPM correlation chart.

    ``args[0]`` is the path of the song. The song is cut into packs of 128
    samples; for each pack the instant energy is compared against a
    threshold derived from roughly the last 1.3 seconds of energies.
    Candidate tempos from 60 to 199 BPM are then scored by
    ``compute_beat_with_impulse_trains``; the best one is printed and the
    per-BPM correlations are saved as a bar chart under ``../graphs/``.

    The sample rate ``fs`` returned by ``wavread`` is used for every
    time-to-sample conversion (previously hard-coded to 44100), so files
    with other rates get correct window sizes and periods.
    """
    # Load up the song (must be .wav) into memory
    amp_data, fs, enc = wavread(args[0])

    #############################################
    # Maintenance of all calculated energies
    # and average energies:
    #############################################
    energy_spikes = []
    energy_averages = []
    energy_averages_SDs = []
    energies = []

    #############################################
    # Iteration over sample packs of song:
    #############################################

    # static parameters
    SAMPLE_PACK_SIZE = int(256 / 2)
    # number of packs covering about 1.3 seconds of audio
    ENERGY_HISTORY_SIZE = int((fs * 1.3) / SAMPLE_PACK_SIZE)
    THRESHOLD_C = 1.3  # only used by the commented-out threshold below

    # contains the last <ENERGY_HISTORY_SIZE> energies
    energy_history = deque(maxlen=ENERGY_HISTORY_SIZE)

    # Analyse at most 120 seconds, starting 10% into the song.
    start = int(len(amp_data) * 0.1)
    end = start + int(fs * 120)  # 120 seconds
    end = min(len(amp_data), end)

    # cuts off the extra samples at the end.
    nthIndex = range(start, end, SAMPLE_PACK_SIZE)

    for i in nthIndex:
        sample_pack = amp_data[i:i + SAMPLE_PACK_SIZE]
        if len(sample_pack) != SAMPLE_PACK_SIZE:
            continue

        # calculate the instance energy (squared) of this sample pack
        energy = tools.average(tools.squared(sample_pack), isLR=True)

        # append the instance energy to the right of the history list
        energy_history.append(energy)

        if len(energy_history) >= ENERGY_HISTORY_SIZE:
            # the history buffer is full so we can begin comparing
            # average energy
            energies.append(energy)

            average_energy = tools.average(energy_history)
            # average_energy_SD = tools.standard_dev(energy_history, average_energy)
            average_energy_diff = tools.linear_dev(energy_history,
                                                   average_energy)

            energy_averages.append(average_energy)
            energy_averages_SDs.append(average_energy_diff)

            # determined_thresh = average_energy * (THRESHOLD_C + 0.5 * average_energy_SD)
            determined_thresh = (average_energy * 1.4
                                 + 0.1 * average_energy_diff)

            # check for energy spike
            if energy > determined_thresh:
                # we have an energy spike!
                energy_spikes.append(energy)
            else:
                # no spike
                energy_spikes.append(0)

    ######################################################################

    # period = int((60.0 / bpm) * fs / SAMPLE_PACK_SIZE)
    BPMs = range(60, 200, 1)
    periods = [int((60.0 / bpm) * fs / SAMPLE_PACK_SIZE) for bpm in BPMs]

    period_i, correlations = compute_beat_with_impulse_trains(energies,
                                                              periods)
    BPM = BPMs[period_i]
    print("BPM: " + str(BPM))

    # Only consumed by the commented-out plotting below.
    impulse_train = generate_impulse_train(periods[period_i],
                                           len(energies), 0.5)

    ######################################################################

    fig, axs = plt.subplots(nrows=2, ncols=1)

    x = BPMs
    y = correlations

    axs[0].bar(x, y, facecolor='b', alpha=0.5, linewidth=1, width=1)
    axs[0].set_ylabel('E_BPMs')
    axs[0].set_xlabel('BPM')

    # x = range(0, len(energy_spikes))
    # y = energy_spikes
    # y2 = impulse_train
    #
    # # axs[1].scatter(x, y, marker="|")
    # # axs[1].bar(x, y, color='g', linewidth=0)
    # axs[1].plot(x, y, color='g', linewidth=1)
    # axs[1].plot(x, y2, color='b', linewidth=1, alpha=0.3)
    # axs[1].set_ylabel('Energy')
    # axs[1].set_xlabel('Frame')

    plt.savefig("../graphs/" + args[0].split('/')[-1] + "_graph.png")
def __evalFunction(self, toks):
    """Evaluate one parsed function call and wrap the result in a list.

    ``toks[0]`` is a sequence whose first item is the function name
    (case-insensitive) and whose remaining items are the arguments.
    ``None`` arguments are discarded; when nothing is left the bare
    integer ``0`` is returned. Every recognised function returns a
    one-element list; an unrecognised name returns ``None``.

    Supported names: SUM, AVE, MAX, MIN, COUNT, ALARMS([rule_id]),
    DISTANCE, SQRT and SINCE(event).
    """
    call = toks[0]
    name = call[0].upper()
    params = [item for item in call[1:] if item is not None]  # drop Nones
    if not params:
        return 0

    if name == 'SUM':
        values = self.__getArglist(params)
        return [sum(values)] if values else [0]

    if name == 'AVE':
        from tools import average
        values = self.__getArglist(params)
        return [average(values)] if values else [0]

    if name == 'MAX':
        values = self.__getArglist(params)
        return [max(values)] if values else [0]

    if name == "MIN":
        values = self.__getArglist(params)
        return [min(values)] if values else [0]

    if name == "COUNT":
        return [len(self.__getArglist(params))]

    if name == "ALARMS":
        from models import Alarm
        # Usage: ALARMS([rule_id])
        # Returns list of alarms in processed batch, optionally filtered
        # by rule_id
        alarms = list(self.alarm_list)
        # params is known to be non-empty at this point.
        if type(params[0]) in [int, long, float]:
            rule_id = int(params[0])
            if rule_id:
                alarms = [
                    al for al in alarms
                    if tools.getKey(Alarm, 'rule', al, asID=True) == rule_id
                ]
        return [alarms]

    if name == "DISTANCE":
        total = 0
        previous = None
        for raw in self.__getArglist(params):
            point = tools.safe_geopoint(raw)
            if not point:
                # Unusable points are skipped without breaking the chain.
                continue
            if previous:
                total += tools.calcLocDistance(previous, point)
            previous = point
        return [total]  # m

    if name == "SQRT":
        return [math.sqrt(params[0])]

    if name == "SINCE":
        # Returns ms since event (argument), or 0 if none found
        event = params[0]
        elapsed = 0
        now = self.run_ms
        try:
            if not event:
                pass
            elif type(event) in [long, float]:
                # Treat as ms timestamp
                elapsed = now - event
            elif not isinstance(event, basestring):
                if event.kind() == 'Alarm':
                    elapsed = now - tools.unixtime(event.dt_start)
                elif event.kind() == 'Record':
                    elapsed = now - tools.unixtime(event.dt_recorded)
        except Exception as e:
            # Best effort: log and fall back to the default of 0.
            logging.warning("Error in SINCE() - %s" % e)
        return [elapsed]

    # Unrecognised function names yield None.
    return None
def main(args):
    """Estimate the tempo of a .wav song and save a BPM correlation chart.

    ``args[0]`` is the path of the song. The song is cut into packs of 128
    samples; for each pack the instant energy is compared against a
    threshold derived from roughly the last 1.3 seconds of energies.
    Candidate tempos from 60 to 199 BPM are then scored by
    ``compute_beat_with_impulse_trains``; the best one is printed and the
    per-BPM correlations are saved as a bar chart under ``../graphs/``.

    The sample rate ``fs`` returned by ``wavread`` is used for every
    time-to-sample conversion (previously hard-coded to 44100), so files
    with other rates get correct window sizes and periods.
    """
    # Load up the song (must be .wav) into memory
    amp_data, fs, enc = wavread(args[0])

    #############################################
    # Maintenance of all calculated energies
    # and average energies:
    #############################################
    energy_spikes = []
    energy_averages = []
    energy_averages_SDs = []
    energies = []

    #############################################
    # Iteration over sample packs of song:
    #############################################

    # static parameters
    SAMPLE_PACK_SIZE = int(256 / 2)
    # number of packs covering about 1.3 seconds of audio
    ENERGY_HISTORY_SIZE = int((fs * 1.3) / SAMPLE_PACK_SIZE)
    THRESHOLD_C = 1.3  # only used by the commented-out threshold below

    # contains the last <ENERGY_HISTORY_SIZE> energies
    energy_history = deque(maxlen=ENERGY_HISTORY_SIZE)

    # Analyse at most 120 seconds, starting 10% into the song.
    start = int(len(amp_data) * 0.1)
    end = start + int(fs * 120)  # 120 seconds
    end = min(len(amp_data), end)

    # cuts off the extra samples at the end.
    nthIndex = range(start, end, SAMPLE_PACK_SIZE)

    for i in nthIndex:
        sample_pack = amp_data[i:i + SAMPLE_PACK_SIZE]
        if len(sample_pack) != SAMPLE_PACK_SIZE:
            continue

        # calculate the instance energy (squared) of this sample pack
        energy = tools.average(tools.squared(sample_pack), isLR=True)

        # append the instance energy to the right of the history list
        energy_history.append(energy)

        if len(energy_history) >= ENERGY_HISTORY_SIZE:
            # the history buffer is full so we can begin comparing
            # average energy
            energies.append(energy)

            average_energy = tools.average(energy_history)
            # average_energy_SD = tools.standard_dev(energy_history, average_energy)
            average_energy_diff = tools.linear_dev(energy_history,
                                                   average_energy)

            energy_averages.append(average_energy)
            energy_averages_SDs.append(average_energy_diff)

            # determined_thresh = average_energy * (THRESHOLD_C + 0.5 * average_energy_SD)
            determined_thresh = (average_energy * 1.4
                                 + 0.1 * average_energy_diff)

            # check for energy spike
            if energy > determined_thresh:
                # we have an energy spike!
                energy_spikes.append(energy)
            else:
                # no spike
                energy_spikes.append(0)

    ######################################################################

    # period = int((60.0 / bpm) * fs / SAMPLE_PACK_SIZE)
    BPMs = range(60, 200, 1)
    periods = [int((60.0 / bpm) * fs / SAMPLE_PACK_SIZE) for bpm in BPMs]

    period_i, correlations = compute_beat_with_impulse_trains(
        energies, periods)
    BPM = BPMs[period_i]
    print("BPM: " + str(BPM))

    # Only consumed by the commented-out plotting below.
    impulse_train = generate_impulse_train(periods[period_i],
                                           len(energies), 0.5)

    ######################################################################

    fig, axs = plt.subplots(nrows=2, ncols=1)

    x = BPMs
    y = correlations

    axs[0].bar(x, y, facecolor='b', alpha=0.5, linewidth=1, width=1)
    axs[0].set_ylabel('E_BPMs')
    axs[0].set_xlabel('BPM')

    # x = range(0, len(energy_spikes))
    # y = energy_spikes
    # y2 = impulse_train
    #
    # # axs[1].scatter(x, y, marker="|")
    # # axs[1].bar(x, y, color='g', linewidth=0)
    # axs[1].plot(x, y, color='g', linewidth=1)
    # axs[1].plot(x, y2, color='b', linewidth=1, alpha=0.3)
    # axs[1].set_ylabel('Energy')
    # axs[1].set_xlabel('Frame')

    plt.savefig("../graphs/" + args[0].split('/')[-1] + "_graph.png")
def least_squares(t0, t1, data):
    """Return ``1 - (sum of squared residuals / sum of squared deviations)``.

    The residual of a point is its price minus ``estimatedPrice(t0, t1,
    km)``; the deviation is its price minus the average of
    ``data['price']``. Both sums are accumulated by ``tools.sum``
    (presumably over the (km, price) pairs of ``data`` - confirm against
    that helper).
    """
    mean_price = tools.average(data['price'])

    def squared_residual(km, price):
        return (price - estimatedPrice(t0, t1, km)) ** 2

    def squared_deviation(km, price):
        return (price - mean_price) ** 2

    residual_total = tools.sum(t0, t1, data, squared_residual)
    deviation_total = tools.sum(t0, t1, data, squared_deviation)
    return 1 - (residual_total / deviation_total)