def cross_validation(T, y):
    """Leave-one-out cross validation of the feature-selection pipeline.

    For each held-out sample, features are re-selected and the threshold is
    re-fit on the remaining samples, then the held-out sample is scored.

    Args:
        T: feature statistics array (samples x features).
        y: labels (1 = positive class).

    Returns:
        Fraction of held-out samples classified correctly.
    """
    from sklearn.model_selection import LeaveOneOut

    labels = np.array(y)
    correct = []
    for train_idx, valid_idx in LeaveOneOut().split(T):
        T_train, T_valid = T[train_idx], T[valid_idx]
        y_train, y_valid = labels[train_idx], labels[valid_idx]

        # Fit normalization, feature selection, center and threshold on the
        # training fold only; reuse mean/std for the held-out row.
        T_train, mean, std = feature.normalize(T_train)
        T_principle, T_principle_index, dist, AUC = feature_select(
            T_train, y_train, k=3)
        ts = threshold(dist, y_train)
        C = gen_center(T_principle, y_train)

        T_valid = (T_valid - mean) / std
        dist_valid = util.distance(T_valid.T[T_principle_index].T, C)

        # Predicted positive iff the distance to the class center is below
        # the threshold; score 1 when prediction and true label agree.
        predicted_positive = dist_valid[0] < ts
        correct.append(int(predicted_positive == (y_valid[0] == 1)))

    return sum(correct) / len(correct)
def cross_validation2(T, y, principle_index=(0, 18, 43)):
    """Leave-one-out evaluation using a fixed set of principle features.

    Unlike ``cross_validation``, the feature subset is not re-selected per
    fold.  The indices were previously hard-coded to ``[0, 18, 43]``; they
    are now a parameter (with the same default) so the evaluation can be
    re-run with any feature subset — callers using the old two-argument
    form are unaffected.

    Args:
        T: feature statistics array (samples x features).
        y: labels.
        principle_index: column indices of the feature subset to use.
            Defaults to the original hard-coded ``(0, 18, 43)``.

    Returns:
        np.ndarray of 0/1 predictions, one per held-out sample
        (1 when the sample's distance to the class center is below the
        threshold learned on the training fold).
    """
    from sklearn.model_selection import LeaveOneOut

    y = np.array(y)
    T_principle_index = np.asarray(principle_index)
    judge = list()
    for train_index, valid_index in LeaveOneOut().split(T):
        T_train = T[train_index]
        T_valid = T[valid_index]
        y_train = y[train_index]

        # Normalize on the training fold; reuse mean/std for the held-out row.
        T_train, mean, std = feature.normalize(T_train)
        T_principle = T_train.T[T_principle_index].T
        C = gen_center(T_principle, y_train)
        dist = util.distance(T_principle, C)
        ts = threshold(dist, y_train)

        T_valid = (T_valid - mean) / std
        dist_valid = util.distance(T_valid.T[T_principle_index].T, C)
        judge.append(1 if dist_valid[0] < ts else 0)
    return np.array(judge)
def guess_syllables(wave, window_size=10, len_threshold=0.01):
    """Estimate the number of syllables in *wave* from its amplitude envelope.

    Args:
        wave: input waveform.
        window_size: smoothing window applied to the amplitude envelope.
        len_threshold: currently unused; kept for backward compatibility.

    Returns:
        Count of local maxima in the smoothed, normalized envelope —
        each peak is taken as one syllable nucleus.
    """
    amp, _ = segmentation(wave)
    smoothed = feature.normalize(moving_window(amp, window_size))
    peaks = signal.argrelmax(smoothed, order=7)
    return len(peaks[0])
def plotstuff(filename):
    """Plot the smoothed amplitude envelope of a named audio sample.

    Args:
        filename: sample name (without extension) under ``audio/samples/``.
    """
    wave = fileio.Wave("audio/samples/" + filename + ".wav")
    amp, freq = syllable.segmentation(wave)
    title(filename)
    plot(feature.normalize(syllable.moving_window(amp, 10)), label="amp")
    legend(framealpha=0.5)
    show()
def populateSampleData(sample_directory, freq=False):
    """Build one abstract signature per sample class found in a directory.

    Args:
        sample_directory: directory containing the sample files.
        freq: forwarded to ``feature.abstract_cartoon``.

    Returns:
        Dict mapping class name -> normalized accumulated signature.
    """
    print("loading data")
    files = readAllFilesInDirectory(sample_directory)

    # Group raw samples by class name (the second element of each entry).
    sampleData = {name: [] for name in {entry[1] for entry in files}}
    for entry in files:
        sampleData[entry[1]].append(entry[0])

    # Compute signatures: fold each sample's cartoon onto the running
    # accumulation after peak alignment, then normalize the final result.
    abstractSampleData = {}
    for key, samples in sampleData.items():
        cartoons = [
            feature.normalize(feature.abstract_cartoon(sample, freq=freq))
            for sample in samples
        ]
        print(key, len(cartoons))
        for i in range(1, len(cartoons)):
            cartoons[i] = (
                feature.align_peaks(cartoons[i], cartoons[i - 1], 1)
                + cartoons[i - 1]
            )
        abstractSampleData[key] = feature.normalize(cartoons[-1])
    return abstractSampleData
def predict(T, y, T_test):
    """Classify each row of *T_test* by its distance to the class center.

    Features are selected and the decision threshold fit on the full
    training set (T, y); the training normalization is then applied to
    T_test before measuring distances in the selected subspace.

    Args:
        T: training feature statistics (samples x features).
        y: training labels.
        T_test: feature statistics to classify.

    Returns:
        Tuple ``(judge, T_principle_index, AUC, dist)`` where ``judge`` is
        an np.ndarray of 0/1 predictions (1 = distance below threshold).
    """
    labels = np.array(y)
    T, mean, std = feature.normalize(T)
    T_principle, T_principle_index, dist, AUC = feature_select(T, labels, k=4)
    ts = threshold(dist, labels)
    C = gen_center(T_principle, labels)

    T_test = (T_test - mean) / std
    dist_test = util.distance(T_test.T[T_principle_index].T, C)

    judge = np.array([1 if d < ts else 0 for d in dist_test])
    return judge, T_principle_index, AUC, dist