def process(indir, outdir, settingsfile, paramfile):
    """
    Main sound file processing procedure.

    Finds all wave files in 'indir', reads them in one by one, and applies
    functions corresponding to each parameter in the parameters file.
    If no parameters file is specified, the default is
    "defaults/parameters/default.csv". If no settings file is specified,
    the default is "defaults/settings/default.csv".

    :param indir: directory scanned (non-recursively) for ``*.wav`` files
    :param outdir: output directory (currently only echoed; not written to here)
    :param settingsfile: path passed to helpers.get_settings
    :param paramfile: path passed to helpers.get_parameters
    """
    measurements = measure.measurements
    params = helpers.get_parameters(paramfile)
    settings = helpers.get_settings(settingsfile)
    # Fails fast with ValueError if the settings file has a non-integer
    # frameshift; the value itself is consumed by SoundFile, not here.
    frameshift = int(settings["frameshift"])
    print("indir=%s, outdir=%s" % (indir, outdir))

    # Build the list of wav files. os.path.join handles the trailing
    # separator, so no manual `indir[-1] != "/"` fix-up is needed.
    # TODO fix this so it uses absolute file paths (os.getenv)
    filelist = [os.path.join(indir, f)
                for f in os.listdir(indir) if f.endswith(".wav")]

    for wav in filelist:
        print("Processing ", wav)
        # Companion .mat path ("foo.wav" -> "foo.mat"); reserved for the
        # TextGrid work below, not read or written yet.
        matfile = wav[:-3] + "mat"
        # TODO TextGrid stuff

        # build SoundFile object
        soundfile = helpers.SoundFile(settings, wav)

        # Run each requested measurement and stash the result on the
        # SoundFile, keyed by parameter name.
        for param in params:
            soundfile.measurements[param] = measurements[param](soundfile)

    # it is what it is...
    print("Done processing.")
def generate_test_file(wavfile):
    """
    Generates a file from a wave file in defaults/sounds to use for
    testing purposes.

    :param wavfile: path to a wave file readable by ``sio.read``
    :return: a helpers.SoundFile built from the default settings
    """
    # NOTE(review): the original declared `global tester` but never
    # assigned it, making the statement a no-op; it has been removed.
    sf = "../defaults/settings/default.csv"
    pf = "../defaults/parameters/default.csv"
    settings = helpers.get_settings(sf)
    # Loaded for its side effects / validation only; the value is unused.
    params = helpers.get_parameters(pf)
    # Read the wave file; also unused below, but keeps the behavior of
    # failing early on an unreadable file.
    Fs, data = sio.read(wavfile)
    # Frame count the file would yield at the configured frameshift (ms).
    # Currently unused — presumably intended for future assertions; kept
    # so a bad 'frameshift' setting still raises here. TODO confirm.
    data_len = math.floor(len(data) / Fs * 1000 / int(settings['frameshift']))
    soundfile = helpers.SoundFile(settings, wavfile)
    return soundfile
def main():
    """
    Train two classifier pipelines on the same analogy data and report the
    test samples that BOTH models misclassify the same way (overlap errors).
    """
    # Positive / negative example files. NOTE(review): paths are relative
    # to the working directory — confirm where this is meant to run from.
    positive_set = '../latest_analogy/test_extractions/bc_samples.txt'  #'test_extractions/test-neural-hash-samples.txt'
    negative_set = '../latest_analogy/test_extractions/bc_grounds.txt'  #'test_extractions/test-neural-hash-ground.txt'
    analogy_list = functions.get_list_re(positive_set)
    non_analogy_list = functions.get_list_re(negative_set)
    # Label positives 1, negatives 0, then split; presumably 0.15 is the
    # test fraction — verify against functions.preprocess.
    samples = [(text, 1) for text in analogy_list] + [(text, 0) for text in non_analogy_list]
    train_data, train_labels, test_data, test_labels = functions.preprocess(
        samples, 0.15)
    # (model key, text-representation key) pairs resolved via helpers.
    overlap_input = [('LP', 'count'), ('TSVM', 'tfidf')]
    # Mark ~70% of training labels as unlabeled (-1), reproducibly —
    # the -1 convention matches semi-supervised estimators.
    rng = np.random.RandomState(42)
    random_unlabeled_points = rng.rand(len(train_labels)) < 0.7
    train_labels = np.array(train_labels)
    test_labels = np.array(test_labels)
    train_labels[random_unlabeled_points] = -1
    train_data = np.array(train_data)
    prediction_second_input = []
    pipeline = []
    no_as_yes = []  # predictions with label NO classified with label YES
    yes_as_no = []  # predictions with label YES classified with label NO
    count = 0
    for element in overlap_input:
        # Resolve estimator, vectorizer, and its parameter grid by key.
        pipeline = helpers.get_function(element[0])
        representation = helpers.get_function(element[1])
        parameters = helpers.get_parameters(element[0])
        train_set = representation.fit_transform(train_data).toarray()
        test_set = representation.transform(test_data).toarray()
        grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1,
                                   verbose=10, error_score=-1)
        grid_search.fit(train_set, train_labels)
        # First model's predictions go to `prediction`, second model's to
        # `prediction_second_input`; both are needed for the overlap scan.
        if count == 0:
            prediction = grid_search.best_estimator_.predict(test_set)
            matrix = confusion_matrix(test_labels, prediction, labels=[1, 0])
        else:
            prediction_second_input = grid_search.best_estimator_.predict(
                test_set)
            matrix = confusion_matrix(test_labels, prediction_second_input,
                                      labels=[1, 0])
        count += 1
        # NOTE(review): original indentation was lost; printing the
        # confusion matrix per model (inside the loop) is assumed here.
        print(matrix)
    # Collect samples both models got wrong in the same direction.
    for i in range(len(test_labels)):
        #print(test_labels[i], prediction[i], prediction_second_input[i])
        if (test_labels[i] != prediction[i]) and (
                prediction[i] == prediction_second_input[i]):
            if test_labels[i] == 0:
                no_as_yes.append(test_data[i])
            else:
                yes_as_no.append(test_data[i])
    print("Overlapping NO as YES:")
    l1 = len(no_as_yes)
    print("Number: ", l1)
    for i in range(l1):
        print(no_as_yes[i])
    print("Overlapping YES as NO:")
    l2 = len(yes_as_no)
    print("Number: ", l2)
    for i in range(l2):
        print(yes_as_no[i])