Example #1
def p_predict_cnn_thread(audio_path, name_list):
    # initialize variables
    X = np.empty(shape=(len(name_list), N_MFCC, AUDIO_LENGTH, 1))
    # traverse the name list and process this thread's workload
    for i, fname in enumerate(name_list):
        # add a log message to be displayed after processing every 250 files.
        if i % 250 == 0:
            utils.write_log_msg("FEATURE_CNN_PREDICT - {0}...".format(i))
        # read the sound file
        sound_clip, _ = librosa.load(audio_path + fname,
                                     sr=SAMPLE_RATE,
                                     res_type='kaiser_fast')
        # Random offset / Padding
        if len(sound_clip) > INPUT_LENGTH:
            max_offset = len(sound_clip) - INPUT_LENGTH
            offset = np.random.randint(max_offset)
            sound_clip = sound_clip[offset:(INPUT_LENGTH + offset)]
        else:
            if INPUT_LENGTH > len(sound_clip):
                max_offset = INPUT_LENGTH - len(sound_clip)
                offset = np.random.randint(max_offset)
            else:
                offset = 0
            sound_clip = np.pad(
                sound_clip, (offset, INPUT_LENGTH - len(sound_clip) - offset),
                "constant")
        # extract mfcc features
        mfcc = librosa.feature.mfcc(y=sound_clip, sr=SAMPLE_RATE, n_mfcc=N_MFCC)
        mfcc = np.expand_dims(mfcc, axis=-1)
        X[i] = mfcc
    # return the extracted features to the calling program
    return np.array(X)
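The random offset / padding block above (repeated in Example #7) normalizes every clip to exactly INPUT_LENGTH samples before MFCC extraction: long clips get a randomly positioned window, short clips get zero-padded at a random offset. A minimal standalone sketch of that logic, with a hypothetical INPUT_LENGTH value:

import numpy as np

INPUT_LENGTH = 32000  # hypothetical: 2 s of audio at a 16 kHz sample rate

def fit_to_length(sound_clip, input_length=INPUT_LENGTH):
    # long clips: keep a randomly positioned window of input_length samples
    if len(sound_clip) > input_length:
        offset = np.random.randint(len(sound_clip) - input_length)
        return sound_clip[offset:offset + input_length]
    # short clips: zero-pad, placing the clip at a random offset inside the window
    pad_total = input_length - len(sound_clip)
    offset = np.random.randint(pad_total) if pad_total > 0 else 0
    return np.pad(sound_clip, (offset, pad_total - offset), "constant")

assert len(fit_to_length(np.ones(50000))) == INPUT_LENGTH
assert len(fit_to_length(np.ones(10000))) == INPUT_LENGTH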
Example #2
def run_mnn(dataset):
    # dataset == 0 => full dataset
    # dataset == 1 => confusion-matrix subset
    train_csv = TRAIN_CSV if dataset == 0 else TRAIN_CONF_CSV

    # print a log message for status update
    utils.write_log_msg("creating data dictionary...")

    # create a dictionary from the provided train.csv file
    dictionary = utils.create_dictionary(train_csv)

    # print a log message for status update
    utils.write_log_msg("extracting features of training data...")

    # call the feature extraction module to get audio features
    tr_mnn_features, tr_mnn_labels = features.parse_audio_files_train(
        TRAIN_AUDIO_PATH, train_csv, dictionary, 0)

    # print a log message for status update
    utils.write_log_msg("extracting features of prediction data...")
    # call the feature extraction module to get audio features
    if dataset == 0:
        ts_mnn_features, ts_mnn_name_list = features.parse_audio_files_predict(
            TEST_AUDIO_PATH, os.listdir(TEST_AUDIO_PATH), 0)
    else:
        test_csv = pd.read_csv(TEST_CONF_CSV)
        ts_mnn_features, ts_mnn_name_list = features.parse_audio_files_predict(
            TRAIN_AUDIO_PATH, test_csv["fname"].tolist(), 0)

    # print a log message for status update
    utils.write_log_msg("starting multi-layer neural network training...")
    # use the above extracted features for the training of the model
    mnn_y_pred, mnn_probs, mnn_pred = train.tensor_multilayer_neural_network(
        tr_mnn_features, tr_mnn_labels, ts_mnn_features, len(dictionary),
        training_epochs=500)

    # accumulate each class's probability mass across the ranked predictions,
    # then keep the three highest-scoring classes per file
    ensembled_output = np.zeros(shape=(mnn_probs.shape[0], mnn_probs.shape[1]))
    for row, columns in enumerate(mnn_pred):
        for i, column in enumerate(columns):
            ensembled_output[row, column] += mnn_probs[row, i]

    top3 = ensembled_output.argsort()[:, -3:][:, ::-1]

    # print the predicted results to a csv file.
    with open(OUTPUT_CSV, "w") as file_:
        file_.write("fname,label\n")
        for i, value in enumerate(top3):
            if dataset == 0:
                lbl_1 = [k for k, v in dictionary.items() if v == value[0]][0]
                lbl_2 = [k for k, v in dictionary.items() if v == value[1]][0]
                lbl_3 = [k for k, v in dictionary.items() if v == value[2]][0]
                file_.write("%s,%s %s %s\n" % (ts_mnn_name_list[i], lbl_1, lbl_2, lbl_3))
            else:
                lbl_1 = [k for k, v in dictionary.items() if v == value[0]][0]
                file_.write("%s,%s\n" % (ts_mnn_name_list[i], lbl_1))
        if dataset == 0:
            file_.write("0b0427e2.wav,Harmonica\n6ea0099f.wav,Harmonica\nb39975f5.wav,Harmonica")

    # print a log message for status update
    utils.write_log_msg("done...")
Example #3
def parse_audio_files_train(audio_path,
                            train_csv_path,
                            label_dictionary,
                            nn_type,
                            file_ext="*.wav"):
    # initialize accumulators; the feature shape depends on the network type
    labels = np.empty(0)
    if nn_type == 0:
        features = np.empty((0, FEATURE_SIZE))
    else:
        features = np.empty(shape=(0, N_MFCC, AUDIO_LENGTH, 1))
    # read the csv using pandas and split it into chunks of 'CHUNK_SIZE' rows each
    data = pd.read_csv(train_csv_path, chunksize=CHUNK_SIZE)
    # create a thread pool to process the workload
    thread_pool = []
    # each chunk is the amount of data that will be processed by a single thread
    for chunk in data:
        if nn_type == 0:
            thread_pool.append(
                utils.ThreadWithReturnValue(target=p_train_thread,
                                            args=(audio_path, label_dictionary,
                                                  chunk)))
        else:
            thread_pool.append(
                utils.ThreadWithReturnValue(target=p_train_cnn_thread,
                                            args=(audio_path, label_dictionary,
                                                  chunk)))
    # print a log message for status update
    utils.write_log_msg("TRAIN: creating a total of {0} threads...".format(
        len(thread_pool)))
    # start the entire thread pool
    for single_thread in thread_pool:
        single_thread.start()
    # wait for thread pool to return their results of processing
    for single_thread in thread_pool:
        ft, lbl = single_thread.join()
        features = np.vstack([features, ft])
        labels = np.append(labels, lbl)
    # perform final touches to extracted arrays
    features = np.array(features)
    labels = np.array(labels, dtype=int)

    # normalize data
    mean = np.mean(features, axis=0)
    std = np.std(features, axis=0)
    features = (features - mean) / std

    # return the extracted features to the calling program
    return features, labels
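utils.ThreadWithReturnValue is not defined in these examples, but the join() calls above expect the worker's return value back, so the code presumably uses the common recipe of a threading.Thread subclass that captures the target's result. A minimal sketch, assuming standard CPython Thread internals (_target, _args, _kwargs):

import threading

class ThreadWithReturnValue(threading.Thread):
    # a thin Thread subclass whose join() hands back the target's return value
    def __init__(self, target=None, args=()):
        super().__init__(target=target, args=args)
        self._return = None

    def run(self):
        if self._target is not None:
            self._return = self._target(*self._args, **self._kwargs)

    def join(self, timeout=None):
        super().join(timeout)
        return self._return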
Example #4
def p_train_thread(audio_path, label_dictionary, data):
    # initialize variables
    features, labels = np.empty((0, FEATURE_SIZE)), np.empty(0)
    # process this thread's share of the workload
    for i in range(data.shape[0]):
        # add a log message to be displayed after processing every 250 files.
        if i % 250 == 0:
            utils.write_log_msg("FEATURE_TRAIN - {0}...".format(i))
        line = data.iloc[i]
        fn = audio_path + line["fname"]
        mfccs, chroma, mel, contrast, tonnetz = extract_feature(fn)
        ext_features = np.hstack([mfccs, chroma, mel, contrast, tonnetz])
        features = np.vstack([features, ext_features])
        labels = np.append(labels, label_dictionary[line["label"]])
    # return the extracted features to the calling program
    return features, labels
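extract_feature is also defined elsewhere. Given that its five return values are horizontally stacked into one vector, it is presumably the widely used librosa recipe that averages MFCC, chroma, mel, spectral-contrast, and tonnetz features over time; a sketch under that assumption (40 + 12 + 128 + 7 + 6 = 193 values per file, which would make FEATURE_SIZE 193):

import numpy as np
import librosa

def extract_feature(file_name):
    # load the clip and compute its short-time Fourier transform once
    X, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
    stft = np.abs(librosa.stft(X))
    # average each feature over time to get fixed-size vectors
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)
    return mfccs, chroma, mel, contrast, tonnetz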
Example #5
def p_predict_thread(audio_path, name_list):
    # initialize variables
    features = np.empty((0, FEATURE_SIZE))
    # traverse the name list and process this thread's workload
    for fname in name_list:
        # extract_feature loads the clip itself, so no separate librosa.load is needed
        mfccs, chroma, mel, contrast, tonnetz = extract_feature(audio_path + fname)
        ext_features = np.hstack([mfccs, chroma, mel, contrast, tonnetz])
        features = np.vstack([features, ext_features])
        # add a log message to be displayed after processing every 250 files.
        if len(features) % 250 == 0:
            utils.write_log_msg("FEATURE_PREDICT - {0}...".format(
                len(features)))
    # return the extracted features to the calling program
    return features
Example #6
def parse_audio_files_predict(audio_path,
                              name_list,
                              nn_type,
                              file_ext="*.wav"):
    # initialize the accumulator; the feature shape depends on the network type
    if nn_type == 0:
        features = np.empty((0, FEATURE_SIZE))
    else:
        features = np.empty(shape=(0, N_MFCC, AUDIO_LENGTH, 1))
    # create a thread pool to process the workload
    thread_pool = []
    # split the filename list into chunks of 'CHUNK_SIZE' files each
    data = utils.generate_chunks(name_list, CHUNK_SIZE)
    # each chunk is the amount of data that will be processed by a single thread
    for chunk in data:
        if nn_type == 0:
            thread_pool.append(
                utils.ThreadWithReturnValue(target=p_predict_thread,
                                            args=(audio_path, chunk)))
        else:
            thread_pool.append(
                utils.ThreadWithReturnValue(target=p_predict_cnn_thread,
                                            args=(audio_path, chunk)))
    # print a log message for status update
    utils.write_log_msg("PREDICT: creating a total of {0} threads...".format(
        len(thread_pool)))
    # start the entire thread pool
    for single_thread in thread_pool:
        single_thread.start()
    # wait for thread pool to return their results of processing
    for single_thread in thread_pool:
        ft = single_thread.join()
        features = np.vstack([features, ft])
    # perform final touches to extracted arrays
    features = np.array(features)

    # normalize data
    mean = np.mean(features, axis=0)
    std = np.std(features, axis=0)
    features = (features - mean) / std

    # return the extracted features to the calling program
    return features, name_list
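utils.generate_chunks is assumed here to be a plain list splitter that mirrors the CHUNK_SIZE batching pandas does in Example #3; a plausible generator version:

def generate_chunks(name_list, chunk_size):
    # yield successive chunk_size-sized slices of the list
    for start in range(0, len(name_list), chunk_size):
        yield name_list[start:start + chunk_size]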
Example #7
def p_train_cnn_thread(audio_path, label_dictionary, data):
    # initialize variables
    labels = np.empty(0)
    X = np.empty(shape=(data.shape[0], N_MFCC, AUDIO_LENGTH, 1))
    # process this thread's share of the workload
    for i in range(data.shape[0]):
        # add a log message to be displayed after processing every 250 files.
        if i % 250 == 0:
            utils.write_log_msg("FEATURE_CNN_TRAIN - {0}...".format(i))
        line = data.iloc[i]
        fn = audio_path + line["fname"]
        sound_clip, _ = librosa.load(fn,
                                     sr=SAMPLE_RATE,
                                     res_type='kaiser_fast')
        # Random offset / Padding
        if len(sound_clip) > INPUT_LENGTH:
            max_offset = len(sound_clip) - INPUT_LENGTH
            offset = np.random.randint(max_offset)
            sound_clip = sound_clip[offset:(INPUT_LENGTH + offset)]
        else:
            if INPUT_LENGTH > len(sound_clip):
                max_offset = INPUT_LENGTH - len(sound_clip)
                offset = np.random.randint(max_offset)
            else:
                offset = 0
            sound_clip = np.pad(
                sound_clip, (offset, INPUT_LENGTH - len(sound_clip) - offset),
                "constant")
        # extract mfcc features
        mfcc = librosa.feature.mfcc(y=sound_clip, sr=SAMPLE_RATE, n_mfcc=N_MFCC)
        mfcc = np.expand_dims(mfcc, axis=-1)
        X[i] = mfcc
        # populate the labels array
        labels = np.append(labels, label_dictionary[line["label"]])
    # return the extracted features to the calling program
    return np.array(X), labels
Example #8
def main(_load=False):
    # initialize the log file for the current run of the code
    utils.initialize_log()
    # either parse the raw audio files (_load=True) or load pre-extracted features
    if _load:
        (dictionary, tr_mnn_features, tr_mnn_labels, ts_mnn_features,
         ts_mnn_name_list, tr_cnn_features, tr_cnn_labels, ts_cnn_features,
         ts_cnn_name_list) = read_audio_files()
    else:
        (dictionary, tr_mnn_features, tr_mnn_labels, ts_mnn_features,
         ts_mnn_name_list, tr_cnn_features, tr_cnn_labels, ts_cnn_features,
         ts_cnn_name_list) = features.read_features()
    # print a log message for status update
    utils.write_log_msg("starting multi-layer neural network training...")
    # use the above extracted features for the training of the model
    predictions_top3 = train.train(tr_mnn_features, tr_mnn_labels, ts_mnn_features, tr_cnn_features, tr_cnn_labels, ts_cnn_features, n_classes=len(dictionary))
    # print a log message for status update
    utils.write_log_msg("outputing prediction results to a csv file...")
    # print the predicted results to a csv file.
    utils.print_csv_file(predictions_top3, ts_mnn_name_list, dictionary, OUTPUT_CSV)
    # print a log message for status update
    utils.write_log_msg("done...")
Example #9
def read_audio_files():

    # print a log message for status update
    utils.write_log_msg("creating data dictionary...")

    # create a dictionary from the provided train.csv file
    dictionary = utils.create_dictionary(TRAIN_CSV)

    # print a log message for status update
    utils.write_log_msg("extracting features of training data...")
    # call the feature extraction module to get audio features
    tr_mnn_features, tr_mnn_labels = features.parse_audio_files_train(
        TRAIN_AUDIO_PATH, TRAIN_CSV, dictionary, 0)
    # call the feature extraction module to get audio features
    tr_cnn_features, tr_cnn_labels = features.parse_audio_files_train(
        TRAIN_AUDIO_PATH, TRAIN_CSV, dictionary, 1)

    # print a log message for status update
    utils.write_log_msg(
        "processed {0} files of training data for mnn...".format(
            len(tr_mnn_features)))
    # print a log message for status update
    utils.write_log_msg(
        "processed {0} files of training data for cnn...".format(
            len(tr_cnn_features)))

    # print a log message for status update
    utils.write_log_msg("extracting features of prediction data...")
    # call the feature extraction module to get audio features
    ts_mnn_features, ts_mnn_name_list = features.parse_audio_files_predict(
        TEST_AUDIO_PATH, os.listdir(TEST_AUDIO_PATH), 0)
    # call the feature extraction module to get audio features
    ts_cnn_features, ts_cnn_name_list = features.parse_audio_files_predict(
        TEST_AUDIO_PATH, os.listdir(TEST_AUDIO_PATH), 1)

    # print a log message for status update
    utils.write_log_msg(
        "processed {0} files of prediction data for mnn...".format(
            len(ts_mnn_features)))
    # print a log message for status update
    utils.write_log_msg(
        "processed {0} files of prediction data for cnn...".format(
            len(ts_cnn_features)))

    # print a log message for status update
    utils.write_log_msg("storing features for future use...")
    # store features so that they can be used in future
    features.store_features(dictionary, tr_mnn_features, tr_mnn_labels,
                            ts_mnn_features, ts_mnn_name_list, tr_cnn_features,
                            tr_cnn_labels, ts_cnn_features, ts_cnn_name_list)

    # return the results to calling program
    return (dictionary, tr_mnn_features, tr_mnn_labels, ts_mnn_features,
            ts_mnn_name_list, tr_cnn_features, tr_cnn_labels, ts_cnn_features,
            ts_cnn_name_list)
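features.store_features and the matching features.read_features (used by main in Example #8) are not shown. A minimal sketch of such a cache, assuming the arrays are simply pickled to disk as one tuple in the same order as the return above (the FEATURES_PKL path is hypothetical):

import pickle

FEATURES_PKL = "../data/features.pkl"  # hypothetical cache location

def store_features(*arrays):
    # persist all extracted arrays so future runs can skip feature extraction
    with open(FEATURES_PKL, "wb") as f:
        pickle.dump(arrays, f)

def read_features():
    # restore the tuple saved by store_features, in the same order
    with open(FEATURES_PKL, "rb") as f:
        return pickle.load(f)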
Example #10
def main():
    utils.write_log_msg("Run CNN code ...")
    run_cnn(2)
Example #11
def main():
    utils.write_log_msg("Run MNN code ...")
    # limitedCsv()
    # run_mnn(0)
    create_confmatrix()
Example #12
def tensor_multilayer_neural_network(tr_features, tr_labels, ts_features, n_classes, training_epochs):
    # initialize the starting parameters.
    n_dim = tr_features.shape[1]
    n_hidden_units_1 = 200   #280
    n_hidden_units_2 = 250   #300
    n_hidden_units_3 = 300   #300

    sd = 1 / np.sqrt(n_dim)
    
    # one-hot encode the training labels
    tr_labels = to_categorical(tr_labels)
    
    X = tf.placeholder(tf.float32,[None,n_dim])
    Y = tf.placeholder(tf.float32,[None,n_classes])

    # initializing starting learning rate - will use decaying technique
    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(0.005, global_step, 500, 0.95, staircase=True)
    
    # initialize layer 1 parameters
    W_1 = tf.Variable(tf.random_normal([n_dim,n_hidden_units_1], mean = 0, stddev=sd))
    b_1 = tf.Variable(tf.random_normal([n_hidden_units_1], mean = 0, stddev=sd))
    h_1 = tf.nn.tanh(tf.matmul(X,W_1) + b_1)

    # initialize layer 2 parameters
    W_2 = tf.Variable(tf.random_normal([n_hidden_units_1,n_hidden_units_2], mean = 0, stddev=sd))
    b_2 = tf.Variable(tf.random_normal([n_hidden_units_2], mean = 0, stddev=sd))
    h_2 = tf.nn.sigmoid(tf.matmul(h_1,W_2) + b_2)

    # initialize layer 3 parameters
    W_3 = tf.Variable(tf.random_normal([n_hidden_units_2,n_hidden_units_3], mean = 0, stddev=sd))
    b_3 = tf.Variable(tf.random_normal([n_hidden_units_3], mean = 0, stddev=sd))
    h_3 = tf.nn.sigmoid(tf.matmul(h_2,W_3) + b_3)
    
    W = tf.Variable(tf.random_normal([n_hidden_units_3,n_classes], mean = 0, stddev=sd))
    b = tf.Variable(tf.random_normal([n_classes], mean = 0, stddev=sd))
    y_ = tf.nn.softmax(tf.matmul(h_3,W) + b)

    # clip the softmax output before taking the log to avoid NaN from log(0)
    cost_function = -tf.reduce_sum(Y * tf.log(tf.clip_by_value(y_, 1e-10, 1.0)))
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost_function, global_step=global_step)

    init = tf.global_variables_initializer()
    
    cost_history = np.empty(shape=[0], dtype=float)
    y_pred = None
    with tf.Session() as sess:
        sess.run(init)
        for epoch in range(training_epochs):            
            # print a log message for status update
            utils.write_log_msg("running the mnn training epoch {0}...".format(epoch+1))
            # running the training_epoch numbered epoch
            _,cost = sess.run([optimizer,cost_function],feed_dict={X:tr_features,Y:tr_labels})
            cost_history = np.append(cost_history,cost)
        # predict results based on the trained model
        y_pred = sess.run(tf.argmax(y_,1),feed_dict={X: ts_features})
        y_k_probs, y_k_pred = sess.run(tf.nn.top_k(y_, k=n_classes), feed_dict={X: ts_features})

    # plot cost history
    df = pd.DataFrame(cost_history)
    df.to_csv("../data/cost_history_mnn.csv")

    # return the predicted values back to the calling program
    return y_pred, y_k_probs, y_k_pred
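With staircase=True, the schedule above evaluates to lr = 0.005 * 0.95^floor(step/500), and since the loop runs one full-batch optimizer step per epoch, the learning rate drops by 5% every 500 epochs. A quick check of that schedule in plain numpy:

import numpy as np

def decayed_lr(step, base=0.005, decay_steps=500, decay_rate=0.95):
    # staircase=True means the exponent is the integer count of completed decay periods
    return base * decay_rate ** np.floor(step / decay_steps)

print(decayed_lr(0))    # 0.005
print(decayed_lr(499))  # 0.005   (still within the first period)
print(decayed_lr(500))  # 0.00475 (= 0.005 * 0.95)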