Example #1
import numpy as np
import pandas
from scipy.signal import butter, filtfilt
#import matplotlib.pyplot as plt  #only needed for the commented-out plot below

#uf (audio utilities), fa (feature extraction), valence_model, ref_mean,
#ref_std, SEQ_LENGTH, frames_per_annotation and the ccc2 metric are defined
#elsewhere in the project this example was taken from.

def predict_datapoint(input_sound, input_annotation):
    '''
    Loads one audio file and predicts its continuous valence.
    '''
    sr, samples = uf.wavread(input_sound)  #load audio
    e_samples = uf.preemphasis(samples, sr)  #apply pre-emphasis
    predictors = fa.extract_features(e_samples)  #compute power-law spectrum features
    #normalize by training mean and std
    predictors = np.subtract(predictors, ref_mean)
    predictors = np.divide(predictors, ref_std)
    #load target
    target = pandas.read_csv(input_annotation)
    target = target.values.reshape(-1)
    final_pred = []
    #compute prediction until last frame
    start = 0
    while start < (len(target)-SEQ_LENGTH):
        start_features = int(start * frames_per_annotation)
        stop_features = int((start + SEQ_LENGTH) * frames_per_annotation)
        predictors_temp = predictors[start_features:stop_features]
        predictors_temp = predictors_temp.reshape(1,predictors_temp.shape[0], predictors_temp.shape[1])
        #predictors_temp = predictors_temp.reshape(1,predictors_temp.shape[0], predictors_temp.shape[1], 1)

        prediction = valence_model.predict(predictors_temp)
        for i in range(prediction.shape[1]):
            final_pred.append(prediction[0][i])
        perc = int(float(start) / (len(target) - SEQ_LENGTH) * 100)
        print("Computing prediction: " + str(perc) + "%")
        start += SEQ_LENGTH
    #compute prediction for last frame
    predictors_temp = predictors[-int(SEQ_LENGTH*frames_per_annotation):]
    predictors_temp = predictors_temp.reshape(1,predictors_temp.shape[0], predictors_temp.shape[1])
    prediction = valence_model.predict(predictors_temp)
    #append only the last predictions that are still missing, oldest first
    missing_samples = len(target) - len(final_pred)
    reverse_index = np.add(list(reversed(range(missing_samples))), 1)
    for i in reverse_index:
        final_pred.append(prediction[0][-i])
    final_pred = np.array(final_pred)

    '''
    #compute best prediction shift
    shifted_cccs = []
    time = np.add(1,range(200))
    print "Computing best optimization parameters"
    for i in time:
        t = target.copy()
        p = final_pred.copy()
        t = t[i:]
        p = p[:-i]
        #print t.shape, p.shape

        temp_ccc = ccc2(t, p)
        shifted_cccs.append(temp_ccc)


    best_shift = np.argmax(shifted_cccs)
    best_ccc = np.max(shifted_cccs)
    if best_shift > 0:
        best_target = target[best_shift:]
        best_pred = final_pred[:-best_shift]
    else:
        best_target = target
        best_pred = final_pred
    #print 'LEN BEST PRED: ' + str(len(best_pred))

    #compute best parameters for the filter
    test_freqs = []
    test_orders = []
    test_cccs = []
    freqs = np.arange(0.01,0.95,0.01)
    orders = np.arange(1,10,1)
    print "Finding best optimization parameters..."
    for freq in freqs:
        for order in orders:
            test_signal = best_pred.copy()
            b, a = butter(order, freq, 'low')
            filtered = filtfilt(b, a, test_signal)
            temp_ccc = ccc2(best_target, filtered)
            test_freqs.append(freq)
            test_orders.append(order)
            test_cccs.append(temp_ccc)
    best_filter = np.argmax(test_cccs)
    best_order = test_orders[best_filter]
    best_freq = test_freqs[best_filter]
    '''
    #POSTPROCESSING
    #rescale predictions from [0, 1] to [-1, 1]
    final_pred = np.multiply(final_pred, 2.)
    final_pred = np.subtract(final_pred, 1.)

    #apply f_trick
    ann_folder = '../dataset/Training/Annotations'
    target_mean, target_std = uf.find_mean_std(ann_folder)
    final_pred = uf.f_trick(final_pred, target_mean, target_std)

    #apply butterworth filter
    b, a = butter(3, 0.01, 'low')
    final_pred = filtfilt(b, a, final_pred)

    ccc = ccc2(final_pred, target)  #compute ccc
    print "CCC = " + str(ccc)

    '''
    plt.plot(target)
    plt.plot(final_pred, alpha=0.7)
    plt.legend(['target','prediction'])
    plt.show()
    '''

    return ccc
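
The example scores predictions with a ccc2 helper that is defined elsewhere in the project. As a minimal sketch, assuming ccc2 computes Lin's concordance correlation coefficient between two equal-length 1-D arrays, it could look like this:

import numpy as np

def ccc2(x, y):
    #Lin's concordance correlation coefficient:
    #2*cov(x, y) / (var(x) + var(y) + (mean(x) - mean(y))**2)
    x_mean, y_mean = np.mean(x), np.mean(y)
    covariance = np.mean((x - x_mean) * (y - y_mean))
    return 2. * covariance / (np.var(x) + np.var(y) + (x_mean - y_mean) ** 2)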
Example #2
        #slice labels, video and audio features for the current clip
        end = end + num_frames
        label_slice = labels[start:end]
        video_slice = video_data[start:end]
        audio_slice = audio_data[start * frames_per_annotation:end *
                                 frames_per_annotation]

        if not np.array_equal(label_slice, annotations):
            print('{} label slice and annotations do not match! Num annotations different: {}'.format(
                name, (label_slice != annotations).sum()))
            # raise Exception('{} label slice and annotations do not match!'.format(name))

        predictions = predict_datapoint(audio_slice, video_slice, label_slice)
        print(predictions[:10], predictions[-10:])
        target_mean = np.mean(train_labels)
        target_std = np.std(train_labels)
        final_pred = uf.f_trick(predictions, target_mean, target_std)

        #apply butterworth filter
        b, a = butter(1, 0.004, 'low')
        final_pred = filtfilt(b, a, final_pred)

        # output to csv file
        preds = {'valence': final_pred}
        df = pd.DataFrame(preds, columns=['valence'])

        # change this folder for different models
        df.to_csv(model_output_path + name + '.csv', index=False, header=True)

        ccc = ccc2(label_slice, final_pred)
        print('{} ccc {}'.format(name, ccc))
        cccs.append(ccc)
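
#NOTE: the slicing above assumes the feature frame rate is an integer
#multiple of the annotation rate. For example, with hypothetical 25 Hz
#annotations and 100 feature frames per second, frames_per_annotation
#would be 4, so annotations [start, end) align with feature frames
#[start*4, end*4).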
audio_gen_val = uf.audio_generator(speech_valid_x, validation_target,
                                   SEQ_LENGTH, batch_size,
                                   frames_per_annotation)
print('Dataset successfully loaded')

print('Getting predictions...')
predictions = valence_model.predict_generator(
    audio_gen_val.generate_no_shuffle(), steps=audio_gen_val.stp_per_epoch)

predictions = predictions.reshape(predictions.shape[0])

# apply f_trick
ann_folder = '../dataset/Training/Annotations'
target_mean, target_std = uf.find_mean_std(ann_folder)
predictions = uf.f_trick(predictions, target_mean, target_std)

#apply butterworth filter
b, a = butter(3, 0.01, 'low')
predictions = filtfilt(b, a, predictions)

print(predictions)
print(validation_target)

ccc = ccc2(predictions, validation_target[15:])  #compute ccc
print "CCC = " + str(ccc)


def predict_datapoint(input_sound, input_annotation):
    '''
    Loads one audio file and predicts its continuous valence