示例#1
0
def preprocess_datapoint(input_sound, input_annotation):
    '''
    Generate predictors (audio features) and target (valence sequence)
    of one sound file from the OMG dataset.

    input_sound: path of the audio file, passed to uf.wavread
    input_annotation: path of the annotation csv, read with pandas
    returns: (feats, annotation)
        feats: output of fa.extract_features, truncated to the frames
            covered by the annotation file
        annotation: 1-D annotation values with the first TARGET_DELAY
            entries dropped
    '''
    sr, samples = uf.wavread(input_sound)  #read audio
    e_samples = uf.preemphasis(samples, sr)  #apply preemphasis
    feats = fa.extract_features(e_samples)  #extract features

    #read annotations and flatten them to one value per row
    #(the reshape fails if the csv holds more than one column)
    annotation = pandas.read_csv(input_annotation).values
    annotation = np.reshape(annotation, annotation.shape[0])

    #discard non annotated final frames; the frame count is derived from
    #the FULL annotation length, before the delay shift below
    annotated_frames = int(len(annotation) * frames_per_annotation)
    feats = feats[:annotated_frames]

    #shift back annotations by target_delay
    annotation = annotation[TARGET_DELAY:]

    #NOTE(review): this function used to also compute
    #feats[:-frames_delay] into a local that was never used or returned,
    #so the returned features are NOT trimmed by the delay. That dead
    #statement was removed; if the trim is meant to be applied, return the
    #trimmed array here (and guard frames_delay == 0, where [:-0] is
    #empty) -- confirm against the callers first.
    return feats, annotation
def extract_LLD_datapoint(input_sound, input_annotation):
    '''
    Load one audio file and compute the model's last latent dimension
    for every annotation step.

    The normalized features are fed to latent_extractor in consecutive,
    non-overlapping windows of SEQ_LENGTH annotation steps. The tail that
    does not fill a whole window is taken from one extra window aligned to
    the end of the file, keeping only the rows that are still missing.

    input_sound: path of the audio file, passed to uf.wavread
    input_annotation: path of the annotation csv; only its number of rows
        is used here
    returns: 2-D array with one row per annotation step and
        feats_per_valence columns
    '''
    sr, samples = uf.wavread(input_sound)  #load
    e_samples = uf.preemphasis(samples, sr)  #apply preemphasis
    predictors = fa.extract_features(e_samples)  #compute power law spectrum
    #normalize by training mean and std
    predictors = np.subtract(predictors, ref_mean)
    predictors = np.divide(predictors, ref_std)
    #load target
    target = pandas.read_csv(input_annotation)
    target = target.values
    target = np.reshape(target, (target.shape[0]))
    num_targets = len(target)

    #Chunks are gathered in a list and concatenated once at the end.
    #Growing an array with np.concatenate on every step copies all previous
    #rows each time, and seeding it with a 1-D empty array made the final
    #concatenate fail whenever the loop below never ran
    #(num_targets <= SEQ_LENGTH).
    chunks = []
    computed = 0  #number of annotation steps already covered

    #compute last latent dim until last frame
    start = 0
    while start < (num_targets - SEQ_LENGTH):
        start_features = int(start * frames_per_annotation)
        stop_features = int((start + SEQ_LENGTH) * frames_per_annotation)
        predictors_temp = predictors[start_features:stop_features]
        #add a leading batch axis of size 1
        predictors_temp = predictors_temp.reshape(1, predictors_temp.shape[0],
                                                  predictors_temp.shape[1])
        features_temp = latent_extractor([predictors_temp])
        features_temp = np.reshape(features_temp,
                                   (SEQ_LENGTH, feats_per_valence))
        chunks.append(features_temp)
        computed += features_temp.shape[0]
        #single-argument form prints the same text on Python 2 and 3
        print('Progress: ' + str(
            int(100 * (computed / float(num_targets)))) + '%')
        start += SEQ_LENGTH

    #compute last latent dim for last frame: window aligned to the end
    predictors_temp = predictors[-int(SEQ_LENGTH * frames_per_annotation):]
    predictors_temp = predictors_temp.reshape(1, predictors_temp.shape[0],
                                              predictors_temp.shape[1])
    features_temp = latent_extractor([predictors_temp])
    features_temp = np.reshape(features_temp, (SEQ_LENGTH, feats_per_valence))
    missing_samples = num_targets - computed
    #guard: a [-0:] slice would select the whole window instead of nothing
    if missing_samples > 0:
        chunks.append(features_temp[-missing_samples:])

    #raises ValueError when there is nothing to concatenate (empty target)
    return np.concatenate(chunks, axis=0)
示例#3
0
def predict_datapoint(input_sound, input_annotation):
    '''
    Load one audio file, predict its continuous valence and return the
    CCC between the post-processed prediction and the annotation.

    The normalized features are fed to valence_model in consecutive,
    non-overlapping windows of SEQ_LENGTH annotation steps; the remaining
    tail is taken from one extra window aligned to the end of the file.
    The prediction is then rescaled, passed through uf.f_trick and
    low-pass filtered before being scored with ccc2.

    input_sound: path of the audio file, passed to uf.wavread
    input_annotation: path of the annotation csv, read with pandas
    returns: value returned by ccc2(final_pred, target)
    '''
    sr, samples = uf.wavread(input_sound)  #load
    e_samples = uf.preemphasis(samples, sr)  #apply preemphasis
    predictors = fa.extract_features(e_samples)  #compute power law spectrum
    #normalize by training mean and std
    predictors = np.subtract(predictors, ref_mean)
    predictors = np.divide(predictors, ref_std)
    #load target
    target = pandas.read_csv(input_annotation)
    target = target.values
    target = np.reshape(target, (target.shape[0]))
    num_targets = len(target)

    final_pred = []
    #compute prediction until last frame
    start = 0
    while start < (num_targets - SEQ_LENGTH):
        start_features = int(start * frames_per_annotation)
        stop_features = int((start + SEQ_LENGTH) * frames_per_annotation)
        predictors_temp = predictors[start_features:stop_features]
        #add a leading batch axis of size 1
        predictors_temp = predictors_temp.reshape(
            1, predictors_temp.shape[0], predictors_temp.shape[1])
        prediction = valence_model.predict(predictors_temp)
        #keep every step predicted for the single item of the batch
        final_pred.extend(prediction[0])
        perc = int(float(start) / (num_targets - SEQ_LENGTH) * 100)
        #single-argument form prints the same text on Python 2 and 3
        print("Computing prediction: " + str(perc) + "%")
        start += SEQ_LENGTH

    #compute prediction for last frame: window aligned to the end
    predictors_temp = predictors[-int(SEQ_LENGTH * frames_per_annotation):]
    predictors_temp = predictors_temp.reshape(
        1, predictors_temp.shape[0], predictors_temp.shape[1])
    prediction = valence_model.predict(predictors_temp)
    missing_samples = num_targets - len(final_pred)
    #append only the trailing steps not covered yet, in chronological
    #order; the guard is needed because a [-0:] slice selects everything
    if missing_samples > 0:
        final_pred.extend(prediction[0][-missing_samples:])
    final_pred = np.array(final_pred)

    #POSTPROCESSING
    #normalize between -1 and 1
    #(x * 2 - 1; assumes the model output lies in [0, 1] -- TODO confirm)
    final_pred = np.multiply(final_pred, 2.)
    final_pred = np.subtract(final_pred, 1.)

    #apply f_trick
    #NOTE(review): hard-coded relative path, so the result presumably
    #depends on the current working directory; the statistics are also
    #recomputed on every call
    ann_folder = '../dataset/Training/Annotations'
    target_mean, target_std = uf.find_mean_std(ann_folder)
    final_pred = uf.f_trick(final_pred, target_mean, target_std)

    #apply butterworth filter (fixed parameters: order 3, cutoff 0.01)
    b, a = butter(3, 0.01, 'low')
    final_pred = filtfilt(b, a, final_pred)

    ccc = ccc2(final_pred, target)  #compute ccc
    print("CCC = " + str(ccc))

    return ccc