import os

import numpy as np
import pandas as pd
from keras.models import load_model
from scipy.signal import medfilt

# Project-local data-extraction helpers: the original code only shows the
# 'DE' alias, so the module name below is an assumption.
# import DataExtraction as DE


def build_audioFAUsFeatures_downsampledDataset_test(path, downsamplingFactor):
    """
    FUNCTION NAME: build_audioFAUsFeatures_downsampledDataset_test

    This function creates audio+FAUs features .csv files so they can be used
    in the machine learning stage of our working pipeline. It is intended for
    generating features data from the testing set when the available dataset
    has been reduced by a certain factor.

    INPUT:
    ------
        -> path:                path to data
        -> downsamplingFactor:  factor by which the original dataset
                                is downsampled

    OUTPUT:
    -------
    """
    features = []
    header = True
    headerStringFeatures = ['subjectID', 'storyID', 'frameID', \
        'f_1', 'f_2', 'f_3', 'f_4', 'f_5', 'f_6', 'f_7', 'f_8', 'f_9', 'f_10', \
        'f_11', 'f_12', 'f_13', 'f_14', 'f_15', 'f_16', 'f_17', 'f_18', 'f_19', 'f_20', \
        'f_21', 'f_22', 'f_23', 'f_24', 'f_25', 'f_26', 'f_27', 'f_28', 'f_29', 'f_30', \
        'f_31', 'f_32', 'f_33', 'f_34', 'f_35', 'f_36', 'f_37', 'f_38', 'f_39', 'f_40', \
        'f_41', 'f_42', 'f_43', 'f_44', 'f_45', 'f_46', 'f_47', 'f_48', 'f_49', 'f_50', \
        'f_51', 'f_52', 'f_53', 'f_54', 'f_55', 'f_56', 'f_57', 'f_58', 'f_59', 'f_60', \
        'f_61', 'f_62', 'f_63', 'f_64', 'f_65', 'f_66', 'f_67', 'f_68', 'f_69', 'f_70', \
        'f_71', 'f_72', 'f_73', 'f_74', 'f_75', 'f_76', 'f_77', 'f_78', 'f_79', 'f_80', \
        'f_81', 'f_82', 'f_83', 'f_84', 'f_85', 'f_86', 'f_87', 'f_88', \
        'AU01_r', 'AU02_r', 'AU04_r', 'AU05_r', 'AU06_r', 'AU07_r', 'AU09_r', 'AU10_r', \
        'AU12_r', 'AU14_r', 'AU15_r', 'AU17_r', 'AU20_r', 'AU23_r', 'AU25_r', 'AU26_r', \
        'AU45_r', 'AU01_c', 'AU02_c', 'AU04_c', 'AU05_c', 'AU06_c', 'AU07_c', 'AU09_c', \
        'AU10_c', 'AU12_c', 'AU14_c', 'AU15_c', 'AU17_c', 'AU20_c', 'AU23_c', 'AU25_c', \
        'AU26_c', 'AU28_c', 'AU45_c']

    outputFeaturesFile = path + '/DataMatrices/openSMILE_FAUs_downsamplingFactor' \
        + str(downsamplingFactor) + '_features.csv'

    folders = sorted(os.listdir(path + '/AudioFeatures'))

    for folder in folders:
        if (os.path.isdir(path + '/AudioFeatures' + '/' + folder)) \
                and (len(folder.split('_')) == 5) \
                and (folder.split('_')[4] == 'downsampledDataBy' + str(downsamplingFactor)):

            print('Building data for: ' + folder + ' ...')

            files = sorted(os.listdir(path + '/AudioFeatures' + '/' + folder))

            # FAU features path
            FAUpath = path + '/VideoFeatures/' \
                + folder.split('_downsampledDataBy')[0] + '.csv'

            subjectID = int(folder.split('_')[1])
            storyID = int(folder.split('_')[3])

            print(' Reading openSMILE features information ...')

            openSMILEfeatures = []
            for file in files:
                if file.endswith('.csv'):
                    openSMILEpath = path + '/AudioFeatures/' + folder + '/' + file
                    # Read features information
                    currentFeat = DE.read_openSMILEfeatures(openSMILEpath)
                    # Reshape features information
                    currentFeat = DE.reshape_openSMILEfeaturesVector(currentFeat)
                    # Concatenate openSMILE features information
                    if len(openSMILEfeatures) == 0:
                        openSMILEfeatures = currentFeat
                    else:
                        openSMILEfeatures = DE.incrementalMatrix(openSMILEfeatures, currentFeat)

            print(' Reading FAUs features information ...')

            # Read FAUs information
            currentFAUs = DE.read_FAUs(FAUpath)
            # Select the proper instances according to the downsampling factor
            dwIDs = DE.get_downsamplingIDs(currentFAUs, downsamplingFactor)
            subjectID, storyID, framesID = DE.get_infoFromFAUs(currentFAUs, subjectID, storyID)

            # Concatenate openSMILE + FAUs features information
            features = DE.concatenate_info_openSMILE_FAUs(
                subjectID[dwIDs, :], storyID[dwIDs, :], framesID[dwIDs, :],
                openSMILEfeatures, currentFAUs[dwIDs, :])

            # Write data in .csv file: write the header on the first pass,
            # then append without it
            if header:
                f = pd.DataFrame(features)
                f.to_csv(outputFeaturesFile, header=headerStringFeatures,
                         float_format='%e', index=False, mode='w')
            else:
                f = pd.DataFrame(features)
                f.to_csv(outputFeaturesFile, header=header,
                         float_format='%e', index=False, mode='a')

            header = False
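# ---------------------------------------------------------------------------
# Usage sketch for the builder above (illustration only): the data root and
# the downsampling factors below are assumptions, not values taken from this
# module. The function expects AudioFeatures/, VideoFeatures/ and
# DataMatrices/ folders under the given root, and writes one features .csv
# per factor:
#
#   dataRoot = './Data/Test'            # hypothetical data root
#   for factor in [2, 4]:               # hypothetical downsampling factors
#       build_audioFAUsFeatures_downsampledDataset_test(dataRoot, factor)
# ---------------------------------------------------------------------------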
def test_model_2BLSTM_variableSequenceLength(inputPath, outputPath, modelType,
        MLtechnique, features, dw, batch_size, patience, LSTMunits=30):
    """
    FUNCTION NAME: test_model_2BLSTM_variableSequenceLength

    Function to evaluate trained 2B-LSTM models on the testing/validation sets.

    INPUT:
    ------
        -> inputPath:   path where trained models are stored
        -> outputPath:  path where the validation data needs to be stored
        -> modelType:   type of the trained model to load
        -> MLtechnique: technique used to train the model
        -> features:    matrix of features used to evaluate the model
        -> dw:          factor used when downsampling the available data
        -> batch_size:  value for batch_size parameter
        -> patience:    value for patience parameter
        -> LSTMunits:   number of units of the LSTM

    OUTPUT:
    -------
        <- predictions: numpy array with the annotations predicted from the
                        input features, with data structure:
                        [subjectID, storyID, frameID, predictions]
    """
    predictions = []

    subjectIDs = DE.get_uniqueValues(features[:, 0])
    storyIDs = DE.get_uniqueValues(features[:, 1])

    for sbID in subjectIDs:
        if dw == 1:
            modelName = inputPath + 'Model_Subject' + str(int(sbID)) + '_' \
                + MLtechnique + '_LSTMunits' + str(LSTMunits) \
                + '_BatchSize' + str(batch_size) + '_Patience' + str(patience) \
                + '_' + modelType
        else:
            modelName = inputPath + 'Model_Subject' + str(int(sbID)) \
                + '_DW' + str(dw) + '_' + MLtechnique + '_LSTMunits' + str(LSTMunits) \
                + '_BatchSize' + str(batch_size) + '_Patience' + str(patience) \
                + '_' + modelType

        print('-> Loading model from disk ...')

        # Load model, restoring the custom CCC loss used at training time
        model = load_model(modelName + '.h5', custom_objects={'loss_CCC': loss_CCC})

        print('<- Model loaded!')

        for stID in storyIDs:
            current_rows = DE.getArguments_SubjectID_StoryID(features, sbID, stID)
            selectedFeatures = features[current_rows, 3:]

            # Arrange the whole sequence as a single batch:
            # (1, timesteps, numFeatures)
            modelInputFeatures = np.reshape(selectedFeatures,
                (1, np.shape(selectedFeatures)[0], np.shape(selectedFeatures)[1]))

            pred_annotations = np.squeeze(model.predict(modelInputFeatures))

            # Apply median filtering (301-frame window) to smooth the
            # valence predictions
            pred_annotations = medfilt(pred_annotations, 301)

            currentOutput = np.hstack((features[current_rows, :3],
                                       np.reshape(pred_annotations, (-1, 1))))

            if len(predictions) == 0:
                predictions = currentOutput
            else:
                predictions = DE.incrementalMatrix(predictions, currentOutput)

        del model

    return predictions
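# ---------------------------------------------------------------------------
# 'loss_CCC' is referenced above when deserializing the trained models, but it
# is defined elsewhere in this codebase. For reference, a minimal Concordance
# Correlation Coefficient (CCC) loss in the Keras backend could look like the
# sketch below -- an assumption for illustration, not necessarily the exact
# definition used to train the models.

from keras import backend as K

def loss_CCC(y_true, y_pred):
    """
    Sketch of a CCC-based loss: 1 - CCC, where
    CCC = 2*cov(x, y) / (var(x) + var(y) + (mean(x) - mean(y))^2).
    """
    x = K.flatten(y_true)
    y = K.flatten(y_pred)
    xMean = K.mean(x)
    yMean = K.mean(y)
    xVar = K.mean(K.square(x - xMean))
    yVar = K.mean(K.square(y - yMean))
    cov = K.mean((x - xMean) * (y - yMean))
    ccc = (2.0 * cov) / (xVar + yVar + K.square(xMean - yMean) + K.epsilon())
    return 1.0 - ccc

# Design note: the 301-frame median filter applied in the function above
# smooths the frame-level valence trace; scipy's medfilt zero-pads at the
# edges, so very short sequences would be dominated by the padding.
# ---------------------------------------------------------------------------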
def build_audioFAUsFeatures(path):
    """
    FUNCTION NAME: build_audioFAUsFeatures

    This function creates openSMILE and FAUs feature and label .csv files so
    they can be used in the machine learning stage of our working pipeline.

    INPUT:
    ------
        -> path:    path to data

    OUTPUT:
    -------
    """
    features = []
    labels = []
    header = True
    headerStringFeatures = ['subjectID', 'storyID', 'frameID', \
        'f_1', 'f_2', 'f_3', 'f_4', 'f_5', 'f_6', 'f_7', 'f_8', 'f_9', 'f_10', \
        'f_11', 'f_12', 'f_13', 'f_14', 'f_15', 'f_16', 'f_17', 'f_18', 'f_19', 'f_20', \
        'f_21', 'f_22', 'f_23', 'f_24', 'f_25', 'f_26', 'f_27', 'f_28', 'f_29', 'f_30', \
        'f_31', 'f_32', 'f_33', 'f_34', 'f_35', 'f_36', 'f_37', 'f_38', 'f_39', 'f_40', \
        'f_41', 'f_42', 'f_43', 'f_44', 'f_45', 'f_46', 'f_47', 'f_48', 'f_49', 'f_50', \
        'f_51', 'f_52', 'f_53', 'f_54', 'f_55', 'f_56', 'f_57', 'f_58', 'f_59', 'f_60', \
        'f_61', 'f_62', 'f_63', 'f_64', 'f_65', 'f_66', 'f_67', 'f_68', 'f_69', 'f_70', \
        'f_71', 'f_72', 'f_73', 'f_74', 'f_75', 'f_76', 'f_77', 'f_78', 'f_79', 'f_80', \
        'f_81', 'f_82', 'f_83', 'f_84', 'f_85', 'f_86', 'f_87', 'f_88', \
        'AU01_r', 'AU02_r', 'AU04_r', 'AU05_r', 'AU06_r', 'AU07_r', 'AU09_r', 'AU10_r', \
        'AU12_r', 'AU14_r', 'AU15_r', 'AU17_r', 'AU20_r', 'AU23_r', 'AU25_r', 'AU26_r', \
        'AU45_r', 'AU01_c', 'AU02_c', 'AU04_c', 'AU05_c', 'AU06_c', 'AU07_c', 'AU09_c', \
        'AU10_c', 'AU12_c', 'AU14_c', 'AU15_c', 'AU17_c', 'AU20_c', 'AU23_c', 'AU25_c', \
        'AU26_c', 'AU28_c', 'AU45_c']
    headerStringLabels = ['subjectID', 'storyID', 'frameID', 'valence']

    outputFeaturesFile = path + '/DataMatrices/openSMILE_FAUs_features.csv'
    outputLabelsFile = path + '/DataMatrices/openSMILE_FAUs_labels.csv'

    folders = sorted(os.listdir(path + '/AudioFeatures'))

    for folder in folders:
        if os.path.isdir(path + '/AudioFeatures' + '/' + folder):

            print('Building data for: ' + folder + ' ...')

            files = sorted(os.listdir(path + '/AudioFeatures' + '/' + folder))

            # FAU features path
            FAUpath = path + '/VideoFeatures/' + folder + '.csv'
            # Annotations path
            valencePath = path + '/Annotations/' + folder + '.csv'

            subjectID = int(folder.split('_')[1])
            storyID = int(folder.split('_')[3])

            print(' Reading openSMILE features information ...')

            openSMILEfeatures = []
            for file in files:
                if file.endswith('.csv'):
                    openSMILEpath = path + '/AudioFeatures/' + folder + '/' + file
                    # Read features information
                    currentFeat = DE.read_openSMILEfeatures(openSMILEpath)
                    # Reshape features information
                    currentFeat = DE.reshape_openSMILEfeaturesVector(currentFeat)
                    # Concatenate openSMILE features information
                    if len(openSMILEfeatures) == 0:
                        openSMILEfeatures = currentFeat
                    else:
                        openSMILEfeatures = DE.incrementalMatrix(openSMILEfeatures, currentFeat)

            print(' Reading FAUs features information ...')

            # Read FAUs information
            currentFAUs = DE.read_FAUs(FAUpath)
            subjectID, storyID, framesID = DE.get_infoFromFAUs(currentFAUs, subjectID, storyID)

            print(' Reading valence labels information ...')

            # Read valence labels information
            annotations = DE.get_annotationsFromFile(valencePath)

            # Concatenate labels information
            labels = DE.concatenate_info_FAUs(subjectID, storyID, framesID, annotations)

            # Concatenate openSMILE + FAUs features information
            features = DE.concatenate_info_openSMILE_FAUs(
                subjectID, storyID, framesID, openSMILEfeatures, currentFAUs)

            # Write data in .csv files: write the headers on the first pass,
            # then append without them
            if header:
                f = pd.DataFrame(features)
                f.to_csv(outputFeaturesFile, header=headerStringFeatures,
                         float_format='%e', index=False, mode='w')
                l = pd.DataFrame(labels)
                l.to_csv(outputLabelsFile, header=headerStringLabels,
                         index=False, mode='w')
            else:
                f = pd.DataFrame(features)
                f.to_csv(outputFeaturesFile, header=header,
                         float_format='%e', index=False, mode='a')
                l = pd.DataFrame(labels)
                l.to_csv(outputLabelsFile, header=header,
                         index=False, mode='a')

            header = False
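# ---------------------------------------------------------------------------
# Usage sketch (illustration only; the root below is hypothetical). The
# function expects AudioFeatures/, VideoFeatures/, Annotations/ and
# DataMatrices/ folders under the given path, and writes the full-rate
# feature and label matrices in one pass:
#
#   build_audioFAUsFeatures('./Data/Training')    # hypothetical data root
# ---------------------------------------------------------------------------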