def make_low_level_data_file(filename: str, output_file_path: str):
    """Extracts the lowlevel datafile from a given song file.

    Runs the ``streaming_extractor_music`` executable found under
    ``utilities/ressources/extractors/`` with the song path and the
    output path as its two arguments.

    Parameters
    ----------
    filename
        single song file path
    output_file_path
        output path of the lowlevel datafile
    """
    extractor_path = get_absolute_path("utilities/ressources"
                                       + "/extractors/"
                                       + "streaming_extractor_music")
    # Hand the arguments over as a list so no shell is involved: paths
    # containing spaces, quotes, parentheses, '&' or any other shell
    # metacharacter reach the extractor unchanged, and neither path can
    # be interpreted as additional shell commands.
    subprocess.run([extractor_path, filename, output_file_path])
def test_happy_2():
    """Classify happy2.png and check the score at index 3.

    Index 3 is presumably the "happy" label of the classifier output;
    the label order is not visible from this test.
    """
    image_path = get_absolute_path(current_directory
                                   + "test_face_emotion_extr"
                                   + "action_data/happy2.png")
    predictions = classify_faces([cv2.imread(image_path)])
    # One face went in, so the first row holds its scores.
    assert len(predictions[0]) == 7
    assert predictions[0][3] > 0.5
def test_neutral_1():
    """Classify neutral1.png and check the score at index 6.

    Index 6 is presumably the "neutral" label of the classifier output;
    the label order is not visible from this test.
    """
    image_path = get_absolute_path(current_directory
                                   + "test_face_emotion_extra"
                                   + "ction_data/neutral1.png")
    predictions = classify_faces([cv2.imread(image_path)])
    # One face went in, so the first row holds its scores.
    assert len(predictions[0]) == 7
    assert predictions[0][6] > 0.5
def test_facial_recognition_of_at_least_one_face():
    """Check that analysing the sample video yields a non-empty result."""
    video_path = get_absolute_path(
        "video_emotion/facial_recognition/t/test"
        + "_facial_recognition/Fun_at_a_Fair.mp4")
    # Only the span from 3s to 5s is analysed, for performance reasons.
    time_window = (3000, 5000)
    frames = analyze_video(video_path, time_window)
    assert len(frames) != 0
def test_Many_Faces():
    """Classify three happy images in one batch and check index 3 of each."""
    image_paths = [
        get_absolute_path(current_directory + "test_face_emotion_extr"
                          + "action_data/happy1.png"),
        get_absolute_path(current_directory + "test_face_emotion_extr"
                          + "action_data/happy2.png"),
        get_absolute_path(current_directory + "test_face_emotion_extr"
                          + "action_data/happy3.png"),
    ]
    images = [cv2.imread(path) for path in image_paths]
    predictions = classify_faces(images)

    # Minimum score expected at index 3 for each image, in input order.
    minimum_scores = (0.5, 0.5, 0.3)
    for position, minimum in enumerate(minimum_scores):
        assert predictions[position][3] > minimum
def _load_song(song_id, filename, segments, force=False):
    """Return the feature tuples for every 5-second segment of a song.

    Stored segments are fetched through ``segments.get_all_by_song_id``.
    When none are stored, or when ``force`` is set, the audio file is
    loaded, cut into 5-second windows, and each window is processed and
    added through ``segments.add``. Otherwise the features are rebuilt
    from the stored byte buffers.

    Each returned entry is ``(segment id, song_id, start in seconds,
    feature)``.
    """
    print('Loading: ' + song_id)
    filename = get_absolute_path(filename)
    print(song_id + ' loading song segments')
    stored_segments = segments.get_all_by_song_id(song_id)
    print(song_id + ' song segments loaded')

    results = []
    must_extract = (stored_segments == [] or force)

    if not must_extract:
        print('Loaded from database')
        # There are segments in db, look for features
        for index, stored in enumerate(stored_segments):
            # Stop at the first segment that lacks any stored feature.
            if any(stored[key] is None
                   for key in ('mfcc', 'chroma', 'tempogram')):
                break
            # NOTE(review): np.frombuffer is called with its default dtype
            # and yields flat arrays; confirm this matches the arrays that
            # were serialized with tobytes() when the segment was added.
            feature = _create_feature(np.frombuffer(stored['mfcc']),
                                      np.frombuffer(stored['chroma']),
                                      np.frombuffer(stored['tempogram']))
            results.append((stored['_id'], song_id, index * 5, feature))
        return results

    print('Loading audio file')
    # No segments in db, which means no features in db
    samples, sample_rate = librosa.load(filename)
    window = sample_rate * 5  # number of samples in one 5-second segment
    segment_count = samples.shape[0] // sample_rate // 5 - 1
    for index in range(segment_count):
        start_seconds = index * 5
        end_seconds = (index + 1) * 5
        chunk = samples[index * window:(index + 1) * window]
        mfcc, chromagram, tempogram = _process_segment(chunk, sample_rate)
        # Segment bounds are passed as seconds * 1000.
        segment_id = segments.add(song_id,
                                  start_seconds * 1000,
                                  end_seconds * 1000,
                                  mfcc.tobytes(),
                                  chromagram.tobytes(),
                                  tempogram.tobytes(),
                                  [])
        feature = _create_feature(mfcc, chromagram, tempogram)
        results.append((segment_id, song_id, start_seconds, feature))
    return results
def test_song_data_extraction():
    """Run the low-level extractor on the sample song and check its output.

    The output file is written next to this test module, must exist and
    be non-empty after extraction, and is removed again afterwards.
    """
    filename = get_absolute_path("classification/extractor/t/test_extractor/"
                                 + "8376-1-1 Demolition_Man_proud_music_"
                                 + "preview.wav")

    # This setup is required, to dynamically run this
    # test from anywhere you'd like.
    dirname = os.path.abspath(os.path.dirname(__file__))
    output_filename = os.path.join(dirname, "8376-1-1_output.json")

    try:
        make_low_level_data_file(filename, output_filename)

        assert os.path.isfile(output_filename)
        # Check the file size: len() of a stat result is the number of
        # fields in the stat tuple and is therefore never zero.
        assert os.stat(output_filename).st_size != 0
    finally:
        # Clean up even when an assertion fails, so a failed run does not
        # leave the output file behind for the next run.
        if os.path.isfile(output_filename):
            os.remove(output_filename)
def test_api_helper():
    """Process the sample song and check the profile stored for it.

    The song is processed under id "1337"; the stored record is then
    read back through ``TrackEmotion`` and compared with the expected
    bpm and classifier labels.
    """
    song_id = "1337"
    audio_path = get_absolute_path("classification/t/test_segmented_audio_"
                                   + "analysis/8376-1-1_Demolition_Man_proud_"
                                   + "music_preview.wav")

    process_data_and_extract_profiles(song_id, audio_path)

    track_emotion = TrackEmotion()
    data = track_emotion.get(song_id)

    assert data['song_id'] == '1337'
    assert int(data['bpm']['value']) == 139

    # Expected label per classifier entry, checked in this order.
    expected_labels = (
        ('timbre', 'dark'),
        ('relaxed', 'not_relaxed'),
        ('party', 'not_party'),
        ('aggressive', 'aggressive'),
        ('happy', 'not_happy'),
        ('sad', 'not_sad'),
    )
    for key, label in expected_labels:
        assert data[key]['value'] == label
def get_mono_loaded_song(song_path: str):
    """Load a song through ``MonoLoader`` and return the loader's output.

    Parameters
    ----------
    song_path : str
        The file path of the song; it is resolved with
        ``get_absolute_path`` before loading

    Returns
    -------
    vector_real
        The file's audio downmixed to mono
    """
    # Build the loader for the resolved path and run it immediately.
    return MonoLoader(filename=get_absolute_path(song_path))()
def get_audio_loaded_song(song_path: str):
    """Create an ``AudioLoader`` for the song at the given path.

    Parameters
    ----------
    song_path : str
        The file path of the song; it is resolved with
        ``get_absolute_path`` before the loader is created

    Returns
    -------
    AudioLoader
        The loader configured for the song. The loader is returned
        without being called.
        NOTE(review): callers presumably invoke it themselves to obtain
        the stereo audio signal -- confirm against the call sites.
    """
    return AudioLoader(filename=get_absolute_path(song_path))
def classify_faces(faces) -> [[float]]:
    """Run the emotion model over a batch of face images.

    Parameters
    ----------
    faces
        The face images to be processed; each one is converted from BGR
        to greyscale and resized to the model's input size

    Returns
    -------
    array
        The model's prediction for the batch: one row per face, each
        row holding the emotion scores (indexed like in getlabels)
    """
    model_path = get_absolute_path("video_emotion/"
                                   + "emotion_tagger/"
                                   + "models/emotion_model.hdf5")
    classifier = load_model(model_path)

    # Height and width the model expects, taken from its input shape.
    target_size = classifier.input_shape[1:3]

    def _prepare(face):
        # Greyscale, resize, preprocess, then add a trailing axis.
        grey = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)
        resized = cv2.resize(grey, target_size)
        return numpy.expand_dims(preprocess_input(resized), -1)

    batch = numpy.asarray([_prepare(face) for face in faces])
    prediction = classifier.predict(batch)

    # The model is loaded on every call, so clear the session afterwards.
    K.clear_session()
    return prediction
def get_classifier_data(
        data_file_name: str
) -> Tuple[Tuple, Tuple, Tuple, Tuple, Tuple, Tuple]:
    """Extracts the highlevel mood classifications from a given data file

    Runs ``essentia_streaming_extractor_music_svm`` on the given file
    with the ``timbre_moods_profile.yaml`` profile, writing the result
    to a temporary file, and reads the classifications from its
    ``highlevel`` section.

    Parameters
    ----------
    data_file_name
        path of the file handed to the extractor as its input

    Returns
    -------
    Tuple[Tuple, Tuple, Tuple, Tuple, Tuple, Tuple]
        ``(value, probability)`` pairs, in the order timbre, relaxed,
        party, aggressive, happy, sad
    """
    # The extractor is run from this module's directory, so the call
    # works no matter where the program was started from.
    dirname = os.path.abspath(os.path.dirname(__file__))

    profile_file = get_absolute_path("utilities/ressources/"
                                     + "timbre_moods_profile.yaml")

    # Temp file used instead of writing to an actual file. The context
    # manager closes (and thereby deletes) it even if parsing fails.
    with NamedTemporaryFile(delete=True) as temp_file:
        # Argument list plus cwd instead of a "cd ... && ..." shell string:
        # paths with spaces or shell metacharacters are passed through
        # unchanged and cannot be interpreted as shell syntax.
        subprocess.run(
            ['essentia_streaming_extractor_music_svm',
             data_file_name, temp_file.name, profile_file],
            cwd=dirname)
        data = json.load(temp_file)

    highlevel = data['highlevel']
    classifier_keys = ('timbre', 'mood_relaxed', 'mood_party',
                       'mood_aggressive', 'mood_happy', 'mood_sad')
    return tuple((highlevel[key]['value'], highlevel[key]['probability'])
                 for key in classifier_keys)
import os from typing import Dict import numpy as np import cv2 from utilities.filehandler.handle_path import get_absolute_path dirname = os.path.dirname(__file__) CONFIDENCE_MINIMUM = 0.7 OPENCV_PROTOTXT = get_absolute_path("video_emotion/facial_recognition/" + "deploy.prototxt.txt") OPENCV_MODEl = get_absolute_path("video_emotion/facial_recognition/res10" + "_300x300_ssd_iter_140000_fp16.caffemodel") # Load model from disk NET = cv2.dnn.readNetFromCaffe(OPENCV_PROTOTXT, OPENCV_MODEl) # Various numeral constants IMAGE_RESIZE = 300 SIZE_CONSTANT = 1.0 RED = 104.0 GREEN = 177.0 BLUE = 123.0 def analyze_video(video_path: str, time_range: int = None) -> Dict: """Analyses video finding faces, given videopath and a timerange Parameters