def load_model(config):
    """Build a YAMNet frames model with weights applied, plus its class names.

    Args:
        config: Passed through unchanged to ``load_weights``.

    Returns:
        A ``(model, class_names, params)`` tuple: the model after
        ``load_weights`` has been called on it, the value returned by
        ``yamnet_model.class_names`` for ``yamnet/yamnet_class_map.csv``, and
        the ``Params`` instance the model was built from.
    """
    logger.debug("Loading model...")
    weights_source = load_weights(config)

    model_params = yamnet_params.Params()
    model = yamnet_model.yamnet_frames_model(model_params)
    model.load_weights(weights_source)

    # The class map is resolved relative to this module's own directory.
    module_dir = os.path.dirname(__file__)
    class_map_csv = os.path.join(module_dir, "yamnet", "yamnet_class_map.csv")
    labels = yamnet_model.class_names(class_map_csv)

    return model, labels, model_params
def infer(frm_rcv):
    """Score incoming audio with YAMNet and publish the results to RabbitMQ.

    Opens a blocking pika connection to ``localhost``, declares the fanout
    exchange ``inference``, builds a YAMNet frames model with weights from
    ``/opt/soundscene/yamnet.h5`` and then loops: every ``(aud_time, audio)``
    pair received from ``frm_rcv`` is run through the model and one JSON
    message is published for the last row of the returned scores.

    Args:
        frm_rcv: Object whose ``recv()`` returns an ``(aud_time, audio)``
            pair. NOTE(review): presumably the receiving end of a
            ``multiprocessing`` pipe -- confirm against the caller.

    Returns:
        None. The function returns only once ``recv()`` raises ``EOFError``.
        Any other exception raised while handling a message is logged and
        the loop carries on with the next message.

    Raises:
        SystemExit: With status 1 if the model cannot be constructed.
    """
    logger.info('infering ')
    # Kept as function-local imports, exactly as in the original code.
    from yamnet import yamnet as yamnet_model
    from yamnet import params
    import json

    top_k = 521  # report the top k classes

    connection = pika.BlockingConnection(
        pika.ConnectionParameters('localhost'))
    try:
        channel = connection.channel()
        channel.exchange_declare(exchange='inference', exchange_type='fanout')

        logger.info('model ')
        try:
            yamnet = yamnet_model.yamnet_frames_model(params.Params())
        except Exception:
            logger.exception('UGGGG')
            sys.exit(1)
        yamnet.load_weights('/opt/soundscene/yamnet.h5')
        logger.info('done model ')

        while True:
            try:
                aud_time, normalized_audio_1hz = frm_rcv.recv()

                # Average over axis 1 when the audio has more than one
                # dimension, so the model always receives a 1-D array.
                if len(normalized_audio_1hz.shape) > 1:
                    normalized_audio_1hz = np.mean(normalized_audio_1hz, axis=1)

                # Original note: predict returns [1, classes], classes=521.
                scores, _emb, mel = yamnet.predict(normalized_audio_1hz, steps=1)

                # Only the last row of scores is published (1 sec samples).
                for frame_scores in scores[-1:]:
                    top_idxs = np.argsort(frame_scores)[::-1][:top_k]
                    inferences = frame_scores[top_idxs]
                    payload = dict(
                        time=aud_time,
                        inferences=inferences.tolist(),
                        mel=mel.tolist(),
                        embeddings=[],  # no embeddings produced for yamnet
                        idxs=top_idxs.tolist())
                    channel.basic_publish(
                        exchange='inference',
                        routing_key='',
                        body=json.dumps(payload))
            except EOFError:
                # Nothing more will ever arrive. Without this the generic
                # handler below would log the same error in a tight loop
                # forever. Assumes recv() signals a closed sender with
                # EOFError, as multiprocessing connections do.
                logger.info('audio source closed, stopping inference')
                break
            except Exception as e:
                logger.exception(e)
    finally:
        # Release the broker connection on every exit path (EOF, sys.exit,
        # KeyboardInterrupt). Skip it when the connection is already closed
        # so cleanup cannot mask the original error.
        if connection.is_open:
            connection.close()
def waveform_to_features(waveform):
    """Extract feature patches from ``waveform`` using VGGish settings.

    The YAMNet feature extractor is configured entirely from the
    ``vggish_params`` constants.

    Args:
        waveform: Forwarded unchanged to
            ``yamnet_features.waveform_to_log_mel_spectrogram_patches``.

    Returns:
        The second of the two values returned by the extractor; the first
        (the log-mel spectrogram) is discarded.
    """
    vggish_settings = dict(
        sample_rate=vggish_params.SAMPLE_RATE,
        stft_window_seconds=vggish_params.STFT_WINDOW_LENGTH_SECONDS,
        stft_hop_seconds=vggish_params.STFT_HOP_LENGTH_SECONDS,
        mel_bands=vggish_params.NUM_MEL_BINS,
        mel_min_hz=vggish_params.MEL_MIN_HZ,
        mel_max_hz=vggish_params.MEL_MAX_HZ,
        log_offset=vggish_params.LOG_OFFSET,
        patch_window_seconds=vggish_params.EXAMPLE_WINDOW_SECONDS,
        patch_hop_seconds=vggish_params.EXAMPLE_HOP_SECONDS,
    )
    extractor_params = yamnet_params.Params(**vggish_settings)

    # The extractor yields (spectrogram, patches); only the patches are used.
    _, patches = yamnet_features.waveform_to_log_mel_spectrogram_patches(
        waveform, extractor_params)
    return patches
def classifyWav(wavPath, topClasses, channels=2):
    """Split a WAV file into mono channel files and classify each with YAMNet.

    ffmpeg writes one ``<name>_ch<N>.wav`` file per channel into a
    ``splitChannels`` folder; each of those files is then scored and the
    highest-scoring classes are reported per score frame.

    Args:
        wavPath: "/"-separated path of the WAV file to analyse.
        topClasses: How many classes to report per channel. Classes are
            ranked by their mean score over all frames of that channel.
        channels: Number of channels to extract with ffmpeg. Defaults to 2,
            which is what the function previously hard-coded.

    Returns:
        A dict ``{channel: {frame: {class_index: score}}}`` with scores
        rounded to two decimals. Class indices are returned instead of
        names because they are matched to names later in the front-end.
        A channel whose file ffmpeg did not produce is left out.

    Raises:
        ValueError: If ``channels`` is smaller than 1.
    """
    if channels < 1:
        raise ValueError(f"channels must be at least 1, got {channels}")

    path = wavPath.split("/")
    filename = path[-1].split(".")[0]
    # Temp folder the channel files are written to and read from: a
    # "splitChannels" folder two path components above the file name.
    targetFolder = '/'.join(path[:-2]) + "/splitChannels/"
    os.makedirs(targetFolder, exist_ok=True)

    # Delete the channel files left behind by a previous run. Done with
    # os.remove instead of a shell "rm" so the path is never parsed by a shell.
    with os.scandir(targetFolder) as entries:
        stale_files = [
            entry.path for entry in entries
            if entry.name.endswith(".wav") and not entry.is_dir()
        ]
    for stale_file in stale_files:
        os.remove(stale_file)

    channel_paths = [
        f"{targetFolder}{filename}_ch{ch}.wav" for ch in range(channels)
    ]

    # Argument list + no shell: spaces or shell metacharacters in wavPath can
    # neither break the command nor inject additional commands.
    command = ["ffmpeg", "-i", wavPath]
    for ch, channel_path in enumerate(channel_paths):
        command += ["-map_channel", f"0.0.{ch}", channel_path]
    try:
        subprocess.call(command)
    except OSError as err:
        # ffmpeg could not be started at all. As before, this is not fatal:
        # no channel files exist, so the result below is simply empty.
        print(f"could not run ffmpeg: {err}")

    semanticResults = {}
    # One model per sample rate: building the network and loading its weights
    # is independent of the channel being processed.
    models_by_rate = {}

    # Walk the expected files in channel order so that result key N always
    # refers to channel N (directory listing order is arbitrary).
    for ch, channel_path in enumerate(channel_paths):
        if not os.path.isfile(channel_path):
            # ffmpeg did not produce this channel; it reports its own errors.
            continue

        wav_data, sr = sf.read(channel_path, dtype=np.int16)
        waveform = wav_data / 32768.0  # scale int16 samples by 1/32768

        yamnet = models_by_rate.get(sr)
        if yamnet is None:
            # The model is configured for the file's own sample rate and a
            # patch hop of 1 (seconds, per the parameter name), i.e. one
            # score frame per second.
            params = yamnet_params.Params(sample_rate=sr, patch_hop_seconds=1)
            yamnet = yamnet_model.yamnet_frames_model(params)
            yamnet.load_weights(PATH_YAMNET_WEIGHTS)
            models_by_rate[sr] = yamnet

        # Run the model.
        scores, _embeddings, _ = yamnet(waveform)
        scores = scores.numpy()

        # Rank classes by their mean score over all frames, best first.
        mean_scores = np.mean(scores, axis=0)
        top_class_indices = np.argsort(mean_scores)[::-1][:topClasses]

        # rows = frames (seconds), cols = the selected top classes
        top_scores = scores[:, top_class_indices]
        semanticResults[ch] = {
            frame: {
                int(class_index): round(float(score), 2)
                for class_index, score in zip(top_class_indices, frame_scores)
            }
            for frame, frame_scores in enumerate(top_scores)
        }

    print(semanticResults)
    return semanticResults