def load_eval_data():
    """Build the zero-padded spectrogram batch used for evaluation.

    The entries come from ``create_train_data()``. With ``hp.sanity_check``
    set, the first ``hp.batch_size`` entries are used (the same ones used for
    training); otherwise the last ``hp.num_samples`` entries are used.

    Returns:
        A NumPy array of shape ``(len(texts), hp.max_len, hp.n_mels * hp.r)``.
        Row ``i`` holds the frame-reduced spectrogram of entry ``i`` in its
        leading frames; the remaining frames stay zero.
    """
    texts, _ = create_train_data()
    if hp.sanity_check:
        # Generate samples for the same entries that were used for training.
        texts = texts[:hp.batch_size]
    else:
        texts = texts[-hp.num_samples:]

    X = np.zeros([len(texts), hp.max_len, hp.n_mels * hp.r])
    # Function-call form: valid on Python 3 and prints the same on Python 2.
    print(texts)

    # NOTE(review): each entry is handed straight to get_spectrograms, so
    # despite the name these are presumably audio paths -- confirm.
    for i, text in enumerate(texts):
        spectrogram, _ = get_spectrograms(text)
        spectrogram = reduce_frames(
            spectrogram, hp.win_length // hp.hop_length, hp.r)
        X[i, :spectrogram.shape[0], :] = spectrogram
    return X
def generarDatos(dataset_path):
    """Precompute and store the mel spectrogram of every ``.wav`` file.

    Walks ``dataset_path`` recursively (bottom-up) and, for each file whose
    extension is ``.wav``, saves the mel spectrogram returned by
    ``get_spectrograms`` under ``datos/audioProcesado/`` with ``np.save``.

    Args:
        dataset_path: root directory to scan for ``.wav`` files.
    """
    for dirpath, _subdirs, files in os.walk(dataset_path, topdown=False):
        for filename in files:
            stem, ext = os.path.splitext(filename)
            # Consider only wav files.
            if ext != ".wav":
                continue
            # Join with the directory os.walk is currently visiting, not the
            # root: files in subdirectories would otherwise not be found.
            mel, _mag = get_spectrograms(os.path.join(dirpath, filename))
            # NOTE: only the base name is used, so equally named files in
            # different subdirectories overwrite each other. np.save appends
            # ".npy" because the name does not already end with it.
            np.save("datos/audioProcesado/" + stem + '.pt', mel)
def __getitem__(self, idx):
    """Compute, cache to disk and return the spectrograms of sample ``idx``.

    The wav path is ``self.root_dir`` joined with the first column of row
    ``idx`` in ``self.landmarks_frame``, plus ``.wav``. Both arrays are also
    stored next to the wav file via ``np.save`` (``<stem>.pt`` and
    ``<stem>.mag``).

    Args:
        idx: positional row index into ``self.landmarks_frame``.

    Returns:
        dict with keys ``'mel'`` and ``'mag'`` holding the two arrays
        returned by ``get_spectrograms``.
    """
    # .iloc replaces the removed DataFrame.ix accessor; access is purely
    # positional (row idx, first column).
    stem = os.path.join(self.root_dir, self.landmarks_frame.iloc[idx, 0])
    wav_name = stem + '.wav'
    mel, mag = get_spectrograms(wav_name)
    np.save(stem + '.pt', mel)
    np.save(stem + '.mag', mag)
    return {'mel': mel, 'mag': mag}
def __getitem__(self, idx):
    """Return the spectrograms of sample ``idx`` and store them on disk.

    The wav path is resolved with ``self.get_wav_path`` from
    ``self.array_indexes[idx]``. The mel array is saved with ``np.save`` to
    the path with its last four characters replaced by ``.pt``, the
    magnitude array likewise with ``.mag``.

    Returns:
        dict with keys ``'mel'`` and ``'mag'``.
    """
    wav_path = self.get_wav_path(self.array_indexes[idx])
    mel, mag = get_spectrograms(wav_path)

    # Drop the 4-character extension to build the output names.
    stem = wav_path[:-4]
    for suffix, array in (('.pt', mel), ('.mag', mag)):
        np.save(stem + suffix, array)

    return {'mel': mel, 'mag': mag}
def process(args):
    """Serialize one shard of the dataset into a TFRecord file.

    The shard is written to ``train_<tfid>.tfrecord`` inside
    ``hp.TRAIN_DATASET_PATH``. Each record has three features:

    * ``'x'``: int64 list, the flattened output of ``match_vocab(text)``
    * ``'y'``: float list, the flattened mel array
    * ``'z'``: float list, the flattened magnitude array

    Args:
        args: a ``(tfid, split_dataset)`` pair. ``split_dataset`` is an
            iterable of items whose element 0 is the text and element 1 is
            the path passed to ``get_spectrograms``. A single tuple argument
            is used, presumably so the function can be mapped over a worker
            pool -- confirm against the caller.
    """
    (tfid, split_dataset) = args
    record_path = os.path.join(hp.TRAIN_DATASET_PATH,
                               f'train_{tfid}.tfrecord')
    # The context manager flushes and closes the file even if an item fails.
    with tf.python_io.TFRecordWriter(record_path) as writer:
        for item in tqdm(split_dataset):
            text = item[0]
            fpath = item[1]
            idxs = match_vocab(text)
            mel, mag = get_spectrograms(fpath)
            example = tf.train.Example(features=tf.train.Features(feature={
                'x': tf.train.Feature(
                    int64_list=tf.train.Int64List(value=idxs.reshape(-1))),
                'y': tf.train.Feature(
                    float_list=tf.train.FloatList(value=mel.reshape(-1))),
                'z': tf.train.Feature(
                    float_list=tf.train.FloatList(value=mag.reshape(-1))),
            }))
            # Without this write the example was built and then discarded.
            writer.write(example.SerializeToString())
def __getitem__(self, idx):
    """Return the spectrograms of sample ``idx`` and store them on disk.

    The first column of row ``idx`` in ``self.landmarks_frame`` is an id of
    the form ``spkr_emo_fnum``. The audio is read from
    ``<root_dir>/<spkr>/<emo>/wav_22/<fnum zero-padded to 5 digits>.wav``
    and both arrays are saved with ``np.save`` under
    ``hp.preprocess_path`` using the id as base name (``.pt`` for mel,
    ``.mag`` for magnitude).

    Returns:
        dict with keys ``'mel'`` and ``'mag'``.
    """
    sample_id = self.landmarks_frame.iloc[idx, 0]
    # The id has exactly three fields: speaker, emotion, file number.
    speaker, emotion, file_number = sample_id.strip().split('_')

    wav_file = "{:05d}".format(int(file_number)) + '.wav'
    wav_path = os.path.join(
        self.root_dir, speaker, emotion, 'wav_22', wav_file)
    output_base = os.path.join(hp.preprocess_path, sample_id)

    mel, mag = get_spectrograms(wav_path)
    for suffix, array in (('.pt', mel), ('.mag', mag)):
        np.save(output_base + suffix, array)

    return {'mel': mel, 'mag': mag}
def _classify_songs(song_paths, model, device, genrelist, silent):
    """Classify each song and return a DataFrame with one row per song.

    Every song is segmented, turned into spectrograms and fed to ``model``.
    The exponentiated predictions are averaged over the first dimension to
    give one value per genre (the model presumably emits log-probabilities
    per segment -- confirm against GenreClassifier).
    """
    df = pd.DataFrame(columns=['song_path'] + genrelist)
    for idx, song in enumerate(song_paths):
        segments = segment_audio(song)
        audio_spectrograms = get_spectrograms(segments)
        with t.no_grad():
            pred = model(audio_spectrograms.to(device))
            aux = t.exp(pred)
            percentage = (aux.sum(dim=0) / len(aux)).tolist()
        df.loc[idx] = [song] + percentage
        if not silent:
            print("Song '...{:.30}' is genre {:10}".format(
                song[-30:], genrelist[np.argmax(percentage)]))
    return df


def main():
    """Command-line entry point: classify audio files by genre.

    ``input`` is either a text file with one audio path per line or a
    folder of audio files. One row per song (path plus one column per
    genre) is written as CSV to ``--output_file``.

    Raises:
        IOError: if the model state file does not exist, or if ``input`` is
            neither an existing file nor an existing directory.
    """
    genrelist = ['reggaeton', 'bachata', 'salsa', 'merengue', 'chachacha']

    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'input',
        type=str,
        help='File containing the path to the audios to classify, one path '
        'per line. It can also be the path to a folder with audios instead '
        'of a file.')
    parser.add_argument('-o', '--output_file', type=str, default='output.csv',
                        help='Output file with the classified audios')
    parser.add_argument('-s', '--silent', action='store_true',
                        help='Dont print the results, only write the file')
    parser.add_argument('-m', '--model', type=str, default='model.pt',
                        help='File with the model dict_state')
    args = parser.parse_args()

    mod_state = args.model
    if not os.path.isfile(mod_state):
        raise IOError(
            'Model state dictionary {} does not exist'.format(mod_state))

    # Pick the device first so the checkpoint can be remapped onto it; a
    # state dict saved from CUDA otherwise fails to load on a CPU-only host.
    device = t.device('cuda' if t.cuda.is_available() else 'cpu')

    # Load the model
    model = GenreClassifier()
    model.load_state_dict(t.load(mod_state, map_location=device))
    model.eval()
    print('Using device:', device)
    print('---------------')
    model.to(device)

    # Additional info when using cuda
    if device.type == 'cuda':
        print(t.cuda.get_device_name(0))
        print('Memory Usage:')
        print('Allocated:',
              round(t.cuda.memory_allocated(0) / 1024**3, 1), 'GB')
        print('Cached:   ',
              round(t.cuda.memory_cached(0) / 1024**3, 1), 'GB')

    if os.path.isfile(args.input):
        # One path per line; blank lines are skipped instead of being
        # passed on as empty paths.
        with open(args.input, 'r') as inputfile:
            stripped = [line.strip('\n') for line in inputfile]
        song_paths = [path for path in stripped if path]
    elif os.path.isdir(args.input):
        song_paths = [os.path.join(args.input, name)
                      for name in os.listdir(args.input)]
    else:
        raise IOError(
            'Input {} is neither a file nor a directory'.format(args.input))

    df = _classify_songs(song_paths, model, device, genrelist, args.silent)
    df.to_csv(args.output_file, index=False)
def main():
    """Command-line entry point: score waveform files with a MOSNet model.

    Finds every ``*.wav`` under ``--rootdir``, predicts a score for each
    one, prints the average and writes ``MOSnet_result_raw.txt`` (one
    ``<path> <score>`` line per file, sorted, followed by the average) into
    ``--rootdir``.
    """
    parser = argparse.ArgumentParser(
        description="Evaluate custom waveform files using pretrained MOSnet.")
    parser.add_argument("--rootdir", default=None, type=str,
                        help="rootdir of the waveforms to be evaluated")
    parser.add_argument("--pretrained_model",
                        default="./pre_trained/cnn_blstm.h5", type=str,
                        help="pretrained model file")
    args = parser.parse_args()

    # Without a root directory both the file search and the result path
    # fail later with an opaque error; report it as a usage error instead.
    if args.rootdir is None:
        parser.error("--rootdir is required")

    #### tensorflow & gpu settings ####
    # 0 = all messages are logged (default behavior)
    # 1 = INFO messages are not printed
    # 2 = INFO and WARNING messages are not printed
    # 3 = INFO, WARNING, and ERROR messages are not printed
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # or any {'0', '1', '2'}
    tf.debugging.set_log_device_placement(False)

    # set memory growth
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
        try:
            # Currently, memory growth needs to be the same across GPUs
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
            logical_gpus = tf.config.experimental.list_logical_devices('GPU')
            print(len(gpus), "Physical GPUs,",
                  len(logical_gpus), "Logical GPUs")
        except RuntimeError as e:
            # Memory growth must be set before GPUs have been initialized
            print(e)
    ###################################

    # find waveform files
    wavfiles = sorted(find_files(args.rootdir, "*.wav"))
    if not wavfiles:
        # Nothing to score: skip loading the model and avoid writing a
        # result file whose average would be NaN.
        print("No .wav files found under {}".format(args.rootdir))
        return

    # init model
    print("Loading model weights")
    MOSNet = CNN_BLSTM()
    model = MOSNet.build()
    model.load_weights(args.pretrained_model)

    # evaluation
    print("Start evaluating {} waveforms...".format(len(wavfiles)))
    results = []
    scores = []
    for wavfile in tqdm(wavfiles):
        # spectrogram, reshaped to a batch of one
        mag_sgram = utils.get_spectrograms(wavfile)
        timestep = mag_sgram.shape[0]
        mag_sgram = np.reshape(mag_sgram, (1, timestep, utils.SGRAM_DIM))

        # make prediction
        average_score, _frame_score = model.predict(
            mag_sgram, verbose=0, batch_size=1)

        # Keep the reported (3-decimal) value so the printed average stays
        # reproducible from the result file, without re-parsing the lines.
        score_text = "{:.3f}".format(average_score[0][0])
        results.append(wavfile + " " + score_text)
        scores.append(float(score_text))

    # print average
    average = np.mean(np.array(scores))
    print("Average: {}".format(average))

    # write final raw result
    resultrawpath = os.path.join(args.rootdir, "MOSnet_result_raw.txt")
    with open(resultrawpath, "w") as outfile:
        outfile.write("\n".join(sorted(results)))
        outfile.write("\nAverage: {}\n".format(average))