def interpolate(recordedAudio, instrument):
    global voiceWeight, defaultInstrument
    sample_length = 80000
    instrumentType = defaultInstrument
    if instrument != "":
        instrumentType = instrument
    instrumentFile = "./sounds/recordings/instrument_recordings/" + instrumentType + ".wav"
    instrumentEncoding = "./sounds/encodings/instrument_encodings/" + instrumentType + ".npy"
    aud1, enc1 = load_encoding(recordedAudio, sample_length)
    # Check whether the instrument encoding has already been cached
    enc2 = None
    print("encoding path", instrumentEncoding)
    if checkAudio(instrumentType + ".npy"):
        print("encoding exists")
        enc2 = np.load(instrumentEncoding)
    else:
        print("encoding does not exist")
        aud2, enc2 = load_encoding(instrumentFile, sample_length)
        np.save(instrumentEncoding, enc2)
    enc_mix = (1.5 * enc1 + enc2) / 2.0
    outputPath = './mixes/mix' + str(len(os.listdir('./mixes'))) + '.wav'
    fastgen.synthesize(enc_mix,
                       checkpoint_path='../wavenet-ckpt/model.ckpt-200000',
                       save_paths=[outputPath])
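A hedged sketch of the two helpers interpolate() depends on but does not define here: load_encoding mirrors the definition that appears in a later snippet, while checkAudio is hypothetical (its name and the cache path are assumptions).

def load_encoding(fname, sample_length=None, sr=16000,
                  ckpt='../wavenet-ckpt/model.ckpt-200000'):
    # Load audio and run the NSynth encoder over it (as defined in a later snippet).
    audio = utils.load_audio(fname, sample_length=sample_length, sr=sr)
    encoding = fastgen.encode(audio, ckpt, sample_length)
    return audio, encoding

def checkAudio(encoding_name):
    # Hypothetical helper: report whether a cached .npy encoding already exists on disk.
    return os.path.exists('./sounds/encodings/instrument_encodings/' + encoding_name)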
def synthesizer():
    print('Synthesizing with Sample rate: ' + str(sr) + ' and Sample length: ' + str(sl))
    for dirpath, dirnames, filenames in os.walk(inputpath):
        newdir = dirpath[len(inputpath):]
        newdir = newdir[1:]
        structure = os.path.join(outputpath, newdir)
        print(structure)
        if not os.path.isdir(structure):
            os.mkdir(structure)
            for fname in filenames:
                if fnmatch.fnmatch(fname, '*.wav'):
                    print(fname)
                    audio, encoding = load_encoding(os.path.join(dirpath, fname))
                    # np.save(structure + fname + '.npy', encoding)
                    print('Synthesizing ' + fname + '.. ')
                    fastgen.synthesize(encoding,
                                       save_paths=[os.path.join(structure, fname)],
                                       checkpoint_path=ckpt,
                                       samples_per_save=sl)
                    print(fname + ': Done!')
        else:
            print("Folder: " + structure + " already exists!")
    return
def decode(encoding, path, filename, sample_length, model_path):
    print('decoding..')
    outdir = '/home/paperspace/data/sounds_gen/'
    fastgen.synthesize(encoding,
                       save_paths=[outdir + filename],
                       checkpoint_path=model_path,
                       samples_per_save=sample_length)
    print('finished decoding..')
def decode(self):
    fastgen.synthesize(
        self.encoding,
        save_paths=[self.fname],
        checkpoint_path=self.ckpt,
        samples_per_save=self.sample_length,
    )
    self.load_audio()
def decode(fname, encoding, sample_length=44100, sr=16000):
    # `encoding` was previously read from an undefined name; pass it in explicitly.
    fastgen.synthesize(encoding,
                       save_paths=['gen_' + fname],
                       samples_per_save=sample_length)
    synthesis = utils.load_audio('gen_' + fname,
                                 sample_length=sample_length,
                                 sr=sr)
    return synthesis
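A minimal round-trip sketch for the decode() above; the input file and checkpoint names are placeholders. fastgen.encode produces the encoding that decode() then resynthesizes.

# Placeholder file and checkpoint names; adjust for your setup.
audio = utils.load_audio('input.wav', sample_length=44100, sr=16000)
encoding = fastgen.encode(audio, 'model.ckpt-200000', audio.shape[0])
synthesis = decode('input.wav', encoding, sample_length=audio.shape[0])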
def main(unused_argv=None):
    os.environ["CUDA_VISIBLE_DEVICES"] = str(FLAGS.gpu_number)
    source_path = utils.shell_path(FLAGS.source_path)
    checkpoint_path = utils.shell_path(FLAGS.checkpoint_path)
    save_path = utils.shell_path(FLAGS.save_path)
    if not save_path:
        raise RuntimeError("Must specify a save_path.")
    tf.logging.set_verbosity(FLAGS.log)

    # Generate from wav files
    if tf.gfile.IsDirectory(source_path):
        files = tf.gfile.ListDirectory(source_path)
        exts = [os.path.splitext(f)[1] for f in files]
        if ".wav" in exts:
            postfix = ".wav"
        elif ".npy" in exts:
            postfix = ".npy"
        else:
            raise RuntimeError("Folder must contain .wav or .npy files.")
        postfix = ".npy" if FLAGS.npy_only else postfix
        files = sorted([
            os.path.join(source_path, fname)
            for fname in files
            if fname.lower().endswith(postfix)
        ])
    elif source_path.lower().endswith((".wav", ".npy")):
        # Set postfix here too; it was left undefined in the single-file case.
        postfix = os.path.splitext(source_path.lower())[1]
        files = [source_path]
    else:
        files = []

    # Now synthesize from files one batch at a time
    batch_size = FLAGS.batch_size
    sample_length = FLAGS.sample_length
    n = len(files)
    for start in range(0, n, batch_size):
        end = start + batch_size
        batch_files = files[start:end]
        save_names = [
            os.path.join(
                save_path,
                "gen_" + os.path.splitext(os.path.basename(f))[0] + ".wav")
            for f in batch_files
        ]
        print('loading batch..')
        batch_data = fastgen.load_batch(batch_files, sample_length=sample_length)
        # Encode waveforms
        encodings = batch_data if postfix == ".npy" else fastgen.encode(
            batch_data, checkpoint_path, sample_length=sample_length)
        if FLAGS.gpu_number != 0:
            with tf.device("/device:GPU:%d" % FLAGS.gpu_number):
                fastgen.synthesize(encodings, save_names,
                                   checkpoint_path=checkpoint_path)
        else:
            fastgen.synthesize(encodings, save_names,
                               checkpoint_path=checkpoint_path)
def synthesize(encoding_mix: np.ndarray,
               checkpoint: str = "checkpoints/wavenet-ckpt/model.ckpt-200000"):
    os.makedirs(os.path.join("output", "synth"), exist_ok=True)
    date_and_time = time.strftime("%Y-%m-%d_%H%M%S")
    output = os.path.join("output", "synth", f"{date_and_time}.wav")
    # Add a batch dimension before synthesis
    encoding_mix = np.array([encoding_mix])
    fastgen.synthesize(encoding_mix,
                       checkpoint_path=checkpoint,
                       save_paths=[output])
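A usage sketch for this synthesize(): average two saved encodings and render the mix to a timestamped file. The .npy names are placeholders, and the arrays are assumed unbatched (time, channels) since the function adds the batch dimension itself.

# Placeholder .npy files; fastgen encodings are saved as (1, time, channels),
# so drop the leading batch dimension before mixing.
enc_a = np.load('flute_encoding.npy')[0]
enc_b = np.load('cello_encoding.npy')[0]
synthesize((enc_a + enc_b) / 2.0)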
def synthesizeEncodings(encoding1Name, encoding2Name, encoding1, encoding2,
                        encoding1Weight, encoding2Weight):
    global voiceWeight, defaultInstrument
    sample_length = 80000
    voiceEncodingPath = "./sounds/encodings/voice_encodings/recording3_encoding.npy"
    instrumentEncodingPath = "./sounds/encodings/instrument_encodings/cello.npy"
    enc_mix = (encoding1Weight * encoding1 + encoding2Weight * encoding2) / 2.0
    outputFile = (encoding1Name + "_" + str(encoding1Weight) + "_" +
                  encoding2Name + "_" + str(encoding2Weight))
    outputPath = './mixes/weight_experiments/' + outputFile + '.wav'
    fastgen.synthesize(enc_mix,
                       checkpoint_path='../wavenet-ckpt/model.ckpt-200000',
                       save_paths=[outputPath])
def synthesize(encodings_mix: np.ndarray,
               encodings_mix_name: List[str],
               checkpoint: str = "checkpoints/wavenet-ckpt/model.ckpt-200000") \
        -> None:
    """
    Synthesizes the list of encodings and saves them under the list of names.
    This might take a long time on commodity hardware (~15 minutes).

    :param encodings_mix: the list of encodings to synthesize
    :param encodings_mix_name: the list of encoding names for the output files
    :param checkpoint: the checkpoint path
    """
    os.makedirs(os.path.join("output", "nsynth"), exist_ok=True)
    encodings_mix_name = [os.path.join("output", "nsynth", encoding_mix_name + ".wav")
                          for encoding_mix_name in encodings_mix_name]
    fastgen.synthesize(encodings_mix,
                       checkpoint_path=checkpoint,
                       save_paths=encodings_mix_name)
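One way this synthesize() might be called (a sketch; enc1, enc2, and the output name are placeholders). The batch of encodings lines up one-to-one with the list of names.

# enc1 and enc2 are assumed to be (1, time, channels) fastgen encodings,
# so their average is a batch of one encoding, matched by one name.
mix = (enc1 + enc2) / 2.0
synthesize(mix, ["flute_cello_mix"])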
def interpolate():
    sample_length = 80000
    # from https://www.freesound.org/people/MustardPlug/sounds/395058/
    aud1, enc1 = load_encoding(
        '395058__mustardplug__breakbeat-hiphop-a4-4bar-96bpm.wav',
        sample_length)
    # from https://www.freesound.org/people/xserra/sounds/176098/
    aud2, enc2 = load_encoding('176098__xserra__cello-cant-dels-ocells.wav',
                               sample_length)
    enc_mix = (enc1 + enc2) / 2.0
    fig, axs = plt.subplots(3, 1, figsize=(10, 7))
    axs[0].plot(enc1[0])
    axs[0].set_title('Encoding 1')
    axs[1].plot(enc2[0])
    axs[1].set_title('Encoding 2')
    axs[2].plot(enc_mix[0])
    axs[2].set_title('Average')
    # save_paths expects a list of paths
    fastgen.synthesize(enc_mix, save_paths=['mix.wav'])
def merge_sounds(audio_list, skip_existing=True):
    # Output length = sample_length / sample_rate, about 3 seconds
    sample_length = 40000
    sample_rate = 13300
    audio_1 = audio_list[0]
    audio_2 = audio_list[1]
    audio_name_1 = audio_1.split(SEPARATOR)[-1].split('.')[0]
    audio_name_2 = audio_2.split(SEPARATOR)[-1].split('.')[0]
    output_name = ''.join(sorted([audio_name_1, audio_name_2]))
    output_path = '{}{}{}.wav'.format(OUTPUT_DIR, SEPARATOR, output_name)
    if os.path.exists(output_path) and skip_existing:
        print('Skipping sounds "{}" and "{}"'.format(audio_1, audio_2))
        return output_path
    print('Merging sounds "{}" and "{}"'.format(audio_1, audio_2))
    sample_length = 35000  # overrides the value set above
    try:
        print("Loading Audio_1")
        aud1, enc1 = load_encoding(audio_1, sample_length=sample_length, sr=sample_rate)
        print("Loading Audio_2")
        aud2, enc2 = load_encoding(audio_2, sample_length=sample_length, sr=sample_rate)
        enc_mix = (enc1 + enc2) / 2.0
        print("Synthesizing new audio: {}".format(output_name))
        fastgen.synthesize(enc_mix, checkpoint_path=MODEL, save_paths=[output_path])
    except Exception as e:
        print('Error, skipping combo: {},\nError: {}'.format(str(output_name), str(e)))
    return output_path
def Plot_SingleFile(file_name, sampleRate):
    # sample_length controls how much audio is loaded.
    audio = utils.load_audio(file_name, sample_length=70000)
    sample_length = audio.shape[0]
    print('{} samples, {} seconds'.format(sample_length,
                                          sample_length / float(sampleRate)))
    # Encode the new sound.
    encoding = fastgen.encode(audio, 'model.ckpt-200000', sample_length)
    print(encoding.shape)
    np.save(file_name + '.npy', encoding)
    fig, axs = plt.subplots(2, 1, figsize=(10, 5))
    axs[0].plot(audio)
    axs[0].set_title('Audio Signal')
    axs[1].plot(encoding[0])
    axs[1].set_title('NSynth Encoding')
    # Synthesis
    fastgen.synthesize(encoding,
                       save_paths=['gen_' + file_name],
                       samples_per_save=sample_length)
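Example invocation of Plot_SingleFile (a sketch: the .wav name is a placeholder and 'model.ckpt-200000' is assumed to sit in the working directory):

Plot_SingleFile('input.wav', sampleRate=16000)
plt.show()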
def main(unused_argv=None):
    source_path = utils.shell_path(FLAGS.source_path)
    checkpoint_path = utils.shell_path(FLAGS.checkpoint_path)
    save_path = utils.shell_path(FLAGS.save_path)
    if not save_path:
        raise RuntimeError("Must specify a save_path.")
    tf.logging.set_verbosity(FLAGS.log)

    # Generate from wav files
    if tf.gfile.IsDirectory(source_path):
        files = tf.gfile.ListDirectory(source_path)
        exts = [os.path.splitext(f)[1] for f in files]
        if ".wav" in exts:
            postfix = ".wav"
        elif ".npy" in exts:
            postfix = ".npy"
        else:
            raise RuntimeError("Folder must contain .wav or .npy files.")
        postfix = ".npy" if FLAGS.encodings else postfix
        files = sorted([
            os.path.join(source_path, fname)
            for fname in files
            if fname.lower().endswith(postfix)
        ])
    # `postfix` was undefined in the single-file case, so test both extensions.
    elif source_path.lower().endswith((".wav", ".npy")):
        files = [source_path]
    else:
        files = []

    for f in files:
        out_file = os.path.join(
            save_path,
            "gen_" + os.path.splitext(os.path.basename(f))[0] + ".wav")
        tf.logging.info("OUTFILE %s" % out_file)
        synthesize(source_file=f,
                   checkpoint_path=checkpoint_path,
                   out_file=out_file,
                   sample_length=FLAGS.sample_length)
def decoding(fname, sample_length, sr, encoding):
    fastgen.synthesize(encoding,
                       save_paths=['gen_' + fname[fname.rfind('/') + 1:]],
                       samples_per_save=sample_length)
def main(unused_argv=None):
    os.environ["CUDA_VISIBLE_DEVICES"] = str(FLAGS.gpu_number)
    source_path = utils.shell_path(FLAGS.source_path)
    checkpoint_path = utils.shell_path(FLAGS.checkpoint_path)
    save_path = utils.shell_path(FLAGS.save_path)
    if not save_path:
        raise ValueError("Must specify a save_path.")
    tf.logging.set_verbosity(FLAGS.log)

    # Use directory of files
    if tf.gfile.IsDirectory(source_path):
        files = tf.gfile.ListDirectory(source_path)
        file_extensions = [os.path.splitext(f)[1] for f in files]
        if ".wav" in file_extensions:
            file_extension = ".wav"
        elif ".npy" in file_extensions:
            file_extension = ".npy"
        else:
            raise RuntimeError("Folder must contain .wav or .npy files.")
        file_extension = ".npy" if FLAGS.npy_only else file_extension
        files = sorted([
            os.path.join(source_path, fname)
            for fname in files
            if fname.lower().endswith(file_extension)
        ])
    # Use a single file
    elif source_path.lower().endswith((".wav", ".npy")):
        file_extension = os.path.splitext(source_path.lower())[1]
        files = [source_path]
    else:
        raise ValueError(
            "source_path {} must be a folder or file.".format(source_path))

    # Now synthesize from files one batch at a time
    batch_size = FLAGS.batch_size
    sample_length = FLAGS.sample_length
    n = len(files)
    for start in range(0, n, batch_size):
        end = start + batch_size
        batch_files = files[start:end]
        save_names = [
            os.path.join(
                save_path,
                "gen_" + os.path.splitext(os.path.basename(f))[0] + ".wav")
            for f in batch_files
        ]
        # Encode waveforms
        if file_extension == ".wav":
            batch_data = fastgen.load_batch_audio(batch_files,
                                                  sample_length=sample_length)
            encodings = fastgen.encode(batch_data, checkpoint_path,
                                       sample_length=sample_length)
        # Or load encodings
        else:
            encodings = fastgen.load_batch_encodings(batch_files,
                                                     sample_length=sample_length)
        # Synthesize multi-gpu
        if FLAGS.gpu_number != 0:
            with tf.device("/device:GPU:%d" % FLAGS.gpu_number):
                fastgen.synthesize(encodings, save_names,
                                   checkpoint_path=checkpoint_path)
        # Single gpu
        else:
            fastgen.synthesize(encodings, save_names,
                               checkpoint_path=checkpoint_path)
def encodeAndDecode(recordingName, recordingPath):
    aud1, enc1 = load_encoding(recordingPath, 80000)
    encodeOutputPath = ('./sounds/encodings/voice_encodings/' + recordingName +
                        '_encoding' +
                        str(len(os.listdir('./sounds/encodings/voice_encodings'))))
    decodeOutputPath = ('./mixes/single_voice_decoding/' + recordingName +
                        '_decoding' +
                        str(len(os.listdir('./mixes/single_voice_decoding'))) + '.wav')
    np.save(encodeOutputPath, enc1)
    fastgen.synthesize(enc1,
                       checkpoint_path='../wavenet-ckpt/model.ckpt-200000',
                       save_paths=[decodeOutputPath])
import os

import numpy as np
from magenta.models.nsynth import utils
from magenta.models.nsynth.wavenet import fastgen

filename = '/data/input/battle1.wav'
sr = 44100
audio = utils.load_audio(filename, sample_length=(sr * 4), sr=sr)
sample_length = audio.shape[0]
print('{} samples, {} seconds'.format(sample_length, sample_length / float(sr)))

encoding = fastgen.encode(audio, '/data/model/wavenet-ckpt/model.ckpt-200000',
                          sample_length)
print(encoding.shape)
np.save(filename.replace('.wav', '') + '_encoded.npy', encoding)

fastgen.synthesize(
    encoding,
    save_paths=['/data/output/test.wav'],
    samples_per_save=sample_length,
    checkpoint_path="/data/model/wavenet-ckpt/model.ckpt-200000")
st.write('Encoding took ' + str(end - start) + ' seconds')
st.write('Encoding shape ' + str(encoding.shape))

# Save encoding
np.save(filenames[0] + '.npy', encoding)

# Plot PCM and encoding
fig, axs = plt.subplots(2, 1, figsize=(10, 5))
axs[0].plot(x1)
axs[0].set_title('Audio Signal')
axs[1].plot(encoding[0])
axs[1].set_title('NSynth Encoding')
st.pyplot()

# Decoding
start = time.time()
fastgen.synthesize(encoding,
                   checkpoint_path=model_dir,
                   save_paths=['gen_' + filenames[0]],
                   samples_per_save=sample_length)
end = time.time()
st.write('Decoding took ' + str(end - start) + ' seconds')

# Evaluate reconstruction
x1_gen, _ = Load(output_dir, 'gen_' + filenames[0], sr=sr)
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(x1_gen)
ax.set_title('Reconstructed Audio Signal')
st.pyplot()
fname = "03 Plimsoll Punks.wav" ckpt = "model.ckpt-200000" sr = 16000 audio = utils.load_audio(fname, sample_length=16000, sr=sr) sample_length = audio.shape[0] print ("{} samples , {} seconds".format(sample_length, sample_length/float(sr))) encoding = fastgen.encode(audio, ckpt, sample_length) print(encoding.shape) np.save(fname.split(".")[0] + ".npy", encoding) fig, axs = plt.subplots(2, 1, figsize=(10, 5)) axs[0].plot(audio); axs[0].set_title("Audio Signal") axs[1].plot(encoding[0]); axs[1].set_title("NSynth Encoding") # Verify fast to generate encoding fastgen.synthesize(encoding, save_paths=["gen_" + fname], samples_per_save=sample_length) sr = 16000 # Output file. Listen to it to see what nerual synthesis does. Note this uses 8-bit mu-law # therefore the sound quality is not good. Will later used better resolution # Be patient. This takes at least 15 min to teminate synthesis = utils.load_audio("gen_" + fname, sample_length=sample_length, sr=sr)
audio, encoding = load_encoding(_file, sample_length, sample_rate, _model)
np.save(without_extension(_file) + '.npy', encoding)
print("(batch_size, time_steps, dimensions) :", encoding.shape)

# plotting #
if PLOT:
    fig, axs = plt.subplots(2, 1, figsize=(10, 5))
    axs[0].plot(audio)
    axs[0].set_title('Audio Signal')
    axs[1].plot(encoding[0])
    axs[1].set_title('NSynth Encoding')

# decoding #
'''Synthesizes audio from the encoding and saves it'''
fastgen.synthesize(
    encoding,
    save_paths=[without_extension(_file) + "_decoded." + get_extension(_file)],
    samples_per_save=sample_length)
if DEBUG:
    print("Generation for normal encoding achieved!")

# slower and faster encoding #
encoding_slower = timestretch(encoding, 1.5)
encoding_faster = timestretch(encoding, 0.5)
if PLOT:
    fig, axs = plt.subplots(3, 1, figsize=(10, 7), sharex=True, sharey=True)
    axs[0].plot(encoding[0])
    axs[0].set_title('Encoding (Normal Speed)')
    axs[1].plot(encoding_faster[0])
    axs[1].set_title('Encoding (Faster)')
def synth():
    fastgen.synthesize(xfade_encoding,
                       checkpoint_path=model_dir,
                       save_paths=['enc_' + fade_type + '_' + FirstSong_fname +
                                   SecondSong_fname],
                       samples_per_save=sample_length)
    return None
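synth() reads everything from module-level globals. A hypothetical setup sketch, borrowing the load_encoding and crossfade helpers defined in later snippets; all file and checkpoint names here are placeholders.

# Placeholder globals consumed by synth().
model_dir = '../wavenet-ckpt/model.ckpt-200000'
fade_type = 'xfade'
FirstSong_fname, SecondSong_fname = 'song1.wav', 'song2.wav'
sample_length = 80000
_, enc_a = load_encoding(FirstSong_fname, sample_length)
_, enc_b = load_encoding(SecondSong_fname, sample_length)
xfade_encoding = crossfade(enc_a, enc_b)
synth()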
import os
import time

import numpy as np
from magenta.models.nsynth import utils
from magenta.models.nsynth.wavenet import fastgen
from scipy import signal

# Assigning a bare variable does nothing; set the environment variable instead.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

sample_length = 512
encoding_sine = np.load('wavetable_sine.npy')
encoding_tri = np.load('wavetable_tri.npy')
encoding_saw = np.load('wavetable_saw.npy')

# saw + sine
for i in range(1, 100):
    filename = '../prerender/SawSin/SawSin_0.' + '%02d.txt' % i
    time0 = time.time()
    print('decoding saw+sine interpolation:' + '%02d' % i)
    fastgen.synthesize((encoding_saw * (100 - i) + encoding_sine * i) / 100,
                       save_paths=['tmp'],
                       checkpoint_path='Model/wavenet-ckpt/model.ckpt-200000',
                       samples_per_save=sample_length)
    audio = utils.load_audio('tmp', sample_length=512, sr=16000)
    np.savetxt(filename, [np.around(audio, decimals=5)], delimiter=',', fmt='%1.5f')
    print(time.time() - time0)

# sine + tri (the source is cut off mid-call here; the closing argument below
# mirrors the saw+sine loop above, and the rest of the loop body is missing)
for i in range(1, 100):
    filename = '../prerender/SinTri/SinTri_0.' + '%02d.txt' % i
    time0 = time.time()
    print('decoding sine+tri interpolation:' + '%02d' % i)
    fastgen.synthesize((encoding_sine * (100 - i) + encoding_tri * i) / 100,
                       save_paths=['tmp'],
                       checkpoint_path='Model/wavenet-ckpt/model.ckpt-200000',
                       samples_per_save=sample_length)
def unused():
    # from https://www.freesound.org/people/MustardPlug/sounds/395058/
    fname = '395058__mustardplug__breakbeat-hiphop-a4-4bar-96bpm.wav'
    sr = 16000
    audio = utils.load_audio(fname, sample_length=40000, sr=sr)
    sample_length = audio.shape[0]
    print('{} samples, {} seconds'.format(sample_length, sample_length / float(sr)))
    encoding = fastgen.encode(audio, 'model.ckpt-200000', sample_length)
    print(encoding.shape)
    np.save(fname + '.npy', encoding)
    fig, axs = plt.subplots(2, 1, figsize=(10, 5))
    axs[0].plot(audio)
    axs[0].set_title('Audio Signal')
    axs[1].plot(encoding[0])
    axs[1].set_title('NSynth Encoding')
    fastgen.synthesize(encoding, save_paths=['gen_' + fname],
                       samples_per_save=sample_length)
    sr = 16000
    synthesis = utils.load_audio('gen_' + fname, sample_length=sample_length, sr=sr)

def load_encoding(fname, sample_length=None, sr=16000, ckpt='model.ckpt-200000'):
    audio = utils.load_audio(fname, sample_length=sample_length, sr=sr)
    encoding = fastgen.encode(audio, ckpt, sample_length)
    return audio, encoding

# from https://www.freesound.org/people/maurolupo/sounds/213259/
fname = '213259__maurolupo__girl-sings-laa.wav'
sample_length = 32000
audio, encoding = load_encoding(fname, sample_length)
fastgen.synthesize(encoding, save_paths=['gen_' + fname],
                   samples_per_save=sample_length)
synthesis = utils.load_audio('gen_' + fname, sample_length=sample_length, sr=sr)

# Use image interpolation to stretch the encoding (pip install scikit-image).
from skimage.transform import resize

def timestretch(encodings, factor):
    # Note: the original normalized with an undefined `encoding`; use `encodings`.
    min_encoding, max_encoding = encodings.min(), encodings.max()
    encodings_norm = (encodings - min_encoding) / (max_encoding - min_encoding)
    timestretches = []
    for encoding_i in encodings_norm:
        stretched = resize(encoding_i,
                           (int(encoding_i.shape[0] * factor), encoding_i.shape[1]),
                           mode='reflect')
        stretched = (stretched * (max_encoding - min_encoding)) + min_encoding
        timestretches.append(stretched)
    return np.array(timestretches)

# from https://www.freesound.org/people/MustardPlug/sounds/395058/
fname = '395058__mustardplug__breakbeat-hiphop-a4-4bar-96bpm.wav'
sample_length = 40000
audio, encoding = load_encoding(fname, sample_length)

encoding_slower = timestretch(encoding, 1.5)
encoding_faster = timestretch(encoding, 0.5)

fig, axs = plt.subplots(3, 1, figsize=(10, 7), sharex=True, sharey=True)
axs[0].plot(encoding[0])
axs[0].set_title('Encoding (Normal Speed)')
axs[1].plot(encoding_faster[0])
axs[1].set_title('Encoding (Faster)')
axs[2].plot(encoding_slower[0])
axs[2].set_title('Encoding (Slower)')

fastgen.synthesize(encoding_faster, save_paths=['gen_faster_' + fname])
fastgen.synthesize(encoding_slower, save_paths=['gen_slower_' + fname])

# Listen to the slowed-down version (moved after the synthesize call above,
# since the file does not exist before it; Audio is IPython.display.Audio).
audio = utils.load_audio('gen_slower_' + fname, sample_length=None, sr=sr)
Audio(audio, rate=sr)

sample_length = 80000
# from https://www.freesound.org/people/MustardPlug/sounds/395058/
aud1, enc1 = load_encoding('395058__mustardplug__breakbeat-hiphop-a4-4bar-96bpm.wav',
                           sample_length)
# from https://www.freesound.org/people/xserra/sounds/176098/
aud2, enc2 = load_encoding('176098__xserra__cello-cant-dels-ocells.wav',
                           sample_length)
enc_mix = (enc1 + enc2) / 2.0

fig, axs = plt.subplots(3, 1, figsize=(10, 7))
axs[0].plot(enc1[0])
axs[0].set_title('Encoding 1')
axs[1].plot(enc2[0])
axs[1].set_title('Encoding 2')
axs[2].plot(enc_mix[0])
axs[2].set_title('Average')

fastgen.synthesize(enc_mix, save_paths=['mix.wav'])

def fade(encoding, mode='in'):
    length = encoding.shape[1]
    fadein = (0.5 * (1.0 - np.cos(np.pi * np.arange(length) /
                                  float(length)))).reshape(1, -1, 1)
    if mode == 'in':
        return fadein * encoding
    else:
        return (1.0 - fadein) * encoding

fig, axs = plt.subplots(3, 1, figsize=(10, 7))
axs[0].plot(enc1[0])
axs[0].set_title('Original Encoding')
axs[1].plot(fade(enc1, 'in')[0])
axs[1].set_title('Fade In')
axs[2].plot(fade(enc1, 'out')[0])
axs[2].set_title('Fade Out')

def crossfade(encoding1, encoding2):
    return fade(encoding1, 'out') + fade(encoding2, 'in')

fig, axs = plt.subplots(3, 1, figsize=(10, 7))
axs[0].plot(enc1[0])
axs[0].set_title('Encoding 1')
axs[1].plot(enc2[0])
axs[1].set_title('Encoding 2')
axs[2].plot(crossfade(enc1, enc2)[0])
axs[2].set_title('Crossfade')

fastgen.synthesize(crossfade(enc1, enc2), save_paths=['crossfade.wav'])
def crossfade():
    return fastgen.synthesize(_crossfade(enc1, enc2),
                              save_paths=['crossfade.wav'])
# (tail of a plotting cell from the notebook)
try:
    plt.title('NSynth Encoding')
except Exception as e:
    print(e)

"""# Synthesize

On the GPU, this should take about 4 minutes per 1 second of audio per batch.
"""

#@title Synthesize Interpolations
print('Total Iterations to Complete: %d\n' % SAMPLE_LENGTH)

encodings = np.array(z_list)
save_paths = ['/content/' + name + '.wav' for name in name_list]
fastgen.synthesize(encodings,
                   save_paths=save_paths,
                   checkpoint_path=ckpt_path,
                   samples_per_save=int(SAMPLE_LENGTH / 10))

#@title Download Interpolations
for fname in save_paths:
    print('Downloading: %s' % fname.split('/')[-1])
    download(fname)

#@title Listen to the outputs
print("Originals:\n")
for fname in file_list:
    synth_audio = utils.load_audio(fname, sample_length=SAMPLE_LENGTH, sr=SR)
    print(get_name(fname))
def Combine_Synth(file1, file2):
    sample_length = 20000  # duration
    aud1, enc1 = load_encoding(file1, sample_length)
    aud2, enc2 = load_encoding(file2, sample_length)
    fastgen.synthesize(crossfade(enc1, enc2), save_paths=['crossfade.wav'])
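Example call, reusing the two freesound clips from the interpolation snippets above; the crossfaded result lands in 'crossfade.wav'. Depends on the load_encoding and crossfade helpers defined earlier.

Combine_Synth('395058__mustardplug__breakbeat-hiphop-a4-4bar-96bpm.wav',
              '176098__xserra__cello-cant-dels-ocells.wav')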