def vae_generate(gui):
    """Decode random latent vectors into a sequence and play it.

    One 100-dimensional standard-normal latent vector is drawn per bar, where
    the number of bars is read from ``gui.slider_bars``. The vectors are
    decoded with ``gui.model.decoder``, soft-maxed over dimension 3, joined
    along the first axis, thresholded with ``gui.slider_temperature``,
    post-processed and handed to ``gui.live_instrument.computer_play``.

    Args:
        gui: GUI object providing ``live_instrument``, ``device``, ``model``,
            ``slider_bars``, ``slider_temperature`` and the ``is_running``
            flag.

    Side effects:
        ``gui.is_running`` is set to ``False`` when the function finishes,
        including when generation raises.
    """
    live_instrument = gui.live_instrument
    device = gui.device
    model = gui.model.to(device)
    bars = gui.slider_bars.value()
    gaussian = torch.distributions.Normal(torch.zeros(100), torch.ones(100))

    try:
        with torch.no_grad():
            # Draw every latent vector in one call on the CPU and move the
            # batch afterwards; stacking CPU samples into a tensor that is
            # already on `device` fails on CUDA.
            latents = gaussian.sample((bars,)).to(device)

            recon = model.decoder(latents)
            recon = torch.softmax(recon, dim=3).squeeze(1)

            # Join the per-bar outputs along the first axis in one call.
            generated = torch.cat(tuple(recon), dim=0)

            # Activations below the temperature-derived threshold are zeroed.
            threshold = 1 - gui.slider_temperature.value() / 100
            generated[generated < threshold] = 0

            generated = generated.cpu().numpy()
            generated = debinarizeMidi(generated, prediction=False)
            generated = addCuttedOctaves(generated)
            generated = NoteSmoother(generated, threshold=1).smooth()

            live_instrument.computer_play(prediction=generated)
    finally:
        # Always release the running flag so the GUI is not left stuck.
        gui.is_running = False
def sample(model, temperature=0.5, smooth_threshold=0):
    """Decode one random latent vector and render it as a temporary MIDI.

    A 100-dimensional standard-normal vector is decoded with
    ``model.decoder``; the output is soft-maxed over dimension 3,
    thresholded, post-processed and passed to ``pianorollMatrixToTempMidi``
    with ``show=True`` and ``autoplay=True``.

    Args:
        model: Module exposing a ``decoder``; it is switched to eval mode.
            The input is moved to CUDA when available, so the model is
            expected to already live on that device.
        temperature: Activations below ``1 - temperature`` are set to 0.
        smooth_threshold: If truthy, the result is passed through
            ``NoteSmoother`` with this threshold.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # `model.train()` is a setter that returns the module, so using it as a
    # condition first switches to training mode. Select eval mode directly.
    model.eval()

    gaussian = Normal(torch.zeros(100), torch.ones(100))
    with torch.no_grad():
        latent = gaussian.sample()
        recon = model.decoder(latent.unsqueeze(0).to(device))
        recon = torch.softmax(recon, dim=3)
        recon = recon.squeeze(0).squeeze(0).cpu().numpy()

    recon[recon < (1 - temperature)] = 0
    recon = debinarizeMidi(recon, prediction=False)
    recon = addCuttedOctaves(recon)
    if smooth_threshold:
        recon = NoteSmoother(recon, threshold=smooth_threshold).smooth()

    pianorollMatrixToTempMidi(recon, prediction=True, show=True,
                              showPlayer=False, autoplay=True)
def reconstruct(file_path, model, start_bar, end_bar, temperature=0.5, smooth_threshold=0):
    """Reconstruct a range of bars from a file and plot input vs. output.

    The file is loaded with ``getSlicedPianorollMatrixNp``, transposed and
    octave-cut, and bars ``start_bar:end_bar`` are sent through ``model`` as
    a ``(-1, 1, 96, 60)`` batch. Three pypianoroll plots are produced: the
    original bars, the reconstruction, and the reconstruction after
    ``NoteSmoother``.

    Args:
        file_path: Path handed to ``getSlicedPianorollMatrixNp``.
        model: Module whose forward call returns three values, the first
            being the reconstruction; it is switched to eval mode.
        start_bar: First bar index (inclusive) of the slice.
        end_bar: Last bar index (exclusive) of the slice.
        temperature: Activations below ``1 - temperature`` are set to 0.
        smooth_threshold: Threshold handed to ``NoteSmoother``.

    Raises:
        IndexError: If the requested bar range selects no bars.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # `model.train()` is a setter that returns the module, so using it as a
    # condition first switches to training mode. Select eval mode directly.
    model.eval()

    sample_np = getSlicedPianorollMatrixNp(file_path)
    sample_np = transposeNotesHigherLower(sample_np)
    sample_np = cutOctaves(sample_np)
    sample_np = sample_np[start_bar:end_bar]
    if len(sample_np) == 0:
        # Fail early with a readable message instead of an opaque index
        # error after the model has already run.
        raise IndexError(
            "bar range [{}:{}] selects no bars".format(start_bar, end_bar))

    with torch.no_grad():
        batch = torch.from_numpy(sample_np).float()
        recon, _, _ = model(batch.view(-1, 1, 96, 60).to(device))
        recon = torch.softmax(recon, dim=3)
        recon = recon.squeeze(1).cpu().numpy()

    recon[recon < (1 - temperature)] = 0

    sample_play = debinarizeMidi(sample_np, prediction=False)
    sample_play = addCuttedOctaves(sample_play)
    recon = debinarizeMidi(recon, prediction=True)
    recon = addCuttedOctaves(recon)

    # Join all bars along the first axis with a single concatenation each.
    n_bars = recon.shape[0]
    recon_out = np.concatenate(list(recon), axis=0)
    sample_out = np.concatenate(list(sample_play[:n_bars]), axis=0)

    # plot with pypianoroll
    ppr.plot(ppr.Track(sample_out))
    ppr.plot(ppr.Track(recon_out))

    # smooth output
    smoothed_seq = NoteSmoother(recon_out, threshold=smooth_threshold).smooth()
    ppr.plot(ppr.Track(smoothed_seq))
def vae_main(live_instrument, model, args):
    """Record a live sequence, reconstruct it with the VAE and play it back.

    Blocks until ``live_instrument.clock()`` reports a finished recording,
    encodes the recorded matrix (split into ``args.bars`` bars), decodes the
    latent mean, normalises and thresholds the result and plays it through
    ``live_instrument.computer_play``.

    Args:
        live_instrument: Live input/output object providing ``clock``,
            ``parse_to_matrix``, ``reset_sequence``, ``reset_clock`` and
            ``computer_play``.
        model: Module exposing ``encoder`` (returning two values) and
            ``decoder``.
        args: Namespace with ``bars`` and ``temperature``.
    """
    # Run on whatever device the model already lives on, rather than relying
    # on a `device` name that is not defined in this function.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    # reset live input clock
    print("\nUser input\n")
    live_instrument.reset_sequence()
    live_instrument.reset_clock()
    while True:
        if live_instrument.clock():
            sequence = live_instrument.parse_to_matrix()
            live_instrument.reset_sequence()
            break

    # send live recorded sequence through model and get improvisation
    with torch.no_grad():
        # prepare sample for input
        bars = np.array(np.split(sequence, args.bars))
        bars = cutOctaves(bars)
        batch = torch.from_numpy(bars).float().to(device)
        batch = torch.unsqueeze(batch, 1)

        # TODO reparameterize to get new sequences here with GUI??
        mu, _ = model.encoder(batch)

        # reconstruction, soon ~prediction
        pred = model.decoder(mu).squeeze(1)

        # Join the per-bar outputs along the first axis in one call.
        prediction = torch.cat(tuple(pred), dim=0).cpu().numpy()

    # normalize predictions; skip when the peak is 0 to avoid dividing by zero
    peak = np.abs(np.max(prediction))
    if peak > 0:
        prediction /= peak

    # check midi activations to include rests
    prediction[prediction < (1 - args.temperature)] = 0
    prediction = debinarizeMidi(prediction, prediction=True)
    prediction = addCuttedOctaves(prediction)

    # play predicted sequence note by note
    print("\nPrediction\n")
    live_instrument.computer_play(prediction=prediction)
    live_instrument.reset_sequence()
def embedding_to_midi(self, embedding, out_path='./utils/midi_files/test.mid', temperature=0.5, smooth_threshold=0):
    """Decode a latent embedding and write the result as a MIDI file.

    The embedding is decoded with ``self.model.decoder``, soft-maxed over
    dimension 3, thresholded, post-processed and passed to
    ``pianorollMatrixToTempMidi`` with ``show=False`` and ``autoplay=False``.

    Args:
        embedding: Latent tensor accepted by ``self.model.decoder``; it is
            moved to CUDA when available.
        out_path: Destination path handed to ``pianorollMatrixToTempMidi``.
        temperature: Activations below ``1 - temperature`` are set to 0.
        smooth_threshold: If truthy, the result is passed through
            ``NoteSmoother`` with this threshold.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # `model.train()` is a setter that returns the module, so using it as a
    # condition first switches to training mode. Select eval mode directly.
    self.model.eval()

    with torch.no_grad():
        recon = self.model.decoder(embedding.to(device))
        recon = torch.softmax(recon, dim=3)
        recon = recon.squeeze(0).squeeze(0).cpu().numpy()

    recon[recon < (1 - temperature)] = 0
    recon = debinarizeMidi(recon, prediction=False)
    recon = addCuttedOctaves(recon)
    if smooth_threshold:
        recon = NoteSmoother(recon, threshold=smooth_threshold).smooth()

    pianorollMatrixToTempMidi(recon, path=out_path, prediction=True,
                              show=False, showPlayer=False, autoplay=False)
def interpolate(sample1_path, sample2_path, model, sample1_bar=0, sample2_bar=0, temperature=0.5, smooth_threshold=0, play_loud=False):
    """Interpolate linearly between the embeddings of two bars.

    One bar from each file is encoded. For ``alpha`` in 0.0, 0.1, ..., 1.0
    the embedding ``(1 - alpha) * embed1 + alpha * embed2`` is decoded, and
    its Hamming distance to the (unsmoothed) reconstructions of both inputs
    is printed and collected. Finally the distances are plotted against
    ``alpha`` and all decoded bars are shown as one piano roll.

    Args:
        sample1_path: File containing the first sequence.
        sample2_path: File containing the second sequence.
        model: Module exposing ``encoder`` (returning two values) and
            ``decoder``; it is switched to eval mode.
        sample1_bar: Index of the bar taken from the first file.
        sample2_bar: Index of the bar taken from the second file.
        temperature: Activations below ``1 - temperature`` are set to 0.
        smooth_threshold: If truthy, each interpolated bar is passed through
            ``NoteSmoother`` with this threshold.
        play_loud: If true, every interpolated bar is also rendered with
            ``pianorollMatrixToTempMidi``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # `model.train()` is a setter that returns the module, so using it as a
    # condition first switches to training mode. Select eval mode directly.
    model.eval()

    def load_bar(path, bar):
        # Load one bar of a file and shape it as a single-item model input.
        roll = getSlicedPianorollMatrixNp(path)
        roll = transposeNotesHigherLower(roll)
        roll = cutOctaves(roll[bar])
        return torch.from_numpy(roll.reshape(1, 1, 96, 60)).float().to(device)

    def decode(embedding):
        # Decode an embedding into a thresholded, full-range piano roll.
        roll = model.decoder(embedding)
        roll = torch.softmax(roll, dim=3)
        roll = roll.squeeze(0).squeeze(0).cpu().numpy()
        roll[roll < (1 - temperature)] = 0
        roll = debinarizeMidi(roll, prediction=False)
        return addCuttedOctaves(roll)

    def binary_flat(roll):
        # Flatten a piano roll into a 0/1 vector (on a copy) for Hamming.
        flat = roll.flatten()
        flat[flat > 0] = 1
        return flat

    alphas = [step / 10. for step in range(11)]
    hamming_dists1 = []
    hamming_dists2 = []
    rolls = []

    with torch.no_grad():
        # embed both sequences
        embed1, _ = model.encoder(load_bar(sample1_path, sample1_bar))
        embed2, _ = model.encoder(load_bar(sample2_path, sample2_bar))

        # reference reconstructions for the hamming distance
        hamming1 = binary_flat(decode(embed1))
        hamming2 = binary_flat(decode(embed2))

        for alpha in alphas:
            # decode current interpolation
            recon = decode((1. - alpha) * embed1 + alpha * embed2)
            if smooth_threshold:
                recon = NoteSmoother(recon, threshold=smooth_threshold).smooth()

            # for current hamming
            recon_hamm = binary_flat(recon)
            current_hamming1 = hamming(hamming1, recon_hamm)
            current_hamming2 = hamming(hamming2, recon_hamm)
            hamming_dists1.append(current_hamming1)
            hamming_dists2.append(current_hamming2)

            rolls.append(recon)

            print("alpha = {}".format(alpha))
            print("Hamming distance to sequence 1 is {}".format(
                current_hamming1))
            print("Hamming distance to sequence 2 is {}".format(
                current_hamming2))

            if play_loud:
                pianorollMatrixToTempMidi(recon, prediction=True, show=True,
                                          showPlayer=False, autoplay=True)

    # Plot against the alphas actually used, so x and y always have the
    # same length.
    _, ax = plt.subplots()
    ax.plot(alphas, hamming_dists1)
    ax.plot(alphas, hamming_dists2)
    ax.grid()

    # plot piano roll of all interpolation steps, one downbeat every 96 ticks
    _, ax2 = plt.subplots()
    recon_plot = np.concatenate(rolls, axis=0)
    downbeats = [i * 96 for i in range(len(rolls))]
    ppr.plot_pianoroll(ax2, recon_plot, downbeats=downbeats)
    plt.show()
def vae_interact(gui):
    """Call-and-response loop: record the user, answer with the VAE.

    Repeats until ``gui.is_running`` becomes false: wait for a recorded
    sequence, encode it, shift the latent mean by the dial values scaled
    with ``0.5 * exp(logvar)``, decode, normalise, threshold, smooth, and
    either publish note-on messages through ``gui.midi_publisher`` (when
    ``gui.chx_simulate_robot`` is checked) or play the result through
    ``live_instrument.computer_play``.

    Args:
        gui: GUI object providing ``live_instrument``, ``device``, ``model``,
            ``dials``, ``slider_temperature``, ``chx_simulate_robot``,
            ``midi_publisher`` and the ``is_running`` flag.
    """
    live_instrument = gui.live_instrument
    device = gui.device
    model = gui.model.to(device)
    dials = gui.dials

    while True:
        print("\nUser input\n")
        # reset live input clock and prerecorded sequences
        live_instrument.reset_sequence()
        live_instrument.reset_clock()
        while True:
            if live_instrument.clock():
                sequence = live_instrument.parse_to_matrix()
                live_instrument.reset_sequence()
                break
            if not gui.is_running:
                break
        if not gui.is_running:
            break

        # send live recorded sequence through model and get response
        with torch.no_grad():
            # prepare sample for input
            bars = np.array(np.split(sequence, live_instrument.bars))
            bars = cutOctaves(bars)
            batch = torch.from_numpy(bars).float().to(device)
            batch = torch.unsqueeze(batch, 1)

            # encode
            mu, logvar = model.encoder(batch)

            # reparameterize with variance
            dial_vals = [dial.value() for dial in dials]
            dial_tensor = (torch.FloatTensor(dial_vals) / 100.).to(device)
            new = mu + (dial_tensor * 0.5 * logvar.exp())
            pred = model.decoder(new).squeeze(1)

            # Join the per-bar outputs along the first axis in one call.
            prediction = torch.cat(tuple(pred), dim=0).cpu().numpy()

        # normalize; skip when the peak is 0 to avoid dividing by zero
        peak = np.abs(np.max(prediction))
        if peak > 0:
            prediction /= peak

        # check midi activations to include rests
        prediction[prediction < (1 - gui.slider_temperature.value() / 100.)] = 0
        prediction = debinarizeMidi(prediction, prediction=True)
        prediction = addCuttedOctaves(prediction)
        prediction = NoteSmoother(prediction, threshold=2).smooth()

        if gui.chx_simulate_robot.isChecked():
            # sent to robot
            print("\nPublisher\n")
            note_msg = Int32MultiArray()
            live_instrument.human = False
            live_instrument.reset_clock()
            play_tick = -1
            old_midi_on = np.zeros(1)
            played_notes = []
            while True:
                done = live_instrument.computer_clock()
                if live_instrument.current_tick > play_tick:
                    play_tick = live_instrument.current_tick
                    midi_on = np.argwhere(prediction[play_tick] > 0)
                    # NOTE(review): `midi_on[0]` is only the first row of the
                    # argwhere result, so at most one pitch per tick is
                    # published, and `.any()` is false when the only active
                    # index is 0. Kept as-is; confirm this is intended.
                    if midi_on.any():
                        for note in midi_on[0]:
                            if note not in old_midi_on:
                                current_vel = int(
                                    prediction[live_instrument.current_tick, note])
                                mido_msg = mido.Message(
                                    'note_on', note=note, velocity=current_vel)
                                note_msg.data = mido_msg.bytes()
                                gui.midi_publisher.publish(note_msg)
                                played_notes.append(note)
                    else:
                        # Nothing sounding on this tick: forget the notes
                        # started so far. No note_off is published here.
                        played_notes.clear()
                    old_midi_on = midi_on
                if done:
                    live_instrument.human = True
                    live_instrument.reset_clock()
                    break
        else:
            # or play in software
            print("\nPrediction\n")
            live_instrument.computer_play(prediction=prediction)

        live_instrument.reset_sequence()
        if not gui.is_running:
            break
def vae_endless(gui):
    """Record one user sequence, then keep feeding the VAE its own output.

    Waits for a recorded sequence, then repeats until ``gui.is_running``
    becomes false: encode the current sequence, shift the latent mean by the
    dial values scaled with ``0.5 * exp(logvar)``, decode, normalise,
    threshold, smooth, play the result and use it as the next input.

    Args:
        gui: GUI object providing ``live_instrument``, ``device``, ``model``,
            ``dials``, ``slider_temperature`` and the ``is_running`` flag.
    """
    live_instrument = gui.live_instrument
    device = gui.device
    model = gui.model.to(device)
    dials = gui.dials

    print("\nUser input\n")
    # reset live input clock and prerecorded sequences
    live_instrument.reset_sequence()
    live_instrument.reset_clock()
    sequence = None
    while True:
        if live_instrument.clock():
            sequence = live_instrument.parse_to_matrix()
            live_instrument.reset_sequence()
            break
        if not gui.is_running:
            break

    if sequence is None:
        # Stopped before anything was recorded: there is nothing to continue.
        return

    while True:
        # send current sequence through model and get response
        with torch.no_grad():
            # prepare sample for input
            bars = np.array(np.split(sequence, live_instrument.bars))
            bars = cutOctaves(bars)
            batch = torch.from_numpy(bars).float().to(device)
            batch = torch.unsqueeze(batch, 1)

            # encode
            mu, logvar = model.encoder(batch)

            # reparameterize with variance; the dial tensor must be on the
            # same device as the encoder outputs
            dial_vals = [dial.value() for dial in dials]
            dial_tensor = (torch.FloatTensor(dial_vals) / 100.).to(device)
            new = mu + (dial_tensor * 0.5 * logvar.exp())
            pred = model.decoder(new).squeeze(1)

            # Join the per-bar outputs along the first axis in one call.
            prediction = torch.cat(tuple(pred), dim=0).cpu().numpy()

        # normalize; skip when the peak is 0 to avoid dividing by zero
        peak = np.abs(np.max(prediction))
        if peak > 0:
            prediction /= peak

        # check midi activations to include rests
        prediction[prediction < (1 - gui.slider_temperature.value() / 100.)] = 0
        prediction = debinarizeMidi(prediction, prediction=True)
        prediction = addCuttedOctaves(prediction)
        prediction = NoteSmoother(prediction, threshold=2).smooth()

        # play predicted sequence note by note
        print("\nPrediction\n")
        live_instrument.computer_play(prediction=prediction)
        live_instrument.reset_sequence()

        # the response becomes the next input
        sequence = prediction
        if not gui.is_running:
            break