Example #1
def costalignment(a):
    plt.clf()
    #plt.figure(figsize=(8,6))
    plt.gray()
    fixcosts(a)
    states = np.arange(0, a.state_count() + 1)
    # Frame index times (samples per frame / sample rate) gives seconds,
    # assuming audio.nsamples is the hop size in samples per frame.
    times = np.arange(0, a.frame_count()) / audio.sample_rate * audio.nsamples
    #plt.axes([0,0,np.max(times),np.max(states)])
    x, y = np.meshgrid(times, states)
    plt.pcolormesh(x, y, a.costs.transpose())

    test_plot, = plt.plot(times, [s + 1 for s in a.path])

    wav = data.get_audio(a.name)
    score = data.get_score(a.name, wav.total_length)
    truth = data.get_truth(a.name, score, wav)

    if truth:
        truth_path = truth.get_path()
        truth_plot, = plt.plot(times, [s + 1 for s in truth_path])
        bx = plt.legend([test_plot, truth_plot],
                        ("Automatic alignment", "Ground truth"),
                        numpoints=1, handletextpad=0.5, loc="upper left")
        bx.draw_frame(False)  # hide the legend frame; newer Matplotlib would pass frameon=False

    plt.ylabel('State number in score sequence')
    plt.xlabel('Time in audio recording (s)')
    processplt(a, plt, "costmap")
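
For readers without the scorealign codebase at hand, here is a minimal self-contained sketch of the same cost-map plot. The cost matrix, path, and 10 ms frame hop are synthetic stand-ins, not project values:

import numpy as np
import matplotlib.pyplot as plt

# Synthetic stand-ins for a.costs (frames x states) and a.path (one state per frame).
n_frames, n_states = 200, 50
rng = np.random.default_rng(0)
costs = rng.random((n_frames, n_states))
path = np.minimum(np.arange(n_frames) * n_states // n_frames, n_states - 1)
times = np.arange(n_frames) * 0.01  # assumed 10 ms hop per frame

plt.pcolormesh(times, np.arange(n_states), costs.T, cmap="gray", shading="auto")
plt.plot(times, path + 1, label="Automatic alignment")
plt.xlabel("Time in audio recording (s)")
plt.ylabel("State number in score sequence")
plt.legend(loc="upper left", frameon=False)
plt.show()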
Example #2
    def testLossNotNaN(self):
        data = get_audio(None, 'damped_sine', hparams)
        model = RhoCMPS(hparams, data_iterator=data)

        with self.cached_session() as sess:
            sess.run(tf.global_variables_initializer())
            self.assertFalse(np.isnan(model.loss.eval()))
Example #3
File: views.py Project: hoyes/scorealign
def scoredata(request, name):
    audio = data.get_audio(name, 0.0)
    score = data.get_score(name, audio.total_length)
    output = {
        'events': score.events(),
        'duration': score.length(),
        'note_range': score.note_range()
    }
    return HttpResponse(json.dumps(output))
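
The view above serializes the score to JSON; a hypothetical URLconf entry for it might look like the following (the route pattern is an assumption, not taken from the project):

# Hypothetical urls.py entry; the "score/<str:name>/" route is an assumption.
from django.urls import path
from . import views

urlpatterns = [
    path("score/<str:name>/", views.scoredata),
]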
Example #4
    def testRhoEvolvedWithDataRemainsNormalized(self):
        data = get_audio(None, 'damped_sine', hparams)
        model = RhoCMPS(hparams, data_iterator=data)
        rho_out = model.rho_evolve_with_data()

        with self.cached_session() as sess:
            sess.run(tf.global_variables_initializer())
            self.assertAllClose(tf.trace(rho_out),
                                tf.ones_like(rho_out[:, :, 0, 0]),
                                rtol=1e-5)
Example #5
    def testPsiEvolvedWithDataRemainsNormalized(self):
        data = get_audio(None, 'damped_sine', hparams)
        model = PsiCMPS(hparams, data_iterator=data)
        psi_out = model.psi_evolve_with_data()

        with self.cached_session() as sess:
            sess.run(tf.global_variables_initializer())
            self.assertAllClose(tf.norm(psi_out, axis=-1),
                                tf.ones_like(psi_out[:, :, 0]),
                                rtol=1e-5)
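
Examples #4 and #5 assert the same invariant: evolving with data must preserve normalization, unit trace for RhoCMPS density matrices and unit norm for PsiCMPS state vectors. As a purely illustrative NumPy analogue of the trace check (not project code):

import numpy as np

# rho = v v† / <v, v> has unit trace by construction; this mirrors what
# assertAllClose verifies for each density matrix in the batch above.
rng = np.random.default_rng(0)
v = rng.normal(size=4) + 1j * rng.normal(size=4)
rho = np.outer(v, v.conj()) / np.vdot(v, v)
assert np.isclose(np.trace(rho).real, 1.0, rtol=1e-5)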
Example #6
def align(name, lik_method, path_method, const_width=None, rel_width=None):
    a = data.get_alignment(name, lik_method, path_method)
    if a is None:
        audio = data.get_audio(name)
        score = data.get_score(name)

        proc = AlignmentProcessor(lik_method, path_method)
        a = proc.align(audio, score, const_width, rel_width)
        truth = data.get_truth(name, score, audio)
        if truth:
            a.truth_path = truth.get_path()
        a.set_name(name)
        data.save_alignment(a)
    return a
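
A hypothetical call site for align; "chroma" and "dtw" are stand-ins, since the method names AlignmentProcessor actually accepts are not shown in these examples:

# Hypothetical usage; the method-name strings are assumptions.
a = align("mozart", "chroma", "dtw", rel_width=0.1)
print(a.path[:10])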
Example #7
    def testCorrectShape(self):
        data = get_audio(None, 'damped_sine', hparams)
        with self.cached_session() as sess:
            self.assertEqual(data.eval().shape, (hparams.minibatch_size, FLAGS.sample_duration))
Example #8
def main(argv):
    # hparams = HParams(minibatch_size=8, bond_dim=8, delta_t=1/FLAGS.sample_rate, sigma=0.000001,
    #                   h_reg=200/(np.pi * FLAGS.sample_rate)**2, r_reg=2000/(np.pi * FLAGS.sample_rate),
    #                   initial_rank=None, A=100., learning_rate=0.001)

    hparams = HParams(minibatch_size=8,
                      bond_dim=8,
                      delta_t=1 / FLAGS.sample_rate,
                      sigma=0.0001,
                      h_reg=200 / (np.pi * FLAGS.sample_rate)**2,
                      r_reg=0.1,
                      initial_rank=None,
                      A=100.,
                      learning_rate=0.001)
    hparams.parse(FLAGS.hparams)

    with tf.variable_scope("data"):
        data = get_audio(datadir=FLAGS.datadir,
                         dataset=FLAGS.dataset,
                         hps=hparams)

    with tf.variable_scope("model", reuse=tf.AUTO_REUSE):
        if FLAGS.mps_model == 'rho_mps':
            model = RhoCMPS(hparams=hparams, data_iterator=data)
        else:
            model = PsiCMPS(hparams=hparams, data_iterator=data)

        h_l2sqnorm = tf.reduce_sum(tf.square(model.freqs))
        r_l2sqnorm = tf.real(tf.reduce_sum(tf.conj(model.R) * model.R))

    with tf.variable_scope("total_loss"):
        total_loss = model.loss + hparams.h_reg * h_l2sqnorm \
                                    + hparams.r_reg * r_l2sqnorm

    with tf.variable_scope("summaries"):
        tf.summary.scalar("A", tf.cast(model.A, dtype=tf.float32))
        tf.summary.scalar("sigma", tf.cast(model.sigma, dtype=tf.float32))
        tf.summary.scalar("h_l2norm", tf.sqrt(h_l2sqnorm))
        tf.summary.scalar("r_l2norm", tf.sqrt(r_l2sqnorm))

        gr_rate = 2 * np.pi * hparams.sigma**2 * r_l2sqnorm / hparams.bond_dim
        tf.summary.scalar("gr_decay_time", 1 / gr_rate)

        tf.summary.scalar("model_loss", tf.reshape(model.loss, []))
        tf.summary.scalar("total_loss", tf.reshape(total_loss, []))

        tf.summary.audio("data",
                         data,
                         sample_rate=FLAGS.sample_rate,
                         max_outputs=5)
        tf.summary.histogram("frequencies", model.freqs / (2 * np.pi))

        if FLAGS.visualize:
            # Doesn't work for Datasets where batch size can't be inferred
            data_waveform_op = tfplot.autowrap(waveform_plot, batch=True)(
                data, hparams.minibatch_size * [hparams.delta_t])
            tf.summary.image("data_waveform", data_waveform_op)

            if FLAGS.num_samples != 0:
                samples = model.sample(FLAGS.num_samples,
                                       FLAGS.sample_duration)
                sample_waveform_op = tfplot.autowrap(
                    waveform_plot,
                    batch=True)(samples, FLAGS.num_samples * [hparams.delta_t])
                tf.summary.image("sample_waveform", sample_waveform_op)

    step = tf.get_variable("global_step", [],
                           tf.int64,
                           tf.zeros_initializer(),
                           trainable=False)
    train_op = tf.train.AdamOptimizer(
        learning_rate=hparams.learning_rate).minimize(total_loss,
                                                      global_step=step)

    # TODO Unrolling in time?

    logdir = f"{FLAGS.logdir}/{hparams.bond_dim}_{hparams.delta_t}_{hparams.minibatch_size}"
    tf.contrib.training.train(train_op,
                              save_checkpoint_secs=60,
                              logdir=logdir)
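
Note that hparams.parse(FLAGS.hparams) lets the command line override the defaults above: HParams.parse takes a comma-separated name=value string, e.g.:

# Equivalent to passing --hparams="bond_dim=16,learning_rate=0.0005";
# parse overrides only the named fields.
hparams.parse("bond_dim=16,learning_rate=0.0005")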
Example #9
File: test.py Project: hoyes/scorealign
#!/usr/bin/env python
import data
score = data.get_score("mozart")
audio = data.get_audio("mozart")
truth = data.get_truth("mozart", score, audio)
print(truth.get_path())