        X_mb, X_mb_mask, c_mb, c_mb_mask = next(valid_itr)
        valid_itr.reset()
        prev_h1, prev_h2, prev_h3 = [
            np_zeros((minibatch_size, n_hid)) for i in range(3)
        ]
        prev_kappa = np_zeros((minibatch_size, att_size))
        prev_w = np_zeros((minibatch_size, n_chars))
        bias = args.bias
        if args.sample is not None:
            predict_function = checkpoint_dict["predict_function"]
            attention_function = checkpoint_dict["attention_function"]
            sample_function = checkpoint_dict["sample_function"]
            if args.write is not None:
                sample_string = args.write
                print("Sampling using sample string %s" % sample_string)
                oh = dense_to_one_hot(
                    np.array([vocabulary[c] for c in sample_string]),
                    vocabulary_size)
                c_mb = np.zeros(
                    (len(oh), minibatch_size, oh.shape[-1])).astype(c_mb.dtype)
                c_mb[:len(oh), :, :] = oh[:, None, :]
                c_mb = c_mb[:len(oh)]
                c_mb_mask = np.ones_like(c_mb[:, :, 0])
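                # c_mb now has shape (len(sample_string), minibatch_size,
                # vocabulary_size): the one-hot conditioning text tiled
                # across the minibatch, with an all-ones mask.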

            if args.sample_length is None:
                raise ValueError("NYI - use -sl or --sample_length")
            else:
                fixed_steps = args.sample_length
                completed = []
                init_x = np.zeros_like(X_mb[0])
                for i in range(fixed_steps):
                    # The tail of this call was truncated in the source; the
                    # remaining arguments and the return unpacking below are
                    # reconstructed from the state variables initialized above.
                    rvals = sample_function(init_x, c_mb, c_mb_mask,
                                            prev_h1, prev_h2, prev_h3,
                                            prev_kappa, prev_w, bias)
                    init_x, prev_h1, prev_h2, prev_h3, prev_kappa, prev_w = rvals
                    completed.append(init_x)
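# The loop above follows the standard stateful-sampling pattern: the sampled
# output and the recurrent state are fed back in at each step. A self-contained
# toy sketch of that pattern (plain NumPy; `toy_step` is a hypothetical
# stand-in for sample_function):
import numpy as np

def toy_step(x, h):
    # stand-in for one network step: returns next output and next state
    h = np.tanh(x + h)
    return h.copy(), h

h = np.zeros((2, 4))        # recurrent state, like prev_h1/prev_h2/prev_h3
x = np.zeros((2, 4))        # previous output, like init_x
completed = []
for i in range(10):         # like fixed_steps
    x, h = toy_step(x, h)
    completed.append(x)
samples = np.asarray(completed)  # (steps, minibatch, features)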
Example #3
import numpy as np
import theano
from theano.tensor.shared_randomstreams import RandomStreams
from kdllib import gradient_clipping, make_weights, make_biases
from kdllib import dense_to_one_hot, adam
from kdllib import fetch_fruitspeech_spectrogram
from kdllib import midiwrap, fetch_nottingham
import cPickle as pickle

midiread, midiwrite = midiwrap()

# Don't use a Python long, as longs don't work on 32-bit computers.
np.random.seed(0xbeef)
rng = RandomStreams(seed=np.random.randint(1 << 30))
theano.config.warn.subtensor_merge_bug = False

key_range, dt, dataset = fetch_nottingham()
dataset = [dense_to_one_hot(d, n_classes=2) for d in dataset]

DEBUG = True


def fast_dropout(rng, x, debug=DEBUG):
    '''
    Fast dropout: multiply activations elementwise by noise drawn from
    N(1, 1). Returns x unchanged when debug is True.
    '''
    if debug:
        return x
    else:
        mask = rng.normal(size=x.shape, avg=1., dtype=theano.config.floatX)
        return x * mask
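
# A minimal usage sketch (hypothetical names; assumes the `rng` defined
# above and a symbolic Theano matrix): compile fast_dropout on its own so
# the multiplicative noise it applies can be inspected.
import theano.tensor as T

X_sym = T.matrix("X_sym")
noisy = fast_dropout(rng, X_sym, debug=False)
noise_fn = theano.function([X_sym], noisy)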