Example #1

import numpy as np
import tensorflow as tf
# GRU, GRUFork, scan, Linear, ni, softmax, categorical_crossentropy, and
# print_network are project-local helpers, not TensorFlow built-ins.

# One step of a three-layer stacked GRU: each layer's output is forked into
# input/gate projections feeding the next layer.
def step(inp_t, inpgate_t, h1_tm1, h2_tm1, h3_tm1):
    h1 = GRU(inp_t, inpgate_t, h1_tm1, h1_dim, h1_dim, random_state)
    h1_t, h1gate_t = GRUFork([h1], [h1_dim], h2_dim, random_state)
    h2 = GRU(h1_t, h1gate_t, h2_tm1, h2_dim, h2_dim, random_state)
    h2_t, h2gate_t = GRUFork([h2], [h2_dim], h3_dim, random_state)
    h3 = GRU(h2_t, h2gate_t, h3_tm1, h3_dim, h3_dim, random_state)
    return h1, h2, h3


h1, h2, h3 = scan(step, [inp_proj, inpgate_proj], [init_h1, init_h2, init_h3])
# slice out each layer's final-timestep hidden state
final_h1, final_h2, final_h3 = [ni(h1, -1), ni(h2, -1), ni(h3, -1)]

pred = Linear([h3], [h3_dim], out_dim, random_state)
cost = tf.reduce_mean(categorical_crossentropy(softmax(pred), target))

# cost in bits
# cost = cost * 1.44269504089
params = tf.trainable_variables()
print_network(params)
grads = tf.gradients(cost, params)
grads = [tf.clip_by_value(grad, -grad_clip, grad_clip) for grad in grads]
opt = tf.train.AdamOptimizer(learning_rate)
updates = opt.apply_gradients(zip(grads, params))


def _loop(itr, sess, inits=None, do_updates=True):
    if inits is None:
        # zero initial hidden states for each of the three GRU layers
        i_h1 = np.zeros((batch_size, h1_dim)).astype("float32")
        i_h2 = np.zeros((batch_size, h2_dim)).astype("float32")
        i_h3 = np.zeros((batch_size, h3_dim)).astype("float32")
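
The commented-out "cost in bits" line rescales the natural-log cross-entropy
into bits; the constant is 1/ln(2). A minimal NumPy check of that identity
(values are illustrative only):

import numpy as np

p = 0.25
nats = -np.log(p)              # cross-entropy contribution in nats
bits = nats * 1.44269504089    # same quantity rescaled to bits
assert np.isclose(bits, -np.log2(p))  # -log2(0.25) = 2.0 bits
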
Example #2

# (excerpt: this begins partway through the dec_step scan body; the earlier
# arguments of its GRU call are not shown)
                 outgate_t + outctxgate_proj,
                 h1_tm1,
                 dec_h1_dim,
                 dec_h1_dim,
                 random_state,
                 mask=outmask_t)
    return dec_h1


dec_h1 = scan(dec_step, [out_proj, outgate_proj, target_mask], [init_dec_h1])

# Add the decoder context: broadcast the final encoder state across the
# decoder's time steps so it can be concatenated into the output projection
ctx = broadcast(final_enc_h1, dec_h1)
pred = Linear([dec_h1, ctx], [dec_h1_dim, enc_h1_dim], out_dim, random_state)

full_cost = categorical_crossentropy(softmax(pred), target)
cost = tf.reduce_mean(target_mask * full_cost)

# cost in bits
# cost = cost * 1.44269504089
params = tf.trainable_variables()
print_network(params)
grads = tf.gradients(cost, params)
grad_clip = 5.0
grads = [tf.clip_by_value(grad, -grad_clip, grad_clip) for grad in grads]
opt = tf.train.AdamOptimizer(learning_rate)
updates = opt.apply_gradients(zip(grads, params))


def _loop(X_mb, X_mb_mask, y_mb, y_mb_mask, do_updates=True):
    # zero initial hidden states for the encoder and decoder GRUs
    i_enc_h1 = np.zeros((batch_size, enc_h1_dim)).astype("float32")
    i_dec_h1 = np.zeros((batch_size, dec_h1_dim)).astype("float32")
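
Note that tf.reduce_mean(target_mask * full_cost) above averages over every
position, padded steps included (they contribute zero loss but still count in
the denominator). A common alternative, shown here as a NumPy sketch and not
necessarily what this code intends, normalizes by the number of unmasked steps:

import numpy as np

full_cost = np.array([[2.0, 1.0, 3.0],
                      [1.0, 0.5, 4.0]])   # per-step losses, (batch, time)
mask = np.array([[1., 1., 0.],
                 [1., 0., 0.]])           # 1 = real step, 0 = padding

mean_over_all = np.mean(mask * full_cost)                   # 4/6 ≈ 0.67
mean_over_valid = np.sum(mask * full_cost) / np.sum(mask)   # 4/3 ≈ 1.33
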
Example #3

# Multiembedding embeds each of the n_notes target channels; Automask then
# restricts which embedded channels each prediction head may condition on.
target_note_embed = Multiembedding(note_target, n_note_symbols, note_embed_dim,
                                   random_state)
target_note_masked = Automask(target_note_embed, n_notes)

costs = []
note_preds = []
duration_preds = []
for i in range(n_notes):
    note_pred = Linear([h1, h2, target_note_masked[i]],
                       [h_dim, h_dim, n_notes * note_embed_dim],
                       note_out_dims[i],
                       random_state,
                       weight_norm=False)
    # reweight by empirical counts?
    n = categorical_crossentropy(softmax(note_pred),
                                 note_target[:, :, i],
                                 class_weights={0: .001})
    cost = tf.reduce_sum(n)
    note_preds.append(note_pred)
    costs.append(cost)

cost = sum(costs)  #/ (sequence_length * batch_size)

# cost in bits
# cost = cost * 1.44269504089
params = tf.trainable_variables()
grads = tf.gradients(cost, params)
grads = [tf.clip_by_value(grad, -grad_clip, grad_clip) for grad in grads]
opt = tf.train.AdamOptimizer(learning_rate)
updates = opt.apply_gradients(zip(grads, params))
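
The class_weights={0: .001} argument above sharply downweights symbol 0,
presumably the dominant "no note" class flagged by the "reweight by empirical
counts?" comment. A minimal NumPy sketch of per-class loss weighting, assuming
the helper scales each example's loss by its target class's weight:

import numpy as np

def weighted_xent(probs, targets, class_weights):
    # probs: (n_examples, n_classes) softmax outputs; targets: int class ids
    nll = -np.log(probs[np.arange(len(targets)), targets])
    w = np.array([class_weights.get(int(t), 1.0) for t in targets])
    return w * nll  # examples whose target is class 0 are scaled by .001

probs = np.array([[0.7, 0.3],
                  [0.2, 0.8]])
losses = weighted_xent(probs, np.array([0, 1]), {0: .001})
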
Example #4

# (excerpt: this begins partway through the per-note loop; the earlier
# arguments of the note_pred Linear call are not shown)
                       note_out_dims[i],
                       random_state,
                       weight_norm=weight_norm_outputs,
                       name=name_note)
    duration_pred = Linear([h1[:, :, :h_dim], scan_inp,
                            target_note_masked[i], target_duration_masked[i]],
                           [h_dim, scan_inp_dim, n_notes * note_embed_dim,
                            n_notes * duration_embed_dim],
                           duration_out_dims[i],
                           random_state,
                           weight_norm=weight_norm_outputs,
                           name=name_dur)
    n = categorical_crossentropy(softmax(note_pred), note_target[:, :, i])
    d = categorical_crossentropy(softmax(duration_pred),
                                 duration_target[:, :, i])
    cost = (n_duration_symbols * tf.reduce_mean(n)
            + n_note_symbols * tf.reduce_mean(d))
    cost /= (n_duration_symbols + n_note_symbols)
    note_preds.append(note_pred)
    duration_preds.append(duration_pred)
    costs.append(cost)

# Average over all heads: n_notes pitch heads plus n_notes duration heads
# (4 of each here).
cost = sum(costs) / float(n_notes + n_notes)

params = tf.trainable_variables()
grads = tf.gradients(cost, params)
grads = [tf.clip_by_value(grad, -grad_clip, grad_clip) for grad in grads]
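
The per-head cost above weights the mean note loss by n_duration_symbols and
the mean duration loss by n_note_symbols before renormalizing, plausibly to
keep the head with the larger vocabulary from dominating. The arithmetic as a
standalone sketch, with made-up sizes and losses:

n_note_symbols = 50       # hypothetical vocabulary sizes
n_duration_symbols = 10
mean_n = 2.0              # stand-in for tf.reduce_mean(n)
mean_d = 1.0              # stand-in for tf.reduce_mean(d)

cost = n_duration_symbols * mean_n + n_note_symbols * mean_d
cost /= (n_duration_symbols + n_note_symbols)
# (10 * 2.0 + 50 * 1.0) / 60 ≈ 1.167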