def __init__(self, bar_embedder, context_size, enc_lstm_size, dec_lstm_size,
             enc_use_meta=False, dec_use_meta=False, compile_now=False):
    """Build a bar-level encoder-decoder model.

    A window of `context_size` previous bars is embedded (optionally with a
    meta-data vector concatenated to each embedding), encoded by an LSTM,
    and decoded by a second LSTM into per-step softmax predictions over the
    bar vocabulary.

    Parameters:
        bar_embedder: layer/model with `embedding_size` and `vocab_size`
            attributes, callable on an integer bar sequence.
        context_size: number of previous-bar inputs.
        enc_lstm_size: units of the encoding LSTM.
        dec_lstm_size: units of the decoding LSTM.
        enc_use_meta / dec_use_meta: concatenate the meta vector on the
            encoder / decoder side respectively.
        compile_now: immediately call `self.compile_default()`.
    """
    self.n_voices = 6

    prev_bars = [Input(shape=(None,), name="context_" + str(i))
                 for i in range(context_size)]

    if enc_use_meta or dec_use_meta:
        # BUG FIX: the meta vector carries exactly n_voices features — the
        # size bookkeeping below (`embed_size = embedding_size + n_voices`,
        # `encoded_size += n_voices`) depends on it, and the sibling model
        # declares Input(shape=(self.n_voices,)). The original declared an
        # unknown-length shape (None,) here.
        meta_cat = Input(shape=(self.n_voices,), name="metaData")

    # embed each context bar; optionally append meta features per bar
    embeddings = [bar_embedder(pb) for pb in prev_bars]
    embed_size = bar_embedder.embedding_size

    if enc_use_meta:
        embeddings = [Concat([emb, meta_cat]) for emb in embeddings]
        embed_size = bar_embedder.embedding_size + self.n_voices

    # stack the per-bar embeddings into a (context_size, embed_size) sequence
    embeddings_stacked = Lambda(lambda ls: K.stack(ls, axis=1),
                                output_shape=(context_size,
                                              embed_size))(embeddings)

    # encode
    embeddings_processed = LSTM(enc_lstm_size)(embeddings_stacked)
    encoded_size = enc_lstm_size

    # decode — optionally condition the decoder state on the meta vector
    if dec_use_meta:
        encoded_size += self.n_voices
        embeddings_processed = Concat([embeddings_processed, meta_cat])

    # repeat the encoded state to the length of the current bar
    # (self._repeat presumably tiles arg 1 to the timesteps of arg 0 —
    # defined elsewhere in the class)
    repeated = Lambda(self._repeat,
                      output_shape=(None, encoded_size))\
                     ([prev_bars[0], embeddings_processed])

    decoded = LSTM(dec_lstm_size, return_sequences=True,
                   name='dec_lstm')(repeated)

    preds = TimeDistributed(Dense(bar_embedder.vocab_size,
                                  activation='softmax'),
                            name='softmax_layer')(decoded)

    if enc_use_meta or dec_use_meta:
        super().__init__(inputs=[*prev_bars, meta_cat], outputs=preds)
    else:
        super().__init__(inputs=prev_bars, outputs=preds)

    self.params = [context_size, enc_lstm_size, dec_lstm_size,
                   enc_use_meta, dec_use_meta]
    self.use_meta = enc_use_meta or dec_use_meta

    if compile_now:
        self.compile_default()
def __init__(self, melody_encoder, rhythm_embed_size, dec_lstm_size, V,
             enc_use_meta=False, dec_use_meta=False, compile_now=False):
    """Build a melody decoder conditioned on context melodies, a rhythm
    embedding, an optional meta vector, and a lead melody.

    The melody contexts are encoded by `melody_encoder`, joined with the
    rhythm embedding (and meta vector when requested) and an encoding of
    the lead, then decoded over `m` steps into softmax predictions over a
    vocabulary of size V.
    """
    self.n_voices = 9
    self.use_meta = enc_use_meta or dec_use_meta
    m = melody_encoder.m

    # --- inputs -----------------------------------------------------
    prev_melodies = Input(shape=(None, m), name="contexts")
    bar_embedding = Input(shape=(rhythm_embed_size, ),
                          name="bar_rhythm_embedded")
    if self.use_meta:
        meta_cat = Input(shape=(self.n_voices,), name="metaData")
    lead = Input(shape=(None, m), name="lead")

    lead_enc = self.get_lead_encoder(melody_encoder)

    # --- encode -----------------------------------------------------
    context_encoded = melody_encoder(prev_melodies)
    decoder_state = Concat([context_encoded, bar_embedding])

    # --- decode -----------------------------------------------------
    if dec_use_meta:
        decoder_state = Concat([decoder_state, meta_cat])

    lead_encoded = lead_enc(lead)
    decoder_state = Concat([decoder_state, lead_encoded])

    state_seq = RepeatVector(m)(decoder_state)
    dec_outputs = LSTM(dec_lstm_size, return_sequences=True)(state_seq)
    preds = TimeDistributed(Dense(V, activation="softmax"))(dec_outputs)

    self.params = [melody_encoder.params, rhythm_embed_size,
                   dec_lstm_size, V, enc_use_meta, dec_use_meta]

    if self.use_meta:
        model_inputs = [prev_melodies, bar_embedding, meta_cat, lead]
    else:
        model_inputs = [prev_melodies, bar_embedding, lead]
    super().__init__(inputs=model_inputs, outputs=preds, name=repr(self))

    self.encoder = melody_encoder

    if compile_now:
        self.compile_default()
def __init__(self, m, V, rhythm_embed_size, conv_f, conv_win_size,
             enc_lstm_size, dec_lstm_1_size, dec_lstm_2_size, meta_len,
             compile_now=False):
    """Build a conv + two-layer-LSTM melody model conditioned on meta data.

    Context melodies pass through a 1-D convolution, get the (softmaxed)
    meta vector tiled along time and concatenated, and are summarized by an
    encoder LSTM. The summary, joined with the rhythm embedding, is decoded
    by two stacked LSTMs into per-step softmax predictions over V symbols.
    """
    self.n_voices = 6

    # --- inputs -----------------------------------------------------
    prev_melodies = Input(shape=(None, m), name="contexts")
    bar_embedding = Input(shape=(rhythm_embed_size, ),
                          name="bar_rhythm_embedded")
    meta_data = Input(shape=(meta_len, ), name="metaData")

    # project raw meta data to a distribution over the voices
    meta_cat = Dense(self.n_voices, activation="softmax")(meta_data)

    # --- encode -----------------------------------------------------
    conv_out = Conv1D(filters=conv_f,
                      kernel_size=conv_win_size)(prev_melodies)

    # tile the meta vector to the conv output's timesteps
    # (self._repeat is defined elsewhere in the class)
    meta_tiled = Lambda(self._repeat,
                        output_shape=(None, self.n_voices))(
                            [conv_out, meta_cat])
    conv_with_meta = Concat([conv_out, meta_tiled], axis=-1)

    encoded = LSTM(enc_lstm_size)(conv_with_meta)

    # --- decode -----------------------------------------------------
    dec_state = Concat([encoded, bar_embedding])
    dec_seq = RepeatVector(m)(dec_state)
    dec1 = LSTM(dec_lstm_1_size, return_sequences=True)(dec_seq)
    dec2 = LSTM(dec_lstm_2_size, return_sequences=True)(dec1)
    preds = TimeDistributed(Dense(V, activation="softmax"))(dec2)

    super().__init__(inputs=[prev_melodies, bar_embedding, meta_data],
                     outputs=preds)

    self.params = [m, V, rhythm_embed_size, conv_f, conv_win_size,
                   enc_lstm_size, dec_lstm_1_size, dec_lstm_2_size,
                   meta_len]

    if compile_now:
        self.compile_default()
def __init__(self, melody_encoder, dense_size, V, compile_now=False):
    """Build a dense predictor over V classes from a root note, an encoded
    melody context, and a pre-embedded meta vector.
    """
    m = melody_encoder.m
    self.n_voices = 10

    # --- inputs -----------------------------------------------------
    root_note = Input(shape=(1, ), name="root_note")
    melody_context = Input(shape=(None, m), name="bar_melody")
    meta_embedded = Input(shape=(self.n_voices, ), name="meta_embedded")

    # --- encode each input, then fuse -------------------------------
    root_encoded = Dense(dense_size)(root_note)
    context_encoded = melody_encoder(melody_context)
    fused = Concat([root_encoded, context_encoded, meta_embedded])

    # --- decode ------------------------------------------------------
    hidden = Dense(dense_size)(fused)
    preds = Dense(V, activation="softmax")(hidden)

    super().__init__(inputs=[root_note, melody_context, meta_embedded],
                     outputs=preds)

    self.params = [dense_size, V]
    self.melody_encoder = melody_encoder

    if compile_now:
        self.compile_default()
def __init__(self, rhythm_encoder, dec_lstm_size, V,
             enc_use_meta=False, dec_use_meta=False, compile_now=False):
    """Build a rhythm generator on top of a pre-built rhythm encoder.

    Previous bars are encoded by `rhythm_encoder`; the decoder state is
    optionally extended with the meta vector and an embedding of the lead
    bar, then tiled to the current bar's length and decoded by an LSTM
    into per-step softmax predictions over V symbols.

    Parameters:
        rhythm_encoder: model exposing `context_size`, `encoding_size`
            and `bar_embedder` attributes.
        dec_lstm_size: units of the decoding LSTM.
        V: output vocabulary size.
        enc_use_meta / dec_use_meta: condition the encoder / decoder on
            the meta vector.
        compile_now: immediately call `self.compile_default()`.
    """
    self.n_voices = 8

    context_size = rhythm_encoder.context_size
    encoded_size = rhythm_encoder.encoding_size
    bar_embedder = rhythm_encoder.bar_embedder
    bar_embed_size = rhythm_encoder.bar_embedder.embedding_size

    prev_bars = [Input(shape=(None,), name="context_" + str(i))
                 for i in range(context_size)]

    if enc_use_meta or dec_use_meta:
        # BUG FIX: the meta vector has n_voices features — the decoder
        # size bookkeeping below (`encoded_size += self.n_voices`) depends
        # on it. The original declared an unknown-length shape (None,).
        meta_cat = Input(shape=(self.n_voices,), name="metaData")

    lead = Input(shape=(None, ), name="lead")
    lead_embedded = bar_embedder(lead)

    # encode
    embeddings_processed = rhythm_encoder(prev_bars)

    # decode
    if dec_use_meta:
        encoded_size += self.n_voices
        encoded_size += bar_embed_size
        embeddings_processed = Concat([embeddings_processed,
                                       meta_cat, lead_embedded])
    # NOTE(review): when dec_use_meta is False, `lead_embedded` is never
    # merged into the decoder state, leaving the "lead" input disconnected
    # from the outputs — confirm whether that is intentional.

    # tile the encoded state to the length of the current bar
    # (self._repeat is defined elsewhere in the class)
    repeated = Lambda(self._repeat,
                      output_shape=(None, encoded_size))\
                     ([prev_bars[0], embeddings_processed])

    decoded = LSTM(dec_lstm_size, return_sequences=True,
                   name='dec_lstm')(repeated)

    preds = TimeDistributed(Dense(V, activation='softmax'),
                            name='softmax_layer')(decoded)

    self.params = [rhythm_encoder.params, dec_lstm_size, V,
                   enc_use_meta, dec_use_meta]
    self.use_meta = enc_use_meta or dec_use_meta

    if enc_use_meta or dec_use_meta:
        super().__init__(inputs=[*prev_bars, meta_cat, lead],
                         outputs=preds, name=repr(self))
    else:
        # BUG FIX: the functional API expects a flat list of input
        # tensors; the original nested the `prev_bars` list inside the
        # inputs list ([prev_bars, lead]) instead of splatting it.
        super().__init__(inputs=[*prev_bars, lead],
                         outputs=preds, name=repr(self))

    if compile_now:
        self.compile_default()
def inception_3b(input_t, fc1, fc2_in, fc2_out, fc3_in, fc3_out, fc4_out):
    """Inception-style block: four parallel branches (1x1, 1x1->3x3,
    1x1->5x5, and 3x3-maxpool->1x1) concatenated on the channel axis.

    The fc*_in arguments size the 1x1 bottlenecks; fc1/fc2_out/fc3_out/
    fc4_out size each branch's output channels.
    """
    def conv(filters, size, tensor):
        # all convolutions share 'same' padding and ReLU activation
        return Conv2D(filters, size, padding='same',
                      activation='relu')(tensor)

    branch_1x1 = conv(fc1, (1, 1), input_t)
    branch_3x3 = conv(fc2_out, (3, 3), conv(fc2_in, (1, 1), input_t))
    branch_5x5 = conv(fc3_out, (5, 5), conv(fc3_in, (1, 1), input_t))

    pooled = MaxPooling2D((3, 3), strides=(1, 1),
                          padding='same')(input_t)
    branch_pool = conv(fc4_out, (1, 1), pooled)

    return Concat([branch_1x1, branch_3x3, branch_5x5, branch_pool],
                  axis=-1)
def __init__(self, bar_embedder, context_size, enc_lstm_size,
             dec_lstm_size, meta_len, compile_now=False):
    """Build a meta-conditioned bar encoder-decoder.

    Raw meta data is projected to a softmax over the voices and appended
    to every context-bar embedding; the stacked embeddings are encoded by
    an LSTM and decoded into per-step softmax predictions over the bar
    vocabulary.
    """
    self.num_calls = 0
    self.n_voices = 6

    # --- inputs -----------------------------------------------------
    prev_bars = [Input(shape=(None,), name="context_" + str(i))
                 for i in range(context_size)]
    meta_data = Input(shape=(meta_len,), name="metaData")

    # project raw meta data to a distribution over the voices
    meta_cat = Dense(self.n_voices, activation="softmax")(meta_data)

    # --- embed: each bar embedding gets the meta vector appended ----
    embedded = [Concat([bar_embedder(pb), meta_cat]) for pb in prev_bars]
    per_bar_size = bar_embedder.embedding_size + self.n_voices
    stacked = Lambda(lambda ls: K.stack(ls, axis=1),
                     output_shape=(context_size, per_bar_size))(embedded)

    # --- encode ------------------------------------------------------
    encoded = LSTM(enc_lstm_size)(stacked)

    # --- decode: tile the encoding to the current bar's length -------
    # (self._repeat is defined elsewhere in the class)
    tiled = Lambda(self._repeat,
                   output_shape=(None, enc_lstm_size))\
                  ([prev_bars[0], encoded])
    decoded = LSTM(dec_lstm_size, return_sequences=True,
                   name='dec_lstm')(tiled)
    pred = TimeDistributed(Dense(bar_embedder.vocab_size,
                                 activation='softmax'),
                           name='softmax_layer')(decoded)

    super().__init__(inputs=[*prev_bars, meta_data], outputs=pred)

    self.params = [context_size, enc_lstm_size, dec_lstm_size, meta_len]

    if compile_now:
        self.compile_default()
# Continuation of a model-building sequence: `x12`, `normal` and `dropout`
# are defined earlier, outside this view, and `x` flows onward after it.
# Three parallel conv towers over `normal` (kernel sizes 3, 5, and 5->3->1)
# are concatenated inception-style, then funneled through a conv/pool/dropout
# stack.

# tower 1 (tail): final 3x3 conv on the earlier x12, then light dropout
x13 = Conv2D(32, 3, activation="relu", padding="same")(x12)
x1 = Dropout(0.15 * dropout)(x13)

# tower 2: 5x5 convs with a halving max-pool after the first conv
x21 = Conv2D(8, 5, activation="relu", padding="same")(normal)
xmp2 = MaxPooling2D(pool_size=(1, 2))(x21)
x22 = Conv2D(16, 5, activation="relu", padding="same")(xmp2)
x23 = Conv2D(32, 5, activation="relu", padding="same")(x22)
x2 = Dropout(0.15 * dropout)(x23)

# tower 3: shrinking kernels (5 -> 3 -> 1) with the same pooling scheme
x31 = Conv2D(8, 5, activation="relu", padding="same")(normal)
xmp3 = MaxPooling2D(pool_size=(1, 2))(x31)
x32 = Conv2D(16, 3, activation="relu", padding="same")(xmp3)
x33 = Conv2D(32, 1, activation="relu", padding="same")(x32)
x3 = Dropout(0.15 * dropout)(x33)

# merge the towers on the channel axis
# NOTE(review): axis=3 assumes 4-D channels-last tensors — confirm the
# upstream input layout.
inception = Concat([x1, x2, x3], axis=3)

# funnel: conv blocks with decreasing kernel size (6 -> 5 -> 4 -> 3),
# periodic (1, 2) max-pooling and heavier dropout (0.3 * dropout)
x = Conv2D(32, 6, activation="relu", padding="same")(inception)
x = MaxPooling2D(pool_size=(1, 2))(x)
x = Dropout(0.3 * dropout)(x)
x = Conv2D(32, 5, activation="relu", padding="same")(x)
x = Dropout(0.3 * dropout)(x)
x = Conv2D(32, 4, activation="relu", padding="same")(x)
x = MaxPooling2D(pool_size=(1, 2))(x)
x = Dropout(0.3 * dropout)(x)
x = Conv2D(32, 3, activation="relu", padding="same")(x)
x = Dropout(0.3 * dropout)(x)