def load_model(self, hidden_size=1024, is_bow=False):
    sequence_input = Input(shape=(self.vae.MAX_SEQUENCE_LENGTH,), dtype='int32')
    token_embedding = self.vae.embedding_layer(sequence_input)
    self.vae.embedding_layer.trainable = False

    if not is_bow:
        # Encode the sequence through the frozen pretrained VAE.
        for layer in self.vae.model_vae.layers:
            layer.trainable = False

        def get_embedding(sequence_embedding):
            h = self.vae.get_h(sequence_embedding)
            return self.vae.get_embedding(h)
    else:
        # Bag-of-words baseline: mean-pool the token embeddings.
        def get_embedding(sequence_embedding):
            return K.mean(sequence_embedding, axis=1)

    get_embedding_lambda = Lambda(get_embedding, name='get_embedding')
    get_embedding_lambda.trainable = False
    sequence_embedding = get_embedding_lambda(token_embedding)

    image_z = Dropout(0.2)(self.inception_base_model.layers[-2].output)
    combined = Concatenate(axis=1)([sequence_embedding, image_z])
    hidden = Dropout(0.2)(BatchNormalization()(
        Dense(hidden_size, activation='relu')(combined)))
    predict = Dense(1, activation='sigmoid')(hidden)

    self.model = Model([sequence_input, self.inception_base_model.input], predict)
    self.model.compile(loss='binary_crossentropy',
                       optimizer='adam',
                       metrics=['accuracy', metrics.recall,
                                metrics.precision, metrics.f1])

    # Gradient functions for saliency: loss against an all-ones target
    # (i.e. the gradient of the positive-class cross-entropy).
    loss = K.binary_crossentropy(K.ones_like(predict), predict)
    self.input_grads = K.function(
        [sequence_input, self.inception_base_model.input],
        K.gradients(loss, token_embedding))
    self.embedding_grads = K.function(
        [sequence_embedding, self.inception_base_model.input],
        K.gradients(loss, sequence_embedding))
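# --- Hedged usage sketch (assumption, not part of the original class): how the
# `input_grads` backend function compiled above could drive a per-token
# saliency score. `clf` stands for an instance that has run load_model();
# `padded_ids` and `image_batch` are hypothetical preprocessed inputs.
import numpy as np

def token_saliency(clf, padded_ids, image_batch):
    # Gradient of the all-positive binary cross-entropy w.r.t. the token
    # embeddings: shape (batch, MAX_SEQUENCE_LENGTH, embedding_dim).
    grads = clf.input_grads([padded_ids, image_batch])[0]
    # Collapse the embedding axis; a larger norm marks a more influential token.
    return np.linalg.norm(grads, axis=-1)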
def build(self):
    question = self.question
    answer = self.get_answer()

    # Pretrained, frozen RNN that inverts the encoded answer back to token scores.
    rnn_model = get_model(
        question_maxlen=self.model_params.get('question_len', 20),
        answer_maxlen=self.model_params.get('answer_len', 60),
        vocab_len=self.config['n_words'],
        n_hidden=256,
        load_save=True)
    rnn_model.trainable = False
    answer_inverted = rnn_model(answer)

    argmax = Lambda(lambda x: K.argmax(x, axis=2),
                    output_shape=lambda x: (x[0], x[1]))
    argmax.trainable = False
    answer_argmax = argmax(answer_inverted)

    # add embedding layers
    weights = self.model_params.get('initial_embed_weights', None)
    weights = weights if weights is None else [weights]
    embedding = Embedding(
        input_dim=self.config['n_words'],
        output_dim=self.model_params.get('n_embed_dims', 100),
        # W_regularizer=regularizers.activity_l1(1e-4),
        W_constraint=constraints.nonneg(),
        weights=weights,
        mask_zero=True)
    question_embedding = embedding(question)
    answer_embedding = embedding(answer_argmax)

    # max-pooling over the time axis
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    question_maxpool = maxpool(question_embedding)
    answer_maxpool = maxpool(answer_embedding)

    # activation
    activation = Activation('linear')
    question_output = activation(question_maxpool)
    answer_output = activation(answer_maxpool)

    return question_output, answer_output
# Variant of the build() above that additionally applies dropout inside the
# Embedding layer (Keras 1.x `dropout` argument).
def build(self):
    question = self.question
    answer = self.get_answer()

    rnn_model = get_model(
        question_maxlen=self.model_params.get('question_len', 20),
        answer_maxlen=self.model_params.get('answer_len', 60),
        vocab_len=self.config['n_words'],
        n_hidden=256,
        load_save=True)
    rnn_model.trainable = False
    answer_inverted = rnn_model(answer)

    argmax = Lambda(lambda x: K.argmax(x, axis=2),
                    output_shape=lambda x: (x[0], x[1]))
    argmax.trainable = False
    answer_argmax = argmax(answer_inverted)

    # add embedding layers
    weights = self.model_params.get('initial_embed_weights', None)
    weights = weights if weights is None else [weights]
    embedding = Embedding(
        input_dim=self.config['n_words'],
        output_dim=self.model_params.get('n_embed_dims', 100),
        # W_regularizer=regularizers.activity_l1(1e-4),
        W_constraint=constraints.nonneg(),
        dropout=0.5,
        weights=weights,
        mask_zero=True)
    question_embedding = embedding(question)
    answer_embedding = embedding(answer_argmax)

    # max-pooling over the time axis
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    question_maxpool = maxpool(question_embedding)
    answer_maxpool = maxpool(answer_embedding)

    # activation
    activation = Activation('linear')
    question_output = activation(question_maxpool)
    answer_output = activation(answer_maxpool)

    return question_output, answer_output
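# --- Hedged usage sketch (assumption): both build() variants above return
# pooled question/answer vectors rather than a score. One conventional way to
# train such twin towers is a cosine-similarity head; `qa` is a hypothetical
# instance exposing `question`, `get_answer()`, and `build()`.
from keras import backend as K
from keras.layers import Lambda
from keras.models import Model

def build_similarity_model(qa):
    question_output, answer_output = qa.build()
    # Cosine similarity between the two pooled embeddings, in [-1, 1].
    cosine = Lambda(lambda x: K.sum(
        K.l2_normalize(x[0], axis=-1) * K.l2_normalize(x[1], axis=-1),
        axis=-1, keepdims=True))
    similarity = cosine([question_output, answer_output])
    model = Model([qa.question, qa.get_answer()], similarity)
    model.compile(optimizer='adam', loss='binary_crossentropy')
    return model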
def load_model(self, hidden_size=384):
    sequence_input_1 = Input(shape=(self.vae.MAX_SEQUENCE_LENGTH,), dtype='int32')
    sequence_input_2 = Input(shape=(self.vae.MAX_SEQUENCE_LENGTH,), dtype='int32')
    sequence_embedding_1 = self.vae.embedding_layer(sequence_input_1)
    sequence_embedding_2 = self.vae.embedding_layer(sequence_input_2)

    # Use the frozen VAE encoder's mean vector as the sentence embedding.
    self.vae.encoder_layer.trainable = False
    self.vae.embedding_layer.trainable = False

    def get_mu(sequence_embedding):
        h = self.vae.get_h(sequence_embedding)
        return self.vae.z_h_mean_layer(h)

    get_mu_lambda = Lambda(get_mu, name='get_mu')
    get_mu_lambda.trainable = False
    sequence_embedding_1 = get_mu_lambda(sequence_embedding_1)
    sequence_embedding_2 = get_mu_lambda(sequence_embedding_2)

    combined = Concatenate(axis=1)([sequence_embedding_1, sequence_embedding_2])
    hidden = Dropout(0.2)(BatchNormalization()(
        Dense(hidden_size, activation='relu')(combined)))
    predict = Dense(1, activation='sigmoid')(hidden)

    self.model = Model([sequence_input_1, sequence_input_2], predict)
    self.model.compile(loss='binary_crossentropy',
                       optimizer='adam',
                       metrics=['accuracy', metrics.recall,
                                metrics.precision, metrics.f1])
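# --- Hedged usage sketch (assumption): fitting the pair classifier above on a
# duplicate-question task. `detector` is a hypothetical instance of the class;
# `q1_ids`/`q2_ids` are integer matrices already padded to MAX_SEQUENCE_LENGTH,
# and `labels` is a 0/1 vector marking duplicate pairs.
def train_duplicate_detector(detector, q1_ids, q2_ids, labels):
    detector.load_model(hidden_size=384)
    # Only the Dense/BatchNormalization head trains; the VAE embedding and
    # encoder layers were marked non-trainable in load_model.
    detector.model.fit([q1_ids, q2_ids], labels,
                       batch_size=64, validation_split=0.1)
    return detector.model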
def make_function_list(self, index=0):
    stats_list = []
    act_list = []
    addl_list = []
    #latent_dim = latent_dim if latent_dim is not None else self.latent_dim
    if (self.type == 'echo' and not self.layer_kwargs.get('d_max', False)
            and self.k != 1):
        # echo params fed via kwargs, otherwise defaults (i.e. k = 1, no d_max => default)
        self.layer_kwargs['d_max'] = self.k
        self.k = 1  # ensures only one z_activation layer

    for samp in range(self.k):
        net = 'Enc' if self.encoder else 'Dec'
        name_suffix = (str(net) + '_' + str(index) + '_' + str(samp)
                       if self.k > 1 else str(net) + '_' + str(index))

        if self.type in ['add', 'vae', 'additive']:
            if samp == 0:
                z_mean = Dense(self.latent_dim, activation='linear',
                               name='z_mean' + name_suffix, **self.layer_kwargs)
                z_logvar = Dense(self.latent_dim, activation='linear',
                                 name='z_var' + name_suffix, **self.layer_kwargs)
                stats_list.append([z_mean, z_logvar])
            # NOTE: using layer_kwargs for dense... samplers don't need any here
            z_act = Lambda(layers.vae_sample, name='z_act_' + name_suffix)
            #arguments=self.layer_kwargs)
            act_list.append(z_act)

        elif self.type in ['mul', 'ido', 'multiplicative']:
            if samp == 0:
                z_mean = Dense(self.latent_dim, activation='linear',
                               name='z_mean' + name_suffix, **self.layer_kwargs)
                z_logvar = Dense(self.latent_dim, activation='linear',
                                 name='z_var' + name_suffix, **self.layer_kwargs)
                stats_list.append([z_mean, z_logvar])
            z_act = Lambda(layers.ido_sample, name='z_act_' + name_suffix,
                           arguments=self.layer_kwargs)
            act_list.append(z_act)

        elif self.type in ['echo']:
            if samp == 0:
                # slightly different here... layer_kwargs used for echo / lambda
                z_mean = Dense(self.latent_dim, activation='linear',
                               name='z_mean' + name_suffix)  # **self.layer_kwargs)
                z_act = layers.Echo(name='echo_act' + name_suffix, **self.layer_kwargs)
                z_act.trainable = self.layer_kwargs['trainable']
                print("Trainable ? ", z_act.trainable,
                      self.layer_kwargs['trainable'], z_act)
                #z_act = Lambda(layers.echo_sample, name='z_act_' + name_suffix, arguments=self.layer_kwargs)
                echo_noise = Lambda(z_act.get_noise, name='noise' + name_suffix)
                capacity = Lambda(z_act.get_capacity, name='capacity' + name_suffix)
                #capacity = Lambda(layers.echo_capacity, name='capacity' + name_suffix, arguments={'init': self.layer_kwargs['init']})  #, 'batch': self.layer_kwargs['batch']})
                # note: k = 1 if k used as d_max, otherwise will have k separate layer calls
                # tf.get_variable self.layer_kwargs['init']
                stats_list.append([z_mean])
                act_list.append(z_act)
                addl_list.append([capacity, echo_noise])

        elif self.type in ['bir', 'constant_additive']:
            if samp == 0:
                z_mean = Dense(self.latent_dim, activation='linear',
                               name='z_mean' + name_suffix)
                mi = self.layer_kwargs.get('mi', None)
                if mi is None:
                    var = self.layer_kwargs.get(
                        'variance',
                        self.layer_kwargs.get(
                            'sigma', self.layer_kwargs.get('noise', None)))
                    if var is None:
                        raise ValueError(
                            "Please enter layer_kwargs['mi'] or ['variance'] "
                            "for bounded rate autoencoder")
                else:
                    var = -2 * mi / self.latent_dim
                self.layer_kwargs['variance'] = var
                z_logvar = Lambda(
                    layers.constant_layer, name='z_var' + name_suffix,
                    arguments={'variance': self.layer_kwargs['variance'],
                               'batch': self.layer_kwargs['batch']})
                #K.constant(np.log(var), shape=(self.latent_dim,), name='z_var' + name_suffix)
                #Dense(self.latent_dim, activation='linear', name='z_var' + name_suffix, **self.layer_kwargs)
                stats_list.append([z_mean, z_logvar])
            z_act = Lambda(layers.vae_sample, name='z_act_' + name_suffix)
            act_list.append(z_act)

        elif self.type in ['autoregressive_flow', 'af', 'ar_flow', 'arf']:
            #if samp == 0:
            flow = AR_flow(name='ar_flow' + name_suffix)
            density = Lambda(flow.get_density, name='density' + name_suffix)
            act_list.append(flow)
            addl_list.append(density)

        elif self.type in ['iaf', 'inverse_flow']:
            z_mean = Dense(self.latent_dim, activation='linear',
                           name='z_mean' + name_suffix)  # **self.layer_kwargs)
            z_logvar = Dense(self.latent_dim, activation='linear',
                             name='z_var' + name_suffix)  # **self.layer_kwargs)
            stats_list.append([z_mean, z_logvar])
            self.try_activations(kw=True)
            iaf = layers.IAF(name='z_act' + name_suffix, **self.layer_kwargs)
            iaf_density = Lambda(iaf.get_density,
                                 name='iaf_conditional' + name_suffix)
            #print('*** CONSTANT JACOBIAN IAF ???? ***', iaf.is_constant_jacobian)
            act_list.append(iaf)
            # doesn't matter what it calls? sample to be safe
            addl_list.append(iaf_density)

        elif self.type in ['maf', 'masked_arf']:
            #, 'gaussian_af', 'gaussian_arf', 'gaussian_maf']
            tf_made = True
            call_on_addl = []
            if tf_made:
                # args['steps'] = self.layer_kwargs.get('steps', 1)
                # args['layers'] = self.layer_kwargs.get('layers', 1)
                # args['mean_only'] = self.layer_kwargs.get('mean_only', 1)
                # args['activation'] = self.layer_kwargs.get('activation', 'relu')
                self.try_activations(kw=True)
                print(name_suffix)
                self.layer_kwargs["name"] = 'maf_density' + str(name_suffix)
                reshape = Reshape((self.latent_dim,),
                                  name='conv_reshape' + name_suffix)
                #if len(K.int_shape(z_mean)) > 2:  # tf.reduce_prod(tf.shape(z_mean)[1:])
                #    z_mean = Reshape([-1, dim])(z_mean)
                #z_mean = Reshape([-1, *z_mean._keras_shape[1:]])(z_mean)
                #z_mean = Flatten()(z_mean)
                maf = Lambda(layers.tf_masked_flow,
                             arguments=self.layer_kwargs)  #, name='masked_flow' + name_suffix)
                if self.layer_kwargs.get("noise_estimator", False):
                    self.layer_kwargs["name"] = 'maf_noise_density' + str(name_suffix)
                    maf2 = Lambda(layers.tf_masked_flow,
                                  arguments=self.layer_kwargs)
                    call_on_addl.append(maf2)
                stats_list.append([reshape])
                act_list.append(maf)  # directly gives log probability! (from input z)
            else:
                made_network = layers.MADE_network(
                    steps=self.layer_kwargs.get('steps', 1),
                    layers=self.layer_kwargs.get('layers', 1),
                    mean_only=self.layer_kwargs.get('mean_only', 1),
                    name='made_network' + name_suffix)
                made_jacobian = Lambda(made_network.get_log_det_jac,
                                       name='made_jac' + name_suffix)
                # prob z, to be fed to loss (treat this as recon?)
                act_list.append(made_network)
                addl_list.append(made_jacobian)

        else:
            # import layer module by string (can be specified either in
            # activation or layer_kwargs)
            try:
                #self.try_activations()
                spl = str(self.activation).split('.')
                if len(spl) > 1:
                    path = '.'.join(spl[:-1])
                    mod = importlib.import_module(path)
                    mod = getattr(mod, spl[-1])
                    self.layer_kwargs['activation'] = mod
                else:
                    if not self.layer_kwargs.get('activation', True):
                        self.layer_kwargs['activation'] = self.activation
            except Exception as e:
                print()
                print("trying activation for layer ", self.type)
                print(e)
                #raise NotImplementedError("Coding error in importing activation. Specify as Keras activation str or module.function")
                print()
            try:
                self.try_activations(kw=True)
            except Exception as e:
                print()
                print("trying layer kwarg activation for ", self.type)
                print(e)
                print()
            try:
                spl = str(self.type).split('.')
                if len(spl) > 1:
                    path = '.'.join(spl[:-1])
                    mod = importlib.import_module(path)
                    self.layer_kwargs['name'] = str(path + name_suffix)
                    mod = getattr(mod, spl[-1])
                    z_act = mod(self.latent_dim, **self.layer_kwargs)
                else:
                    mod = importlib.import_module(str('keras.layers'))
                    #mod = importlib.import_module(str('tensorflow.python.keras.layers'))
                    self.layer_kwargs['name'] = str(self.type + name_suffix)
                    if self.type == 'Dense':
                        z_act = Dense(self.latent_dim, **self.layer_kwargs)
                    else:
                        z_act = getattr(mod, self.type)
                        z_act = z_act(self.latent_dim, **self.layer_kwargs)
            except Exception:
                raise AttributeError("Error importing activation ", self.type)
            act_list.append(z_act)

    # call_on_addl is only defined on the maf path; fall back without it.
    try:
        return {'stat': stats_list, 'act': act_list,
                'addl': addl_list, 'call_on_addl': call_on_addl}
    except NameError:
        return {'stat': stats_list, 'act': act_list, 'addl': addl_list}
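# --- Hedged usage sketch (assumption): wiring the dict returned by
# make_function_list into a graph for the 'vae' type. `latent_cfg` is a
# hypothetical configured instance and `h` the tensor feeding this block.
def apply_vae_block(latent_cfg, h):
    fns = latent_cfg.make_function_list(index=0)
    z_mean_layer, z_logvar_layer = fns['stat'][0]  # paired Dense stat layers
    sampler = fns['act'][0]                        # Lambda wrapping layers.vae_sample
    z_mean = z_mean_layer(h)
    z_logvar = z_logvar_layer(h)
    return sampler([z_mean, z_logvar])             # reparameterized latent sample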
def compile_model(self):
    """Compile the full, left, right, and center VAE models.

    Args:
        None
    Returns:
        None
    """

    # Reparameterization trick for the VAE: z = mu + exp(log_sigma) * eps
    def sampling(args):
        z_mean, z_log_sigma = args
        epsilon = K.random_normal(
            shape=(K.shape(z_mean)[0], self.params.encodedSize))
        return z_mean + K.exp(z_log_sigma) * epsilon

    # Each VAE loss is squared reconstruction error plus the KL divergence
    # KL(N(mu, sigma^2) || N(0, 1)) with sigma = exp(z_log_sigma); note the
    # factor of 2 on z_log_sigma, since log(sigma^2) = 2 * log(sigma).
    def vae_loss(inputs, finalLayer):
        reconstruction_loss = K.sum(K.square(finalLayer - inputs))
        kl_loss = -0.5 * K.sum(
            1 + 2 * z_log_sigmaFull - K.square(z_meanFull)
            - K.square(K.exp(z_log_sigmaFull)), axis=-1)
        return K.mean(reconstruction_loss + kl_loss)

    # Same loss against the left-branch statistics
    def left_vae_loss(inputs, finalLayer):
        reconstruction_loss = K.sum(K.square(finalLayer - inputs))
        kl_loss = -0.5 * K.sum(
            1 + 2 * z_log_sigmaLeft - K.square(z_meanLeft)
            - K.square(K.exp(z_log_sigmaLeft)), axis=-1)
        return K.mean(reconstruction_loss + kl_loss)

    # Same loss against the right-branch statistics
    def right_vae_loss(inputs, finalLayer):
        reconstruction_loss = K.sum(K.square(finalLayer - inputs))
        kl_loss = -0.5 * K.sum(
            1 + 2 * z_log_sigmaRight - K.square(z_meanRight)
            - K.square(K.exp(z_log_sigmaRight)), axis=-1)
        return K.mean(reconstruction_loss + kl_loss)

    # Encoder with left and right branches. Layer objects are kept in their
    # own variables so `trainable` can be toggled on the layers themselves
    # (setting it on output tensors has no effect).
    leftEncoderInput = Input(shape=(self.params.inputSizeLeft,))
    leftEncoderFirstDense = Dense(self.params.firstLayerSizeLeft, activation='relu')
    leftEncoderFirstLayer = leftEncoderFirstDense(leftEncoderInput)
    leftEncoderSecondDense = Dense(self.params.secondLayerSize, activation='relu')
    leftEncoderSecondLayer = leftEncoderSecondDense(leftEncoderFirstLayer)

    rightEncoderInput = Input(shape=(self.params.inputSizeRight,))
    rightEncoderFirstDense = Dense(self.params.firstLayerSizeRight, activation='relu')
    rightEncoderFirstLayer = rightEncoderFirstDense(rightEncoderInput)
    rightEncoderSecondDense = Dense(self.params.secondLayerSize, activation='relu')
    rightEncoderSecondLayer = rightEncoderSecondDense(rightEncoderFirstLayer)

    encoderMergeLayer = Dense(self.params.thirdLayerSize, activation='relu')
    leftMerge = encoderMergeLayer(leftEncoderSecondLayer)
    rightMerge = encoderMergeLayer(rightEncoderSecondLayer)

    # These different merge branches feed the different models below;
    # averaging a tensor with itself is an identity that keeps the three
    # graphs structurally parallel.
    mergedLayer = keras.layers.average([leftMerge, rightMerge])
    leftMergedLayer = keras.layers.average([leftMerge, leftMerge])
    rightMergedLayer = keras.layers.average([rightMerge, rightMerge])

    z_mean = Dense(self.params.encodedSize)
    z_log_sigma = Dense(self.params.encodedSize)

    # Three (mu, log_sigma) pairs, one per model
    z_meanLeft = z_mean(leftMergedLayer)
    z_log_sigmaLeft = z_log_sigma(leftMergedLayer)
    z_meanRight = z_mean(rightMergedLayer)
    z_log_sigmaRight = z_log_sigma(rightMergedLayer)
    z_meanFull = z_mean(mergedLayer)
    z_log_sigmaFull = z_log_sigma(mergedLayer)

    zLeft = Lambda(sampling)([z_meanLeft, z_log_sigmaLeft])
    zRight = Lambda(sampling)([z_meanRight, z_log_sigmaRight])
    zFull = Lambda(sampling)([z_meanFull, z_log_sigmaFull])

    # The three encoders
    leftEncoder = Model(leftEncoderInput, zLeft)
    rightEncoder = Model(rightEncoderInput, zRight)
    self.fullEncoder = Model([leftEncoderInput, rightEncoderInput], zFull)

    # Decoder with left and right outputs
    decoderInputs = Input(shape=(self.params.encodedSize,))
    decoderFirstDense = Dense(self.params.thirdLayerSize, activation='relu')
    decoderFirstLayer = decoderFirstDense(decoderInputs)

    leftDecoderSecondDense = Dense(self.params.secondLayerSize, activation='relu')
    leftDecoderSecondLayer = leftDecoderSecondDense(decoderFirstLayer)
    leftDecoderThirdDense = Dense(self.params.firstLayerSizeLeft, activation='relu')
    leftDecoderThirdLayer = leftDecoderThirdDense(leftDecoderSecondLayer)
    leftDecoderOutputDense = Dense(self.params.inputSizeLeft, activation='sigmoid')
    leftDecoderOutput = leftDecoderOutputDense(leftDecoderThirdLayer)

    rightDecoderSecondDense = Dense(self.params.secondLayerSize, activation='relu')
    rightDecoderSecondLayer = rightDecoderSecondDense(decoderFirstLayer)
    rightDecoderThirdDense = Dense(self.params.firstLayerSizeRight, activation='relu')
    rightDecoderThirdLayer = rightDecoderThirdDense(rightDecoderSecondLayer)
    rightDecoderOutputDense = Dense(self.params.inputSizeRight, activation='sigmoid')
    rightDecoderOutput = rightDecoderOutputDense(rightDecoderThirdLayer)

    # Three decoders
    self.fullDecoder = Model(decoderInputs, [leftDecoderOutput, rightDecoderOutput])
    leftDecoder = Model(decoderInputs, leftDecoderOutput)
    rightDecoder = Model(decoderInputs, rightDecoderOutput)
    # decoder.summary()

    # Left-to-right transition
    outputs = self.fullDecoder(leftEncoder(leftEncoderInput))
    self.leftToRightModel = Model(leftEncoderInput, outputs)
    # leftToRightModel.summary()

    # Right-to-left transition
    outputs = self.fullDecoder(rightEncoder(rightEncoderInput))
    self.rightToLeftModel = Model(rightEncoderInput, outputs)
    # rightToLeftModel.summary()

    # Full model
    outputs = self.fullDecoder(
        self.fullEncoder([leftEncoderInput, rightEncoderInput]))
    self.vae_model = Model([leftEncoderInput, rightEncoderInput], outputs)

    lowLearnAdam = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999,
                                         epsilon=None, decay=0.0, amsgrad=False)
    self.vae_model.compile(optimizer=lowLearnAdam, loss=vae_loss)
    # vae_model.summary()

    # Shared layers vs. branch-specific layers; `trainable` is toggled before
    # each compile, so each model below trains a different subset of weights.
    sharedLayers = [encoderMergeLayer, z_mean, z_log_sigma, decoderFirstDense]
    branchLayers = [
        leftEncoderFirstDense, leftEncoderSecondDense,
        rightEncoderFirstDense, rightEncoderSecondDense,
        leftDecoderSecondDense, leftDecoderThirdDense, leftDecoderOutputDense,
        rightDecoderSecondDense, rightDecoderThirdDense, rightDecoderOutputDense,
    ]

    # Freeze all shared layers: the left/right VAE models can't train the middle
    for layer in sharedLayers:
        layer.trainable = False

    outputs = leftDecoder(leftEncoder(leftEncoderInput))
    self.leftModel = Model(leftEncoderInput, outputs)
    self.leftModel.compile(optimizer=lowLearnAdam, loss=left_vae_loss)

    outputs = rightDecoder(rightEncoder(rightEncoderInput))
    self.rightModel = Model(rightEncoderInput, outputs)
    self.rightModel.compile(optimizer=lowLearnAdam, loss=right_vae_loss)

    # Center model: shared layers trainable, branch-specific layers frozen
    for layer in sharedLayers:
        layer.trainable = True
    for layer in branchLayers:
        layer.trainable = False

    outputs = self.fullDecoder(
        self.fullEncoder([leftEncoderInput, rightEncoderInput]))
    self.centerModel = Model([leftEncoderInput, rightEncoderInput], outputs)
    self.centerModel.compile(optimizer=lowLearnAdam, loss=vae_loss)

    plot_model(self.fullEncoder,
               to_file=os.path.join(
                   'Output', str(self.params.dataSetInfo.name),
                   'sharedVaeFullEncoder{}_{}_{}_{}_{}_{}_{}.png'.format(
                       str(self.params.numEpochs),
                       str(self.params.firstLayerSizeLeft),
                       str(self.params.inputSizeLeft),
                       str(self.params.secondLayerSize),
                       str(self.params.encodedSize),
                       str(self.params.firstLayerSizeRight),
                       str(self.params.inputSizeRight))),
               show_shapes=True)
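# --- Hedged usage sketch (assumption): because each compiled model freezes a
# different part of the shared graph, training typically alternates between
# them. `model` is a hypothetical wrapper exposing compile_model, and
# `left_data`/`right_data` are aligned arrays for the two views.
def train_shared_vae(model, left_data, right_data, rounds=10):
    model.compile_model()
    for _ in range(rounds):
        # Branch-only updates: shared middle layers were frozen at compile time.
        model.leftModel.fit(left_data, left_data, batch_size=32, verbose=0)
        model.rightModel.fit(right_data, right_data, batch_size=32, verbose=0)
        # Shared-core updates: branch layers were frozen at compile time.
        model.centerModel.fit([left_data, right_data],
                              [left_data, right_data],
                              batch_size=32, verbose=0)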