def make_classifier_combined_morphingaware(n_hidden_layers=2,
                                           hidden_layer_size=100,
                                           activation='tanh',
                                           dropout_prob=0.0,
                                           alpha=5.,
                                           learn_log_r=False,
                                           epsilon=1.e-4,
                                           learning_rate=1.e-3):
    """
    Builds a Keras model for the morphing-aware version of the CASCAL technique.

    :param n_hidden_layers: Number of hidden layers.
    :param hidden_layer_size: Number of units in each hidden layer.
    :param activation: Activation function.
    :param dropout_prob: Dropout probability.
    :param alpha: RASCAL hyperparameter that weights the score term in the loss.
    :param learn_log_r: Fully connected network represents log r rather than s.
    :param epsilon: Parameter for numerical stability.
    :param learning_rate: Initial learning rate.
    :return: Keras model.
    """

    # Inputs
    input_layer = Input(shape=(settings.n_thetas_features, ))
    x_layer = Lambda(lambda x: x[:, :settings.n_features],
                     output_shape=(settings.n_features, ))(input_layer)
    theta_layer = Lambda(lambda x: x[:, -settings.n_params:],
                         output_shape=(settings.n_params, ))(input_layer)

    # Morphing weights
    wtilde_layer = generate_wtilde_layer(theta_layer)
    wi_layer = generate_wi_layer(wtilde_layer)

    # Ratio estimators for each component
    ri_hat_layers = []
    for i in range(settings.n_morphing_samples):
        hidden_layer = Dense(hidden_layer_size, activation=activation)(x_layer)
        if n_hidden_layers > 1:
            hidden_layer_ = build_hidden_layers(
                n_hidden_layers - 1,
                hidden_layer_size=hidden_layer_size,
                activation=activation,
                dropout_prob=dropout_prob)
            hidden_layer = hidden_layer_(hidden_layer)

        if learn_log_r:
            log_ri_hat_layer = Dense(1, activation='linear')(hidden_layer)
            ri_hat_layers.append(Lambda(lambda x: K.exp(x))(log_ri_hat_layer))
        else:
            si_hat_layer = Dense(1, activation='sigmoid')(hidden_layer)
            ri_hat_layers.append(
                Reshape((1, ))(
                    Lambda(lambda x: (1. - x) / (x + epsilon))(si_hat_layer)))

    ri_hat_layer = Concatenate()(ri_hat_layers)

    # Combine, clip, transform to \hat{s}
    wi_ri_hat_layer = Multiply()([wi_layer, ri_hat_layer])
    r_hat_layer = Reshape((1, ))(
        Lambda(lambda x: K.sum(x, axis=1))(wi_ri_hat_layer))
    positive_r_hat_layer = Activation('relu')(r_hat_layer)
    s_hat_layer = Lambda(lambda x: 1. / (1. + x))(positive_r_hat_layer)

    # Score
    log_r_hat_layer = Lambda(lambda x: K.log(x + epsilon))(positive_r_hat_layer)
    gradient_layer = Lambda(lambda x: K.gradients(x[0], x[1])[0],
                            output_shape=(settings.n_thetas_features, ))(
                                [log_r_hat_layer, input_layer])
    score_layer = Lambda(lambda x: x[:, -settings.n_params:],
                         output_shape=(settings.n_params, ))(gradient_layer)

    # Combine outputs
    output_layer = Concatenate()(
        [s_hat_layer, log_r_hat_layer, score_layer, wi_layer, ri_hat_layer])

    model = Model(inputs=[input_layer], outputs=[output_layer])

    # Compile model
    model.compile(loss=lambda x, y: loss_function_combined(x, y, alpha=alpha),
                  metrics=metrics,
                  optimizer=optimizers.Adam(lr=learning_rate, clipnorm=1.))

    return model
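
# Usage sketch (illustrative, not part of the original module): builds the
# morphing-aware CASCAL model and runs a forward pass on random inputs. The
# `_demo_*` name and the toy data are hypothetical; `settings`,
# `generate_wtilde_layer`, `generate_wi_layer`, `build_hidden_layers`,
# `loss_function_combined`, and `metrics` are assumed to be defined elsewhere
# in this module. The exact layout of the training targets is fixed by
# loss_function_combined and is not reproduced here.
def _demo_classifier_combined_morphingaware():
    import numpy as np

    model = make_classifier_combined_morphingaware(n_hidden_layers=2,
                                                   hidden_layer_size=100,
                                                   alpha=5.)
    model.summary()

    # Each input row concatenates the observables x with the parameter point
    # theta, giving settings.n_thetas_features columns in total.
    x_theta = np.random.randn(128, settings.n_thetas_features).astype('float32')
    predictions = model.predict(x_theta)

    # Output columns follow the Concatenate() above:
    # [s_hat, log_r_hat, score (n_params), w_i and r_i_hat (n_morphing_samples each)].
    return predictions
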
def make_regressor_morphingaware(n_hidden_layers=2,
                                 hidden_layer_size=100,
                                 activation='tanh',
                                 dropout_prob=0.0,
                                 factor_out_sm=True,
                                 epsilon=1.e-4,
                                 learning_rate=1.e-3):
    """
    Builds a Keras model for the morphing-aware version of the ROLR technique.

    :param n_hidden_layers: Number of hidden layers.
    :param hidden_layer_size: Number of units in each hidden layer.
    :param activation: Activation function.
    :param dropout_prob: Dropout probability.
    :param factor_out_sm: If True, the SM and the deviation from it are modelled separately and then added.
    :param epsilon: Parameter for numerical stability.
    :param learning_rate: Initial learning rate.
    :return: Keras model.
    """

    # Inputs
    input_layer = Input(shape=(settings.n_thetas_features, ))
    x_layer = Lambda(lambda x: x[:, :settings.n_features],
                     output_shape=(settings.n_features, ))(input_layer)
    theta_layer = Lambda(lambda x: x[:, -settings.n_params:],
                         output_shape=(settings.n_params, ))(input_layer)

    # Morphing weights
    wtilde_layer = generate_wtilde_layer(theta_layer)
    wi_layer = generate_wi_layer(wtilde_layer)

    # Log ratio estimator for SM
    if factor_out_sm:
        hidden_layer = Dense(hidden_layer_size,
                             activation=activation)(input_layer)
        if n_hidden_layers > 1:
            hidden_layer_ = build_hidden_layers(
                n_hidden_layers - 1,
                hidden_layer_size=hidden_layer_size,
                activation=activation,
                dropout_prob=dropout_prob)
            hidden_layer = hidden_layer_(hidden_layer)
        log_r0_hat_layer = Dense(1, activation='linear')(hidden_layer)
        r0_hat_layer = Lambda(lambda x: K.exp(x))(log_r0_hat_layer)

    # Log ratio estimators for each component
    ri_hat_layers = []
    for i in range(settings.n_morphing_samples):
        hidden_layer = Dense(hidden_layer_size, activation=activation)(x_layer)
        if n_hidden_layers > 1:
            hidden_layer_ = build_hidden_layers(
                n_hidden_layers - 1,
                hidden_layer_size=hidden_layer_size,
                activation=activation,
                dropout_prob=dropout_prob)
            hidden_layer = hidden_layer_(hidden_layer)

        if factor_out_sm:
            delta_ri_hat_layer = Dense(1, activation='linear')(hidden_layer)
            ri_hat_layer = Add()([delta_ri_hat_layer, r0_hat_layer])
        else:
            log_ri_hat_layer = Dense(1, activation='linear')(hidden_layer)
            ri_hat_layer = Lambda(lambda x: K.exp(x))(log_ri_hat_layer)

        ri_hat_layers.append(Reshape((1, ))(ri_hat_layer))

    ri_hat_layer = Concatenate()(ri_hat_layers)

    # Combine, clip, transform to \hat{s}
    wi_ri_hat_layer = Multiply()([wi_layer, ri_hat_layer])
    r_hat_layer = Reshape((1, ))(
        Lambda(lambda x: K.sum(x, axis=1))(wi_ri_hat_layer))
    positive_r_hat_layer = Activation('relu')(r_hat_layer)

    # Translate to s
    s_hat_layer = Lambda(lambda x: 1. / (1. + x))(positive_r_hat_layer)

    # Score
    log_r_hat_layer = Lambda(lambda x: K.log(x + epsilon))(positive_r_hat_layer)
    gradient_layer = Lambda(lambda x: K.gradients(x[0], x[1])[0],
                            output_shape=(settings.n_thetas_features, ))(
                                [log_r_hat_layer, input_layer])
    score_layer = Lambda(lambda x: x[:, -settings.n_params:],
                         output_shape=(settings.n_params, ))(gradient_layer)

    # Combine outputs
    output_layer = Concatenate()(
        [s_hat_layer, log_r_hat_layer, score_layer, wi_layer, ri_hat_layer])

    model = Model(inputs=[input_layer], outputs=[output_layer])

    # Compile model
    model.compile(loss=loss_function_ratio_regression,
                  metrics=metrics,
                  optimizer=optimizers.Adam(lr=learning_rate, clipnorm=10.))

    return model
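
# Usage sketch (illustrative, not part of the original module): builds the
# morphing-aware ROLR regressor and slices the estimated log likelihood ratio
# and score out of the concatenated output. The `_demo_*` name and toy data
# are hypothetical; `settings` and the helper functions are assumed to be
# defined elsewhere in this module, and the training-target layout expected
# by loss_function_ratio_regression is not reproduced here.
def _demo_regressor_morphingaware():
    import numpy as np

    model = make_regressor_morphingaware(factor_out_sm=True)

    # Each input row concatenates the observables x with the parameter point theta.
    x_theta = np.random.randn(64, settings.n_thetas_features).astype('float32')
    predictions = model.predict(x_theta)

    # Output columns follow the Concatenate() above:
    # [s_hat, log_r_hat, score (n_params), w_i, r_i_hat].
    log_r_hat = predictions[:, 1]
    score_hat = predictions[:, 2:2 + settings.n_params]
    return log_r_hat, score_hat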