def inv_transform(self, y):
    """Invert the training-time data scaling and squash the result with tanh.

    Builds ``self.datascale * (1 + y) / (1 - y)`` out of LBANN layers,
    multiplies it by ``1 / self.linear_scaler`` and feeds that to ``Tanh``.

    Args:
        y (Layer): Layer holding values in the transformed space.

    Returns:
        (Layer): The ``Tanh`` layer that ends the chain.

    """
    numerator = lbann.Add(lbann.Constant(value=1.0, hint_layer=y),
                          lbann.Identity(y))
    denominator = lbann.Subtract(lbann.Constant(value=1.0, hint_layer=y),
                                 lbann.Identity(y))
    # A single-input WeightedSum is used as "multiply by a scalar".
    unscaled = lbann.WeightedSum(lbann.SafeDivide(numerator, denominator),
                                 scaling_factors=str(self.datascale))
    rescaled = lbann.WeightedSum(unscaled,
                                 scaling_factors=str(1 / self.linear_scaler))
    return lbann.Tanh(rescaled)
def __init__(self, size, bias = True,
             weights=[], name=None, data_layout='data_parallel'):
    """Initialize LSTM cell.

    Args:
        size (int): Size of output tensor.
        bias (bool): Whether to apply biases after linearity.
        weights (`Weights` or iterator of `Weights`): Weights in
            fully-connected layer. There are at most two - a
            matrix ((4*size) x (input_size+size) dimensions) and a
            bias (4*size entries). If weights are not provided,
            the matrix and bias will be initialized in a similar
            manner as PyTorch (uniform random values from
            [-1/sqrt(size), 1/sqrt(size)]).
        name (str): Default name is in the form 'lstmcell<index>'.
        data_layout (str): Data layout.

    Raises:
        ValueError: If more than two weights are provided.

    """
    super().__init__()
    LSTMCell.global_count += 1
    self.step = 0
    self.size = size
    self.name = (name
                 if name
                 else 'lstmcell{0}'.format(LSTMCell.global_count))
    self.data_layout = data_layout

    # Initial state
    self.last_output = lbann.Constant(value=0.0, num_neurons=str(size),
                                      name=self.name + '_init_output',
                                      data_layout=self.data_layout)
    self.last_cell = lbann.Constant(value=0.0, num_neurons=str(size),
                                    name=self.name + '_init_cell',
                                    data_layout=self.data_layout)

    # Weights
    # The argument is copied into a fresh list, so the shared default
    # `weights=[]` is never mutated.
    self.weights = list(make_iterable(weights))
    if len(self.weights) > 2:
        raise ValueError('`LSTMCell` has at most two weights, '
                         'but got {0}'.format(len(self.weights)))

    # Default initialization range is symmetric around zero:
    # [-1/sqrt(size), 1/sqrt(size)]. The upper bound used to be negative
    # as well, which collapsed the range to a single value.
    bound = 1 / sqrt(self.size)
    if len(self.weights) == 0:
        self.weights.append(
            lbann.Weights(initializer=lbann.UniformInitializer(min=-bound,
                                                               max=bound),
                          name=self.name+'_matrix'))
    if len(self.weights) == 1:
        self.weights.append(
            lbann.Weights(initializer=lbann.UniformInitializer(min=-bound,
                                                               max=bound),
                          name=self.name+'_bias'))

    # Linearity
    self.fc = FullyConnectedModule(4*size, bias=bias,
                                   weights=self.weights,
                                   name=self.name + '_fc',
                                   data_layout=self.data_layout)
def f_invtransform(y, scale=4.0):
    """Transform values back to the original data space.

    Inverse of the transformation that scales the data before training:
    builds ``scale * (1 + y) / (1 - y)`` out of LBANN layers.

    Args:
        y (Layer): Layer holding values in the transformed space.
        scale (float): Multiplier applied to the ratio (default: 4.0).

    Returns:
        (Layer): ``WeightedSum`` layer holding the rescaled ratio.

    """
    one_plus_y = lbann.Add(lbann.Constant(value=1.0, hint_layer=y),
                           lbann.Identity(y))
    one_minus_y = lbann.Subtract(lbann.Constant(value=1.0, hint_layer=y),
                                 lbann.Identity(y))
    ratio = lbann.SafeDivide(one_plus_y, one_minus_y)
    # A single-input WeightedSum is used as "multiply by a scalar".
    return lbann.WeightedSum(ratio, scaling_factors=str(scale))
def inv_transform(self, y):  ### Original transformation
    """Invert the transformation that scales the data before training.

    Builds ``self.datascale * (1 + y) / (1 - y)`` out of LBANN layers.

    Args:
        y (Layer): Layer holding values in the transformed space.

    Returns:
        (Layer): ``WeightedSum`` layer holding the rescaled ratio.

    """
    def combine_with_one(binary_op):
        # Applies `binary_op` to (1, y); the constant takes its shape from y.
        return binary_op(lbann.Constant(value=1.0, hint_layer=y),
                         lbann.Identity(y))

    ratio = lbann.SafeDivide(combine_with_one(lbann.Add),
                             combine_with_one(lbann.Subtract))
    return lbann.WeightedSum(ratio, scaling_factors=str(self.datascale))
def inv_transform(self, y):
    """Invert the transformation that scales the data before training.

    Builds ``self.datascale * (1 + y) / (1 - y)`` out of LBANN layers.

    Args:
        y (Layer): Layer holding values in the transformed space.

    Returns:
        (Layer): ``WeightedSum`` layer holding the rescaled ratio.

    """
    top = lbann.Add(lbann.Constant(value=1.0, hint_layer=y), lbann.Identity(y))
    bottom = lbann.Subtract(lbann.Constant(value=1.0, hint_layer=y),
                            lbann.Identity(y))
    restored = lbann.WeightedSum(lbann.SafeDivide(top, bottom),
                                 scaling_factors=str(self.datascale))
    # Disabled post-processing step (linear rescale followed by tanh):
    #linear_scale = 1/self.linear_scaler
    #CH2 = lbann.Tanh(lbann.WeightedSum(inv_transform,scaling_factors=str(linear_scale)))
    #return CH2
    return restored
def forward(self, node_feature_mat, source_indices, target_indices):
    """Call the GatedGraphConv

    Args:
        node_feature_mat (Layer): Node feature matrix with the shape of
            (num_nodes,input_channels)
        source_indices (Layer): Source node indices of the edges
        target_indices (Layer): Target node indices of the edges

    Returns:
        (Layer) : The output after kernel ops. The output can passed into
            another Graph Conv layer directly

    Raises:
        ValueError: If the input channel size is larger than the output
            channel size.

    """
    in_channels = self.input_channel_size
    out_channels = self.output_channel_size

    if in_channels < out_channels:
        # Zero-pad the feature axis so the node features match the
        # output channel size used by the rest of the layer.
        num_zeros = out_channels - in_channels
        zeros = lbann.Constant(value=0,
                               num_neurons=str_list([self.num_nodes, num_zeros]),
                               name=self.name+'_padded')
        node_feature_mat = lbann.Concatenation(node_feature_mat, zeros, axis=1)
    elif in_channels > out_channels:
        # The exception used to be constructed without `raise` (and from an
        # undefined name), so this case was never reported properly.
        raise ValueError(
            'The feature size of the nodes {} cannot be greater than the output dimension {}'
            .format(in_channels, out_channels))

    for layer in range(self.num_layers):
        messages = self.nns[layer](node_feature_mat)
        neighborhoods = GraphExpand(messages, target_indices)
        aggregate = GraphReduce(neighborhoods, source_indices,
                                [self.num_nodes, out_channels])
        node_feature_mat = self.rnn(aggregate, node_feature_mat)
    return node_feature_mat
def forward(self, motif_size, motif_log_embeddings):
    """Predict whether a motif is real.

    Args:
        motif_size (int): Length of the all-ones row vector that is
            multiplied with `motif_log_embeddings`.
        motif_log_embeddings (Layer): Log-space embeddings ``log(d_ij)``.

    Returns:
        (Layer, Layer): ``prob`` (D) and ``log_not_prob`` (log(1-D)).

    @todo Numerically accurate computation of both log(D) and
    log(1-D).

    """
    # D = 1 - exp(-sum_j(prod_i(d_ij)))
    # log(1-D) = -sum_j(exp(sum_i(log(d_ij))))
    ones_row = lbann.Constant(value=1, num_neurons=str_list([1, motif_size]))
    log_products = lbann.MatMul(ones_row, motif_log_embeddings)
    summed = lbann.Reduction(lbann.Exp(log_products), mode='sum')
    log_not_prob = lbann.Negative(summed)

    # Convert log-probability to linear space
    # Note: D=-expm1(x) is accurate when D~0. When D~1, prefer
    # 1-D=exp(x).
    prob = lbann.Negative(lbann.Expm1(log_not_prob))
    return prob, log_not_prob
def constant(self, value, dims=[], datatype=None, hint_layer=None):
    """Create an `lbann.Constant` layer.

    Args:
        value: Value forwarded to `lbann.Constant`.
        dims: Dimensions, converted with `str_list` and passed as
            `num_neurons` (default: empty).
        datatype: Forwarded unchanged (default: None).
        hint_layer: Forwarded unchanged (default: None).

    Returns:
        (Layer): The new constant layer.

    """
    layer_kwargs = dict(
        value=value,
        num_neurons=str_list(dims),
        datatype=datatype,
        hint_layer=hint_layer,
    )
    return lbann.Constant(**layer_kwargs)
def forward(
    self,
    input_ids=None,
    attention_mask=None,
    token_type_ids=None,
    position_ids=None,
    head_mask=None,
    inputs_embeds=None,
):
    """Run embeddings, encoder and (if configured) the pooler.

    Missing optional inputs get defaults: an all-ones attention mask with
    shape `self.attn_mask_shape`, all-zero token type ids with shape
    `self.input_shape`, and one `None` head mask per hidden layer.

    Returns:
        The pooler output when `self.pooler` is set and returns something
        other than `None`; otherwise the encoder output.

    """
    if attention_mask is None:
        attention_mask = lbann.Constant(
            value=1, num_neurons=str_list(self.attn_mask_shape))
    if token_type_ids is None:
        token_type_ids = lbann.Constant(
            value=0, num_neurons=str_list(self.input_shape))
    if head_mask is None:
        head_mask = [None] * self.config.num_hidden_layers

    input_ids = lbann.Reshape(input_ids, dims=str_list(self.input_shape))
    embedded = self.embeddings(
        input_ids=input_ids,
        position_ids=position_ids,
        token_type_ids=token_type_ids,
        inputs_embeds=inputs_embeds,
    )
    embedded_dims = self.input_shape + (self.hidden_size, )
    embedded = lbann.Reshape(embedded, dims=str_list(embedded_dims))

    encoder_output = self.encoder(
        embedded,
        attention_mask=attention_mask,
        head_mask=head_mask,
    )

    if self.pooler is None:
        return encoder_output
    pooled_output = self.pooler(encoder_output)
    return encoder_output if pooled_output is None else pooled_output
def inv_transform(self, y):  ### Original transformation
    """Invert the transformation that scales the data before training.

    Builds ``self.datascale * (1 + y) / (1 - y)`` out of LBANN layers.

    Args:
        y (Layer): Layer holding values in the transformed space.

    Returns:
        (Layer): ``WeightedSum`` layer holding the rescaled ratio.

    """
    sum_with_one = lbann.Add(lbann.Constant(value=1.0, hint_layer=y),
                             lbann.Identity(y))
    one_minus = lbann.Subtract(lbann.Constant(value=1.0, hint_layer=y),
                               lbann.Identity(y))
    quotient = lbann.SafeDivide(sum_with_one, one_minus)
    return lbann.WeightedSum(quotient, scaling_factors=str(self.datascale))

# Disabled alternative, kept for reference.
# def inv_transform(self, y):### New tranformation : log-linear
#     threshold = lbann.Constant(value=0.5, hint_layer=y)
#     is_above_threshold = lbann.Greater(y, threshold)
#     is_below_threshold = lbann.LogicalNot(is_above_threshold)
#     below = lbann.SafeDivide(
#         lbann.Subtract(y, lbann.Constant(value=1, hint_layer=y)),
#         lbann.Constant(value=0.03, hint_layer=y),
#     )
#     above = lbann.Exp(lbann.SafeDivide(
#         lbann.Subtract(
#             y,
#             lbann.Constant(value=0.5-0.5/math.log(300)*math.log(50), hint_layer=y)),
#         lbann.Constant(value=0.5/math.log(300), hint_layer=y),
#     ))
#     return lbann.Add(
#         lbann.Multiply(is_above_threshold, above),
#         lbann.Multiply(is_below_threshold, below),
#     )
# def f_invtransform_new(y):
#     if y<=0.5:
#         a=0.03;b=-1.0
#         return (y-b)/a
#     elif y>0.5:
#         a=0.5/np.log(300)
#         b=0.5-a*np.log(50)
#         return np.exp((y-b)/a)
def construct_model():
    """Construct LBANN model.

    Pilot1 Combo model. The objective is the mean squared error between
    responses and predictions; MSE and an R^2 estimate are reported as
    metrics. The model runs for 100 epochs.

    Returns:
        lbann.Model: The assembled model.

    """
    import lbann

    # Layer graph
    data = lbann.Input(data_field='samples')
    responses = lbann.Input(data_field='responses')
    pred = combo.Combo()(data)
    mse = lbann.MeanSquaredError([responses, pred])

    # Residual sum of squares
    residual = lbann.Subtract(responses, pred)
    ss_res = lbann.Reduction(lbann.Square(residual), mode='sum')

    # SS_tot = var(x) = mean((x-mean(x))^2), taken over the mini-batch
    mini_batch_size = lbann.MiniBatchSize()
    batch_mean = lbann.Divide(lbann.BatchwiseReduceSum(responses),
                              mini_batch_size)
    centered_sq = lbann.Square(lbann.Subtract(responses, batch_mean))
    ss_tot = lbann.Divide(lbann.BatchwiseReduceSum(centered_sq),
                          mini_batch_size)

    # r2 = 1 - SS_res / (SS_tot + eps); eps keeps the denominator non-zero
    eps = lbann.Constant(value=1e-07, hint_layer=ss_tot)
    unity = lbann.Constant(value=1, num_neurons='1')
    r2 = lbann.Subtract(unity, lbann.Divide(ss_res, lbann.Add(ss_tot, eps)))

    metrics = [
        lbann.Metric(mse, name='mse'),
        lbann.Metric(r2, name='r2'),
    ]
    callbacks = [lbann.CallbackPrint(), lbann.CallbackTimer()]

    # Construct model
    num_epochs = 100
    layers = list(lbann.traverse_layer_graph([data, responses]))
    return lbann.Model(num_epochs,
                       layers=layers,
                       metrics=metrics,
                       objective_function=mse,
                       callbacks=callbacks)
def forward(self, inputs):
    """Placeholder for a loss built from predictions and labels.

    Always raises: the implementation needs a log-gamma function. The
    statements after the `raise` are an unreachable sketch of the
    intended computation, ``pred - label*log(pred) + loggamma(label + 1)``.

    Raises:
        NotImplementedError: Always.

    """
    raise NotImplementedError  # Requires log-gamma function

    if len(inputs) != 2:
        raise ValueError('expected two inputs: predictions and labels')
    pred, label = inputs[0], inputs[1]
    ones = lbann.Constant(hint_layer=pred, value=1.0)
    terms = [
        pred,
        lbann.Multiply([label, lbann.Log(pred)]),
        lbann.LogGamma(lbann.Add([label, ones])),
    ]
    full = lbann.WeightedSum(terms, scaling_factors='1.0 -1.0 1.0')
    return lbann.Reduction(full)
def __init__(
    self,
    hidden_size,
    num_layers=1,
    weights=[],
    name=None,
    device=None,
    datatype=None,
    weights_datatype=None,
):
    """Initialize GRU module.

    Args:
        hidden_size (int): Size of the hidden state.
        num_layers (int): Number of stacked layers (default: 1).
        weights (list of `Weights`): Four weights per layer. If empty,
            they are created with a shared uniform initializer on
            [-1/sqrt(hidden_size), 1/sqrt(hidden_size)].
        name (str): Default name is in the form 'gru<index>'.
        device: Passed to the default hidden-state constant layer.
        datatype: Passed to the default hidden-state constant layer.
        weights_datatype: Datatype for created weights. Falls back to
            `datatype` when None.

    Raises:
        ValueError: If a non-empty weights list does not hold exactly
            ``4*num_layers`` entries.

    """
    GRUModule.global_count += 1
    self.instance = 0
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.name = name or f'gru{GRUModule.global_count}'
    self.device = device
    self.datatype = datatype

    # Construct weights if needed
    self.weights = weights
    if not self.weights:
        scale = 1 / math.sqrt(self.hidden_size)
        init = lbann.UniformInitializer(min=-scale,max=scale)
        if weights_datatype is None:
            weights_datatype = self.datatype
        per_layer_names = ('ih_matrix', 'hh_matrix', 'ih_bias', 'hh_bias')
        self.weights = [
            lbann.Weights(
                initializer=init,
                name=f'{self.name}_layer{layer_idx}_{weight_name}',
                datatype=weights_datatype,
            )
            for layer_idx in range(self.num_layers)
            for weight_name in per_layer_names
        ]

    expected_count = 4*self.num_layers
    if self.weights and len(self.weights) != expected_count:
        raise ValueError(
            f'expected {4*self.num_layers} weights, '
            f'but recieved {len(self.weights)}'
        )

    # Default initial hidden state
    self.zeros = lbann.Constant(
        value=0,
        num_neurons=str_list([num_layers, hidden_size]),
        name=f'{self.name}_zeros',
        device=self.device,
        datatype=self.datatype,
    )
def __init__(self, num_channels, size, bias=True, weights=[], name=None):
    """Initialize GRU cell.

    Args:
        num_channels (int): The number of rows in the matrix to perform
            GRU
        size (int): Size of output tensor.
        bias (bool): Whether to apply biases after linearity.
        weights (`Weights` or iterator of `Weights`): Weights in
            fully-connected layer. There are at most four - two
            matrices ((3*size) x (input_size) and (3*size) x (size)
            dimensions) each and two biases (3*size entries) each. If
            weights are not provided, the matrix and bias will be
            initialized in a similar manner as PyTorch (uniform random
            values from [-1/sqrt(size), 1/sqrt(size)]).
        name (str): Default name is in the form 'gru<index>'.

    The data layout is fixed to 'data_parallel'.

    """
    super().__init__()
    ChannelwiseGRU.global_count += 1
    self.step = 0
    self.size = size
    self.num_channels = num_channels
    self.name = name or f'gru{ChannelwiseGRU.global_count}'
    self.data_layout = 'data_parallel'

    # Fill in whichever trailing weights were not supplied, in the order
    # ih_matrix, ih_bias, hh_matrix, hh_bias.
    scale = 1 / math.sqrt(self.size)
    self.weights = list(make_iterable(weights))
    suffixes = ['_ih_matrix', '_ih_bias', '_hh_matrix', '_hh_bias']
    for suffix in suffixes[len(self.weights):]:
        initializer = lbann.UniformInitializer(min=-scale, max=scale)
        self.weights.append(
            lbann.Weights(initializer=initializer, name=self.name + suffix))

    # Input-hidden and hidden-hidden linearities
    self.ih_fc = ChannelwiseFullyConnectedModule(3 * size,
                                                 bias=bias,
                                                 weights=self.weights[:2],
                                                 name=self.name + '_ih_fc')
    self.hh_fc = ChannelwiseFullyConnectedModule(3 * size,
                                                 bias=bias,
                                                 weights=self.weights[2:],
                                                 name=self.name + '_hh_fc')
    self.ones = lbann.Constant(value=1.0,
                               num_neurons=str_list([num_channels, size]),
                               name=self.name + '_ones')
def random_projection(indices, num_projections, projection_dim):
    """Build hash-based pseudo-random projection vectors from indices.

    Each input index is expanded to `projection_dim` consecutive hash
    inputs, hashed with `UniformHash`, mapped through `ErfInv`, instance
    normalized, and scaled by ``1 / projection_dim``.

    Args:
        indices (Layer): Input indices, one per projection.
        num_projections (int): Number of projection vectors.
        projection_dim (int): Number of entries per projection vector.

    Returns:
        (Layer): The scaled projection layer.

    """
    matrix_dims = utils.str_list([num_projections, projection_dim])

    # Expand input indices to get an index for each vector entry
    # Note: proj_indices(i) = index*projection_dim + i
    scaled_indices = lbann.WeightedSum(
        indices,
        scaling_factors=utils.str_list(projection_dim),
    )
    iota_weights = lbann.Weights(
        initializer=lbann.ValueInitializer(
            values=utils.str_list(range(projection_dim))),
        optimizer=lbann.NoOptimizer(),
    )
    iota = lbann.WeightsLayer(
        dims=utils.str_list(projection_dim),
        weights=iota_weights,
    )
    index_columns = lbann.Tessellate(
        lbann.Reshape(scaled_indices,
                      dims=utils.str_list([num_projections, 1])),
        dims=matrix_dims,
    )
    offset_rows = lbann.Tessellate(
        lbann.Reshape(iota, dims=utils.str_list([1, projection_dim])),
        dims=matrix_dims,
    )
    proj_indices = lbann.Sum(index_columns, offset_rows)

    # Apply hash function and convert to Gaussian distribution
    hashed = lbann.UniformHash(proj_indices)
    ones = lbann.Constant(value=1, num_neurons=matrix_dims)
    eps = 0.001
    # eps shrinks the ErfInv argument slightly: 2*(1-eps)*h - (1-eps)
    shifted = lbann.WeightedSum(
        hashed,
        ones,
        scaling_factors=utils.str_list([2 * (1 - eps), -(1 - eps)]),
    )
    normalized = lbann.InstanceNorm(lbann.ErfInv(shifted))
    return lbann.WeightedSum(
        normalized,
        scaling_factors=utils.str_list(1 / projection_dim),
    )
def create_position_ids_from_input_ids(input_ids,
                                       input_shape,
                                       padding_idx,
                                       past_key_values_length=0):
    """Derive position ids from input ids, skipping padding entries.

    Computes ``(cumsum(mask, axis=1) + past_key_values_length) * mask +
    padding_idx``, where ``mask`` marks entries of `input_ids` that differ
    from `padding_idx`.

    Args:
        input_ids (Layer): Input token ids.
        input_shape: Dimensions used for the padding constant and passed
            to `Cumsum`.
        padding_idx: Padding id value.
        past_key_values_length: Constant added to the cumulative sum
            (default: 0).

    Returns:
        (Layer): Position id layer.

    """
    pad_layer = lbann.Constant(value=padding_idx,
                               num_neurons=str_list(input_shape))
    not_padding = lbann.NotEqual(input_ids, pad_layer)
    running_count = lbann.modules.Cumsum(not_padding, input_shape, axis=1)
    shifted_count = lbann.AddConstant(running_count,
                                      constant=past_key_values_length)
    # Multiplying by the mask zeroes the count at padding positions.
    masked_count = lbann.Multiply(shifted_count, not_padding)
    return lbann.Add(masked_count, pad_layer)
def forward(self,
            node_feature_mat,
            source_indices,
            target_indices,
            activation=lbann.Relu):
    """Apply GIN Layer.

    Args:
        node_feature_mat (Layer): Node feature matrix with the shape of
            (num_nodes,input_channels)
        source_indices (Layer): Source node indices of the edges
        target_indices (Layer): Target node indices of the edges
        activation (Layer): Activation layer for the node features. If
            None, then no activation is applied. (default: lbann.Relu)

    Returns:
        (Layer) : The output after kernel ops. The output can passed into
            another Graph Conv layer directly

    """
    eps_dims = str_list([self.num_nodes, self.input_channel_size])
    eps_layer = lbann.Constant(value=(1 + self.eps), num_neurons=eps_dims)
    scaled_features = lbann.Multiply(node_feature_mat,
                                     eps_layer,
                                     name=self.name + "_epl_mult")
    # NOTE(review): this adds the unscaled features to the (1+eps)-scaled
    # ones before aggregation - confirm this matches the intended update.
    features = lbann.Sum(scaled_features, node_feature_mat)

    # Transform with the sequence of linear layers
    for linear_layer in self.nn:
        features = linear_layer(features)

    expanded = GraphExpand(features, target_indices)
    edge_dims = str_list([self.num_edges, self.output_channel_size])
    expanded = lbann.Reshape(expanded, dims=edge_dims)

    reduced = GraphReduce(expanded, source_indices,
                          [self.num_nodes, self.output_channel_size])

    ## Apply activation
    if not activation:
        return reduced
    return activation(reduced)
def forward(self, X, A):
    """Call the GatedGraphConv

    Args:
        X (GraphVertexData): LBANN Data object, which is a collection of
            Layers. Each Layer is of the shape (1,input_channels)
        A (Layer): Adjacency matrix input with shape (num_nodes, num_nodes)

    Returns:
        (GraphVertexData): `X`, updated in place, after the Gated Graph
            Kernel. The output can passed into another Graph Conv layer
            directly

    Raises:
        ValueError: If the node feature size is larger than the output
            channel size.

    """
    input_features = X.size(1)
    num_nodes = X.size(0)

    if input_features < self.output_channels:
        # Zero-pad every node's features up to the output channel size.
        num_zeros = self.output_channels - input_features
        for i in range(num_nodes):
            zeros = lbann.Constant(value=0,
                                   num_neurons=str_list([1, num_zeros]),
                                   name=self.name + '_zero_' + str(i))
            X[i] = lbann.Concatenation(X[i], zeros, axis=1)
    elif input_features > self.output_channels:
        # The exception used to be constructed without `raise`, so
        # oversized inputs were silently accepted.
        raise ValueError(
            'The feature size of the nodes {} cannot be greater than the output dimension {}'
            .format(input_features, self.output_channels))

    X.update_num_features(self.output_channels)

    for layer in range(self.num_layers):
        X_mat = X.get_mat()
        messages = lbann.MatMul(X_mat, self.weights[layer])
        aggregate = lbann.MatMul(A, messages)
        M = GraphVertexData.matrix_to_graph(aggregate, num_nodes,
                                            self.output_channels)

        for i in range(num_nodes):
            # Flatten each node's features for the recurrent cell, then
            # restore the (1, output_channels) shape. Element [1] of the
            # cell's return value is what gets stored.
            X[i] = lbann.Reshape(X[i], dims=str(self.output_channels))
            X[i] = lbann.Reshape(self.rnn(M[i], X[i])[1],
                                 dims=str_list([1, self.output_channels]))
    return X
def forward(self, X, A, activation = lbann.Relu):
    """Apply GIN Layer.

    Args:
        X (GraphVertexData): LBANN Data object, which is a collection of
            Layers. Each Layer is of the shape (1,input_channels)
        A (Layer): Adjacency matrix input with shape (num_nodes, num_nodes)
        activation (Layer): Activation layer for the node features. If
            None, then no activation is applied. (default: lbann.Relu)

    Returns:
        (GraphVertexData): The output after GCN. The output can passed
            into another Graph Conv layer directly

    """
    in_channel = X.shape[1]

    # Accumulate Messages from Neighboring Nodes
    neighbor_sum = lbann.MatMul(A, X.get_mat(), name = self.name+"_GIN_MATMUL")
    message = GraphVertexData.matrix_to_graph(neighbor_sum, X.shape[0],
                                              in_channel)

    # Aggregate Messages into node features:
    # X[i] <- message[i] + (1 + eps) * X[i]
    eps = lbann.Constant(value=(1+self.eps),
                         num_neurons = str_list([1, in_channel]))
    for idx in range(X.shape[0]):
        weighted_self = lbann.Multiply(eps, X[idx])
        X[idx] = lbann.Sum(message[idx], weighted_self)

    # Transform with the sequence of linear layers
    for linear_layer in self.nn:
        for idx in range(X.shape[0]):
            X[idx] = linear_layer(X[idx])

    ## Apply activation
    if activation:
        for idx in range(X.shape[0]):
            X[idx] = activation(X[idx])

    X.update_num_features(self.output_channels)
    return X
# Objective: mean squared error between responses and predictions.
mse = lbann.MeanSquaredError([responses, pred])

# Pearson Correlation
# rho(x,y) = covariance(x,y) / sqrt( variance(x) * variance(y) )
pearson_r_cov = lbann.Covariance([pred, responses], name="pearson_r_cov")
pearson_r_var1 = lbann.Variance(responses, name="pearson_r_var1")
pearson_r_var2 = lbann.Variance(pred, name="pearson_r_var2")
pearson_r_mult = lbann.Multiply([pearson_r_var1, pearson_r_var2],
                                name="pearson_r_mult")
pearson_r_sqrt = lbann.Sqrt(pearson_r_mult, name="pearson_r_sqrt")
# eps is added to the denominator so it cannot be exactly zero.
eps = lbann.Constant(value=1e-07, hint_layer=pearson_r_sqrt)
pearson_r_denominator = lbann.Add(pearson_r_sqrt, eps)
pearson_r = lbann.Divide([pearson_r_cov, pearson_r_denominator],
                         name="pearson_r")

metrics = [
    lbann.Metric(mse, name='mse'),
    lbann.Metric(pearson_r, name='pearson_r'),
]
callbacks = [lbann.CallbackPrint(), lbann.CallbackTimer()]

layers = list(lbann.traverse_layer_graph([images, responses]))
model = lbann.Model(args.num_epochs,
                    layers=layers,
                    metrics=metrics,
                    objective_function=mse,
                    callbacks=callbacks)
def construct_model(num_epochs,mcr,spectral_loss,save_batch_interval):
    """Construct LBANN model.

    Builds the CosmoGAN layer graph, its losses and metrics, and the
    callbacks (including copying weights between discriminator layers
    every batch).

    Args:
        num_epochs: Number of epochs passed to `lbann.Model`.
        mcr: Forwarded to `ExaGAN.CosmoGAN` at construction and call time.
        spectral_loss: When truthy, adds a log-MSE term between the
            `DFTAbs` of generated and input images to the loss and
            metrics.
        save_batch_interval: Batch interval for dumping outputs.

    Returns:
        lbann.Model: The assembled model.

    """
    import lbann

    # Layer graph
    input_layer = lbann.Input(target_mode='N/A',name='inp_img')

    ### Create expected labels for real and fake data (with label flipping = 0.01)
    prob_flip = 0.01
    label_flip_rand = lbann.Uniform(min=0,max=1, neuron_dims='1')
    label_flip_prob = lbann.Constant(value=prob_flip, num_neurons='1')
    real_labels = lbann.GreaterEqual(label_flip_rand,label_flip_prob, name='is_real')
    fake_labels = lbann.LogicalNot(real_labels,name='is_fake')
    ## All ones: no flip. Input for training Generator.
    gen_ones = lbann.Constant(value=1.0,num_neurons='1')

    #==============================================
    ### Implement GAN
    ## Create the noise vector
    noise = lbann.Gaussian(mean=0.0,stdev=1.0, neuron_dims="64", name='noise_vec')
    z = lbann.Reshape(noise, dims='1 64')
    ## Creating the GAN object and implementing forward pass for both networks
    gan = ExaGAN.CosmoGAN(mcr)
    d1_real, d1_fake, d_adv, gen_img, img = gan(input_layer,z,mcr)

    #==============================================
    ### Compute quantities for adding to Loss and Metrics
    d1_real_bce = lbann.SigmoidBinaryCrossEntropy([d1_real,real_labels],name='d1_real_bce')
    d1_fake_bce = lbann.SigmoidBinaryCrossEntropy([d1_fake,fake_labels],name='d1_fake_bce')
    d_adv_bce = lbann.SigmoidBinaryCrossEntropy([d_adv,gen_ones],name='d_adv_bce')

    #img_loss = lbann.MeanSquaredError([gen_img,img])
    #l1_loss = lbann.L1Norm(lbann.WeightedSum([gen_img,img], scaling_factors="1 -1"))

    #==============================================
    ### Set up source and destination layers
    layers = list(lbann.traverse_layer_graph(input_layer))
    weights = set()
    src_layers = []
    dst_layers = []
    for layer in layers:
        has_weights = bool(layer.weights)
        if has_weights and "disc1" in layer.name and "instance1" in layer.name:
            src_layers.append(layer.name)
        # freeze weights in disc2, analogous to discrim.trainable=False in Keras
        if has_weights and "disc2" in layer.name:
            dst_layers.append(layer.name)
            for frozen in layer.weights:
                frozen.optimizer = lbann.NoOptimizer()
        weights.update(layer.weights)

    #l2_reg = lbann.L2WeightRegularization(weights=weights, scale=1e-4)

    #==============================================
    ### Define Loss and Metrics
    # Define loss (Objective function)
    loss_list = [d1_real_bce,d1_fake_bce,d_adv_bce] ## Usual GAN loss function
    # loss_list=[d1_real_bce,d1_fake_bce] ## skipping adversarial loss for G for testing spectral loss

    if spectral_loss:
        dft_gen_img = lbann.DFTAbs(gen_img)
        dft_img = lbann.StopGradient(lbann.DFTAbs(img))
        spec_loss = lbann.Log(lbann.MeanSquaredError(dft_gen_img, dft_img))
        loss_list.append(lbann.LayerTerm(spec_loss, scale=8.0))

    loss = lbann.ObjectiveFunction(loss_list)

    # Define metrics
    metrics = [
        lbann.Metric(d1_real_bce,name='d_real'),
        lbann.Metric(d1_fake_bce, name='d_fake'),
        lbann.Metric(d_adv_bce,name='gen_adv'),
    ]
    if spectral_loss:
        metrics.append(lbann.Metric(spec_loss,name='spec_loss'))

    #==============================================
    ### Define callbacks list
    # Local switches for the optional callbacks.
    dump_outputs = True
    save_model = False
    print_model = False

    callbacks_list = [
        lbann.CallbackPrint(),
        lbann.CallbackTimer(),
        lbann.CallbackReplaceWeights(source_layers=list2str(src_layers),
                                     destination_layers=list2str(dst_layers),
                                     batch_interval=1),
    ]
    if dump_outputs:
        #callbacks_list.append(lbann.CallbackDumpOutputs(layers='inp_img gen_img_instance1_activation', execution_modes='train validation', directory='dump_outs',batch_interval=save_batch_interval,format='npy'))
        callbacks_list.append(
            lbann.CallbackDumpOutputs(layers='gen_img_instance1_activation',
                                      execution_modes='train validation',
                                      directory='dump_outs',
                                      batch_interval=save_batch_interval,
                                      format='npy'))
    if save_model:
        callbacks_list.append(lbann.CallbackSaveModel(dir='models'))
    if print_model:
        callbacks_list.append(lbann.CallbackPrintModelDescription())

    ### Construct model
    return lbann.Model(num_epochs,
                       weights=weights,
                       layers=layers,
                       metrics=metrics,
                       objective_function=loss,
                       callbacks=callbacks_list)
def construct_model():
    """Construct LBANN model.

    JAG Wasserstein autoencoder model. The objective sums the
    discriminator and adversarial cross-entropy terms (adversarial term
    scaled by 0.01), the reconstruction MSE, the squared L2 norm of the
    reconstruction difference, and L2 weight regularization. The model
    runs for 100 epochs.

    Returns:
        lbann.Model: The assembled model.

    """
    import lbann

    # Layer graph
    input_layer = lbann.Input(target_mode='N/A',name='inp_data')
    # data is 64*64*4 images + 15 scalar + 5 param
    inp_slice = lbann.Slice(input_layer, axis=0, slice_points="0 16399 16404",name='inp_slice')
    gt_y = lbann.Identity(inp_slice,name='gt_y')
    # param not used; the layer is still created as a child of the slice
    gt_x = lbann.Identity(inp_slice, name='gt_x')

    zero = lbann.Constant(value=0.0,num_neurons='1',name='zero')
    one = lbann.Constant(value=1.0,num_neurons='1',name='one')

    y_dim = 16399  # image+scalar shape
    z_dim = 20  # Latent space dim

    z = lbann.Gaussian(mean=0.0,stdev=1.0, neuron_dims="20")
    wae = jag_models.WAE(z_dim,y_dim)
    d1_real, d1_fake, d_adv, pred_y = wae(z,gt_y)

    d1_real_bce = lbann.SigmoidBinaryCrossEntropy([d1_real,one],name='d1_real_bce')
    d1_fake_bce = lbann.SigmoidBinaryCrossEntropy([d1_fake,zero],name='d1_fake_bce')
    d_adv_bce = lbann.SigmoidBinaryCrossEntropy([d_adv,one],name='d_adv_bce')

    img_loss = lbann.MeanSquaredError([pred_y,gt_y])
    difference = lbann.WeightedSum([pred_y,gt_y], scaling_factors="1 -1")
    rec_error = lbann.L2Norm2(difference)

    layers = list(lbann.traverse_layer_graph(input_layer))

    # Setup objective function
    weights = set()
    src_layers = []
    dst_layers = []
    for layer in layers:
        has_weights = bool(layer.weights)
        if has_weights and "disc0" in layer.name and "instance1" in layer.name:
            src_layers.append(layer.name)
        # freeze weights in disc2
        if has_weights and "disc1" in layer.name:
            dst_layers.append(layer.name)
            for frozen in layer.weights:
                frozen.optimizer = lbann.NoOptimizer()
        weights.update(layer.weights)

    l2_reg = lbann.L2WeightRegularization(weights=weights, scale=1e-4)
    scaled_adv_term = lbann.LayerTerm(d_adv_bce,scale=0.01)
    objective_terms = [d1_real_bce, d1_fake_bce, scaled_adv_term,
                       img_loss, rec_error, l2_reg]
    obj = lbann.ObjectiveFunction(objective_terms)

    # Initialize check metric callback
    metrics = [lbann.Metric(img_loss, name='recon_error')]

    replace_weights = lbann.CallbackReplaceWeights(
        source_layers=list2str(src_layers),
        destination_layers=list2str(dst_layers),
        batch_interval=2)
    callbacks = [lbann.CallbackPrint(), lbann.CallbackTimer(), replace_weights]

    # Construct model
    num_epochs = 100
    return lbann.Model(num_epochs,
                       weights=weights,
                       layers=layers,
                       metrics=metrics,
                       objective_function=obj,
                       callbacks=callbacks)
def construct_model(run_args):
    """Construct LBANN model.

    Initial model for ATOM molecular VAE

    Args:
        run_args: Run configuration. Reads `dump_model_dir`, `pad_index`,
            `sequence_length`, `embedding_dim`, `num_embeddings`, `z_dim`,
            `dump_outputs_dir`, `dump_outputs_interval` and `num_epochs`.

    Returns:
        lbann.Model: The assembled model.

    """
    import lbann

    print("Dump model dir ", run_args.dump_model_dir)
    assert run_args.dump_model_dir, "evaluate script asssumes a pretrained WAE model"
    pad_index = run_args.pad_index
    assert pad_index is not None

    sequence_length = run_args.sequence_length
    assert sequence_length is not None

    print("sequence length is {}".format(sequence_length))
    data_layout = "data_parallel"

    # Layer graph
    raw_input = lbann.Input(name='inp', data_field='samples')
    input_ = lbann.Identity(raw_input, name='inp1')
    input_feature_dims = sequence_length

    embedding_size = run_args.embedding_dim
    dictionary_size = run_args.num_embeddings
    assert embedding_size is not None
    assert dictionary_size is not None

    save_output = bool(run_args.dump_outputs_dir)

    print("save output? ", save_output, "out dir ", run_args.dump_outputs_dir)
    z = lbann.Gaussian(mean=0.0, stdev=1.0, neuron_dims=run_args.z_dim)

    waemodel = molwae.MolWAE(input_feature_dims, dictionary_size,
                             embedding_size, pad_index, run_args.z_dim,
                             save_output)
    recon, d1_real, d1_fake, d_adv, arg_max = waemodel(input_, z)

    zero = lbann.Constant(value=0.0, num_neurons='1', name='zero')
    one = lbann.Constant(value=1.0, num_neurons='1', name='one')

    d1_real_bce = lbann.SigmoidBinaryCrossEntropy([d1_real, one],
                                                  name='d1_real_bce')
    d1_fake_bce = lbann.SigmoidBinaryCrossEntropy([d1_fake, zero],
                                                  name='d1_fake_bce')
    d_adv_bce = lbann.SigmoidBinaryCrossEntropy([d_adv, one],
                                                name='d_adv_bce')

    layers = list(lbann.traverse_layer_graph(input_))

    # Setup objective function
    weights = set()
    src_layers = []
    dst_layers = []
    for layer in layers:
        has_weights = bool(layer.weights)
        if has_weights and "disc0" in layer.name and "instance1" in layer.name:
            src_layers.append(layer.name)
        # freeze weights in disc2
        if has_weights and "disc1" in layer.name:
            dst_layers.append(layer.name)
            for frozen in layer.weights:
                frozen.optimizer = lbann.NoOptimizer()
        weights.update(layer.weights)

    # Frozen weights are left out of the L2 regularization term.
    l2_weights = [
        w for w in weights if not isinstance(w.optimizer, lbann.NoOptimizer)
    ]
    l2_reg = lbann.L2WeightRegularization(weights=l2_weights, scale=1e-4)

    wae_loss = [recon, d1_real_bce, d_adv_bce, d1_fake_bce, l2_reg]
    print("LEN wae loss ", len(wae_loss))

    obj = lbann.ObjectiveFunction(wae_loss)

    # Initialize check metric callback
    metrics = [
        lbann.Metric(d_adv_bce, name='adv_loss'),
        lbann.Metric(recon, name='recon')
    ]

    callbacks = [
        lbann.CallbackPrint(),
        #lbann.CallbackStepLearningRate(step=10, amt=0.5),
        lbann.CallbackTimer(),
        lbann.CallbackReplaceWeights(source_layers=list2str(src_layers),
                                     destination_layers=list2str(dst_layers),
                                     batch_interval=2),
    ]

    # Dump output (activation) for post processing
    if run_args.dump_outputs_dir:
        pred_tensor = lbann.Concatenation(arg_max, name='pred_tensor')
        dump_layers = f'inp pred_tensor {waemodel.q_mu.name}'
        callbacks.append(
            lbann.CallbackDumpOutputs(
                batch_interval=run_args.dump_outputs_interval,
                execution_modes='test',
                directory=run_args.dump_outputs_dir,
                layers=dump_layers))

    # Construct model
    return lbann.Model(run_args.num_epochs,
                       weights=weights,
                       layers=layers,
                       objective_function=obj,
                       metrics=metrics,
                       callbacks=callbacks)
def __init__(self, size, bias = True,
             weights=[], name=None, data_layout='data_parallel'):
    """Initialize GRU cell.

    Args:
        size (int): Size of output tensor.
        bias (bool): Whether to apply biases after linearity.
        weights (`Weights` or iterator of `Weights`): Weights in
            fully-connected layer. There are at most four - two
            matrices ((3*size) x (input_size) and (3*size) x (size)
            dimensions) each and two biases (3*size entries) each. If
            weights are not provided, the matrix and bias will be
            initialized in a similar manner as PyTorch (uniform random
            values from [-1/sqrt(size), 1/sqrt(size)]).
        name (str): Default name is in the form 'gru<index>'.
        data_layout (str): Data layout.

    Raises:
        ValueError: If more than four weights are provided.

    """
    super().__init__()
    GRU.global_count += 1
    self.step = 0
    self.size = size
    self.name = name or 'gru{0}'.format(GRU.global_count)
    self.data_layout = data_layout

    # Weights
    self.weights = list(make_iterable(weights))
    if len(self.weights) > 4:
        raise ValueError('`GRU` has at most 4 weights, '
                         'but got {0}'.format(len(self.weights)))

    # Create whichever trailing weights were not supplied, in the order
    # ih_matrix, ih_bias, hh_matrix, hh_bias.
    scale = 1 / math.sqrt(self.size)
    suffixes = ('_ih_matrix', '_ih_bias', '_hh_matrix', '_hh_bias')
    for suffix in suffixes[len(self.weights):]:
        initializer = lbann.UniformInitializer(min=-scale, max=scale)
        self.weights.append(
            lbann.Weights(initializer=initializer, name=self.name+suffix))

    # Linearity
    # Learnable input-hidden weights
    self.ih_fc = FullyConnectedModule(
        3*size, bias=bias,
        weights=self.weights[:2],
        name=self.name + '_ih_fc',
        data_layout=self.data_layout
    )
    # Learnable hidden-hidden weights
    self.hh_fc = FullyConnectedModule(
        3*size, bias=bias,
        weights=self.weights[2:],
        name=self.name + '_hh_fc',
        data_layout=self.data_layout
    )

    self.ones = lbann.Constant(
        value=1.0,
        num_neurons=str(size),
        data_layout=self.data_layout,
        name=self.name+'_ones',
    )
def make_model(num_vertices=None,
               node_features=None,
               num_classes=None,
               dataset=None,
               kernel_type='GCN',
               callbacks=None,
               num_epochs=1):
    '''Construct a model DAG using one of the Graph Kernels

    Args:
        num_vertices (int): Number of vertices of each graph (default: None)
        node_features (int): Number of features per node (default: None)
        num_classes (int): Number of classes as targets (default: None)
        dataset (str): Preset data set to use ('MNIST' or 'PROTEINS').
                       Either a dataset parameter has to be supplied or all
                       of num_vertices, node_features, and num_classes have
                       to be supplied. (default: None)
        kernel_type (str): Graph Kernel to use in model. Expected one of
                           GCN, GIN, Graph, or GatedGraph (default: GCN)
        callbacks (list): Callbacks for the model. If set to None the model
                          description, GPU usage, training_output, and timer
                          is reported. (default: None)
        num_epochs (int): Number of epochs to run (default: 1)

    Returns:
        (lbann Model Object): A model object with the supplied callbacks,
                              dataset presets, and graph kernels.

    Raises:
        AssertionError: If neither `dataset` nor `num_vertices` is given, if
                        both are given, or if any of the three size
                        parameters is still unset after preset resolution.
        Exception: If `dataset` is not a known preset.
        ValueError: If `kernel_type` is not one of the supported kernels.
    '''

    # Both are None by default, so this fails when neither is supplied.
    assert num_vertices != dataset  # Ensure at least one of the values is set

    if dataset is not None:
        # A preset and explicit sizes are mutually exclusive.
        assert num_vertices is None

        if dataset == 'MNIST':
            num_vertices = 75
            num_classes = 10
            node_features = 1
        elif dataset == 'PROTEINS':
            num_vertices = 100
            num_classes = 2
            node_features = 3
        else:
            raise Exception("Unkown Dataset")

    assert num_vertices is not None
    assert num_classes is not None
    assert node_features is not None

    #----------------------------------
    # Reshape and Slice Input Tensor
    #----------------------------------

    input_ = lbann.Input(target_mode='classification')

    # Input dimensions should be
    # (num_vertices * node_features + num_vertices^2 + num_classes)
    # Input should have at least two children since the target is
    # classification

    data = lbann_Graph_Data(input_, num_vertices, node_features, num_classes)

    feature_matrix = data.x
    adj_matrix = data.adj
    target = data.y

    #----------------------------------
    # Perform Graph Convolution
    #----------------------------------

    if kernel_type == 'GIN':
        x = GINConvLayer(feature_matrix, adj_matrix)
    elif kernel_type == 'GCN':
        x = GCNConvLayer(feature_matrix, adj_matrix)
    elif kernel_type == 'Graph':
        x = GraphConvLayer(feature_matrix, adj_matrix)
    elif kernel_type == 'GatedGraph':
        # NOTE(review): 'GatedGraph' is routed to GATConvLayer; confirm
        # that this helper really is the gated-graph kernel.
        x = GATConvLayer(feature_matrix, adj_matrix)
    else:
        # The exception used to be constructed but never raised, so an
        # invalid specifier surfaced later as a NameError on `x`.
        raise ValueError(
            'Invalid Graph kernel specifier "{}" recieved. Expected one of: '
            'GIN,GCN,Graph or GatedGraph'.format(kernel_type))

    out_channel = x.shape[1]

    #----------------------------------
    # Apply Reduction on Node Features
    #----------------------------------

    # A (1 x num_vertices) row of 1/num_vertices; multiplying it with the
    # node-feature matrix averages the features over all vertices.
    average_vector = lbann.Constant(value=1 / num_vertices,
                                    num_neurons=str_list([1, num_vertices]),
                                    name="Average_Vector")
    x = x.get_mat(out_channel)

    x = lbann.MatMul(average_vector, x, name="Node_Feature_Reduction")

    # X is now a vector with output_channel dimensions

    x = lbann.Reshape(x, dims=str_list([out_channel]), name="Squeeze")
    x = lbann.FullyConnected(x, num_neurons=64, name="hidden_layer_1")
    x = lbann.Relu(x, name="hidden_layer_1_activation")
    x = lbann.FullyConnected(x,
                             num_neurons=num_classes,
                             name="Output_Fully_Connected")

    #----------------------------------
    # Loss Function and Accuracy
    #----------------------------------

    probs = lbann.Softmax(x, name="Softmax")
    loss = lbann.CrossEntropy(probs, target, name="Cross_Entropy_Loss")
    accuracy = lbann.CategoricalAccuracy(probs, target, name="Accuracy")

    layers = lbann.traverse_layer_graph(input_)

    # Caller-supplied callbacks are used untouched; only None gets defaults.
    if callbacks is None:
        # Prints initial Model after Setup
        print_model = lbann.CallbackPrintModelDescription()
        # Prints training progress
        training_output = lbann.CallbackPrint(interval=1,
                                              print_global_stat_only=False)
        gpu_usage = lbann.CallbackGPUMemoryUsage()
        timer = lbann.CallbackTimer()
        callbacks = [print_model, training_output, gpu_usage, timer]

    metrics = [lbann.Metric(accuracy, name='accuracy', unit="%")]

    model = lbann.Model(num_epochs,
                        layers=layers,
                        objective_function=loss,
                        metrics=metrics,
                        callbacks=callbacks)
    return model
def construct_model():
    """Build the ExaGAN LBANN model.

    The layer graph feeds an image input and a Gaussian noise vector
    through `ExaGAN.CosmoGAN`, attaches three sigmoid binary
    cross-entropy terms, and returns an `lbann.Model` that runs for 20
    epochs with print, timer, and replace-weights callbacks.

    """
    import lbann

    # Layer graph
    image_input = lbann.Input(target_mode='N/A', name='inp_img')

    # Label flipping: 'is_real' is the result of comparing a uniform
    # draw against 0.01, and 'is_fake' is its logical negation.
    flip_draw = lbann.Uniform(min=0, max=1, neuron_dims='1')
    flip_threshold = lbann.Constant(value=0.01, num_neurons='1')
    one = lbann.GreaterEqual(flip_draw, flip_threshold, name='is_real')
    zero = lbann.LogicalNot(one, name='is_fake')

    # Noise vector, reshaped to 1 x 64.
    noise = lbann.Gaussian(mean=0.0, stdev=1.0, neuron_dims="64",
                           name='noise_vec')
    z = lbann.Reshape(noise, dims='1 64')

    gan = ExaGAN.CosmoGAN()
    d1_real, d1_fake, d_adv, gen_img = gan(image_input, z)

    d1_real_bce = lbann.SigmoidBinaryCrossEntropy([d1_real, one],
                                                  name='d1_real_bce')
    d1_fake_bce = lbann.SigmoidBinaryCrossEntropy([d1_fake, zero],
                                                  name='d1_fake_bce')
    d_adv_bce = lbann.SigmoidBinaryCrossEntropy([d_adv, one],
                                                name='d_adv_bce')

    layers = list(lbann.traverse_layer_graph(image_input))

    # Setup objective function
    weights = set()
    src_layers = []
    dst_layers = []
    for layer in layers:
        if layer.weights:
            if "disc1" in layer.name and "instance1" in layer.name:
                src_layers.append(layer.name)
            # Freeze weights in disc2, analogous to
            # discrim.trainable=False in Keras.
            if "disc2" in layer.name:
                dst_layers.append(layer.name)
                for frozen in layer.weights:
                    frozen.optimizer = lbann.NoOptimizer()
        # NOTE(review): collected once per layer (not only for frozen
        # layers) -- nesting reconstructed from flattened source; confirm.
        weights.update(layer.weights)

    objective_terms = [d1_real_bce, d1_fake_bce, d_adv_bce]
    obj = lbann.ObjectiveFunction(objective_terms)

    # Initialize check metric callback
    metrics = [
        lbann.Metric(d1_real_bce, name='d_real'),
        lbann.Metric(d1_fake_bce, name='d_fake'),
        lbann.Metric(d_adv_bce, name='gen'),
    ]

    # Copies weights from the collected source layers to the collected
    # destination layers every 2 batches.
    replace_weights = lbann.CallbackReplaceWeights(
        source_layers=list2str(src_layers),
        destination_layers=list2str(dst_layers),
        batch_interval=2)

    # To dump outputs for plotting and further statistical analysis, add:
    #   lbann.CallbackDumpOutputs(
    #       layers='inp_img gen_img_instance1_activation',
    #       execution_modes='train validation',
    #       directory='dump_outs',
    #       batch_interval=100,
    #       format='npy')
    callbacks = [
        lbann.CallbackPrint(),
        lbann.CallbackTimer(),
        replace_weights,
    ]

    # Construct model
    num_epochs = 20
    return lbann.Model(num_epochs,
                       weights=weights,
                       layers=layers,
                       metrics=metrics,
                       objective_function=obj,
                       callbacks=callbacks)
def make_model(
    num_epochs,
    embed_dim,
    num_heads,
    label_smoothing,
):
    """Build a transformer model trained with per-token cross entropy.

    Reads `vocab_size`, `sequence_length`, and `pad_index` from the
    enclosing module scope.

    Args:
        num_epochs: Number of epochs handed to `lbann.Model`.
        embed_dim: Embedding dimension, also used as the transformer's
            hidden size.
        num_heads: Number of attention heads for the transformer.
        label_smoothing: When greater than zero, each one-hot label is
            blended with a uniform distribution using weights
            `1 - label_smoothing` and `label_smoothing`.

    Returns:
        The `lbann.Model` whose objective is the cross entropy summed
        over tokens after scaling by the non-pad mask divided by the
        non-pad count.

    """
    last_decoder_index = 2 * sequence_length - 1
    num_targets = sequence_length - 1

    # Embedding weights
    glorot_var = 2 / (embed_dim + vocab_size)  # Glorot initialization
    embedding_init = lbann.NormalInitializer(
        standard_deviation=math.sqrt(glorot_var))
    embedding_weights = lbann.Weights(
        name='embeddings',
        initializer=embedding_init,
    )

    # Input is two sequences of token IDs
    input_ = lbann.Input(data_field='samples')

    # Get sequences of embedding vectors
    # Note: Scale embeddings by sqrt(embed_dim).
    # Note: Decoder input is shifted right, so embedding for last
    # token isn't needed.
    token_slice = lbann.Slice(
        input_,
        axis=0,
        slice_points=str_list([0, last_decoder_index]),
    )
    embeddings_tokens = lbann.Identity(token_slice)
    raw_embeddings = lbann.Embedding(
        embeddings_tokens,
        weights=embedding_weights,
        num_embeddings=vocab_size,
        embedding_dim=embed_dim,
        padding_idx=pad_index,
    )
    scaled_embeddings = lbann.WeightedSum(
        raw_embeddings,
        scaling_factors=str(math.sqrt(embed_dim)),
    )
    embeddings_slice = lbann.Slice(
        scaled_embeddings,
        axis=0,
        slice_points=str_list([0, sequence_length, last_decoder_index]),
    )
    # Order matters: the encoder child is attached to the slice first,
    # the decoder child second.
    encoder_input = lbann.Identity(embeddings_slice)
    decoder_input = lbann.Identity(embeddings_slice)

    # Apply transformer model
    transformer = lbann.models.Transformer(
        hidden_size=embed_dim,
        num_heads=num_heads,
        name='transformer',
    )
    result = transformer(
        encoder_input,
        sequence_length,
        decoder_input,
        num_targets,
    )

    # Reconstruct decoder input
    # The embedding weights are reused (transposed) for the projection.
    logits = lbann.ChannelwiseFullyConnected(
        result,
        weights=embedding_weights,
        output_channel_dims=[vocab_size],
        bias=False,
        transpose=True,
    )
    probs = lbann.ChannelwiseSoftmax(logits)
    probs_slice = lbann.Slice(
        probs,
        axis=0,
        slice_points=str_list(range(sequence_length)),
    )
    preds = [lbann.Identity(probs_slice) for _ in range(num_targets)]

    # Count number of non-pad tokens
    label_slice = lbann.Slice(
        input_,
        slice_points=str_list([sequence_length + 1, 2 * sequence_length]),
    )
    label_block = lbann.Identity(label_slice)
    pads = lbann.Constant(value=pad_index, num_neurons=str(num_targets))
    is_not_pad = lbann.NotEqual(label_block, pads)
    num_not_pad = lbann.Reduction(is_not_pad, mode='sum')

    # Cross entropy loss with label smoothing
    per_token_slice = lbann.Slice(
        label_block,
        slice_points=str_list(range(sequence_length)),
    )
    label_tokens = [
        lbann.Identity(per_token_slice) for _ in range(num_targets)
    ]
    use_smoothing = label_smoothing > 0
    if use_smoothing:
        uniform_label = lbann.Constant(
            value=1 / vocab_size,
            num_neurons=str_list([1, vocab_size]),
        )
    token_losses = []
    for pred, token in zip(preds, label_tokens):
        one_hot = lbann.OneHot(token, size=vocab_size)
        label = lbann.Reshape(one_hot, dims=str_list([1, vocab_size]))
        if use_smoothing:
            label = lbann.WeightedSum(
                label,
                uniform_label,
                scaling_factors=str_list(
                    [1 - label_smoothing, label_smoothing]),
            )
        token_losses.append(lbann.CrossEntropy(pred, label))
    stacked_loss = lbann.Concatenation(token_losses)

    # Average cross entropy over non-pad tokens
    loss_scales = lbann.Divide(
        is_not_pad,
        lbann.Tessellate(num_not_pad, hint_layer=is_not_pad),
    )
    masked_loss = lbann.Multiply(stacked_loss, loss_scales)
    loss = lbann.Reduction(masked_loss, mode='sum')

    # Construct model
    metrics = []
    callbacks = [lbann.CallbackPrint(), lbann.CallbackTimer()]
    return lbann.Model(
        num_epochs,
        layers=lbann.traverse_layer_graph(input_),
        objective_function=loss,
        metrics=metrics,
        callbacks=callbacks,
    )
# Get sequence of embedding vectors embeddings = lbann.Embedding(input_, num_embeddings=vocab_size, embedding_dim=args.latent_dim) embeddings_slice = lbann.Slice(embeddings, axis=0, slice_points=str_list(range(sequence_length + 1))) embeddings_list = [ lbann.Reshape(embeddings_slice, dims='-1') for _ in range(sequence_length) ] # Layer modules lstm = lbann.modules.LSTMCell(args.latent_dim) lstm_state = [ lbann.Constant(value=0, num_neurons=str_list(args.latent_dim)), lbann.Constant(value=0, num_neurons=str_list(args.latent_dim)) ] pred_fc = lbann.modules.FullyConnectedModule(vocab_size, data_layout='model_parallel') # Iterate through RNN steps loss = [] for step in range(sequence_length - 1): # Predict next token with RNN x = embeddings_list[step] x, lstm_state = lstm(x, lstm_state) x = pred_fc(x) pred = lbann.Softmax(x)
def construct_macc_surrogate_model(xdim, ydim, zdim, wae_mcf, surrogate_mcf,
                                   lambda_cyc, useCNN, dump_models,
                                   pretrained_dir, ltfb_batch_interval,
                                   num_epochs):
    """Build the MACC surrogate LBANN model.

    See https://arxiv.org/pdf/1912.08113.pdf for the model architecture
    and other details.

    Args:
        xdim: Width of the second input slice, also passed to
            `MACCInverse`.
        ydim: Width of the first input slice, also passed to `MACCWAE`.
        zdim: Passed to `MACCWAE` and `MACCForward`.
        wae_mcf: `cf` argument for `MACCWAE`.
        surrogate_mcf: `cf` argument for `MACCInverse` and `MACCForward`.
        lambda_cyc: Scaling factor on the cycle terms of both generator
            losses.
        useCNN: `use_CNN` argument for `MACCWAE`.
        dump_models: Directory given to `CallbackSaveModel`.
        pretrained_dir: Directory given (as a string) to
            `CallbackLoadModel`.
        ltfb_batch_interval: When positive, an LTFB callback with this
            batch interval is appended.
        num_epochs: Number of epochs handed to `lbann.Model`.

    Returns:
        The constructed `lbann.Model`.

    """
    # Layer graph
    sample = lbann.Input(data_field='samples', name='inp_data')
    # data is 64*64*4 images + 15 scalar + 5 param
    split_points = str_list([0, ydim, ydim + xdim])
    inp_slice = lbann.Slice(sample,
                            axis=0,
                            slice_points=split_points,
                            name='inp_slice')
    # Order matters: gt_y is attached to the slice first, gt_x second.
    gt_y = lbann.Identity(inp_slice, name='gt_y')
    gt_x = lbann.Identity(inp_slice, name='gt_x')  # param not used

    # These three layers are not referenced again in this function; they
    # are still constructed, in the same order as before.
    zero = lbann.Constant(value=0.0, num_neurons='1', name='zero')
    one = lbann.Constant(value=1.0, num_neurons='1', name='one')
    z = lbann.Gaussian(mean=0.0, stdev=1.0, neuron_dims="20")

    # pretrained, freeze
    wae = macc_network_architectures.MACCWAE(zdim,
                                             ydim,
                                             cf=wae_mcf,
                                             use_CNN=useCNN)
    inv = macc_network_architectures.MACCInverse(xdim, cf=surrogate_mcf)
    fwd = macc_network_architectures.MACCForward(zdim, cf=surrogate_mcf)

    # Encoder output -> inverse model -> forward model -> decoder.
    y_pred_fwd = wae.encoder(gt_y)
    param_pred_ = wae.encoder(gt_y)
    input_fake = inv(param_pred_)
    output_cyc = fwd(input_fake)
    y_image_re2 = wae.decoder(output_cyc)

    # Train cycleGAN between input params and the latent space of
    # (images, scalars):
    # forward model -> decoder -> encoder -> inverse model.
    output_fake = fwd(gt_x)
    y_image_re = wae.decoder(output_fake)
    param_pred2_ = wae.encoder(y_image_re)
    input_cyc = inv(param_pred2_)

    L_l2_x = lbann.MeanSquaredError(input_fake, gt_x)
    L_cyc_x = lbann.MeanSquaredError(input_cyc, gt_x)

    L_l2_y = lbann.MeanSquaredError(output_fake, y_pred_fwd)
    L_cyc_y = lbann.MeanSquaredError(output_cyc, y_pred_fwd)

    # @todo slice here to separate scalar from image
    img_sca_loss = lbann.MeanSquaredError(y_image_re, gt_y)

    # L_cyc = L_cyc_y + L_cyc_x
    L_cyc = lbann.Add(L_cyc_y, L_cyc_x)

    # loss_gen0 = L_l2_y + lambda_cyc * L_cyc
    # loss_gen1 = L_l2_x + lambda_cyc * L_cyc_y
    cycle_scaling = f'1 {lambda_cyc}'
    loss_gen0 = lbann.WeightedSum([L_l2_y, L_cyc],
                                  scaling_factors=cycle_scaling)
    loss_gen1 = lbann.WeightedSum([L_l2_x, L_cyc_y],
                                  scaling_factors=cycle_scaling)

    layers = list(lbann.traverse_layer_graph(sample))
    weights = set()
    # Freeze appropriate (pretrained) weights
    pretrained_models = ["wae"]  # add macc?
    for layer in layers:
        for tag in pretrained_models:
            if layer.weights and tag in layer.name:
                for frozen in layer.weights:
                    frozen.optimizer = lbann.NoOptimizer()
        # NOTE(review): collected once per layer, frozen or not -- nesting
        # reconstructed from flattened source; confirm.
        weights.update(layer.weights)

    l2_reg = lbann.L2WeightRegularization(weights=weights, scale=1e-4)

    # Setup objective function
    obj = lbann.ObjectiveFunction([loss_gen0, loss_gen1, l2_reg])

    # Initialize check metric callback
    metrics = [
        lbann.Metric(img_sca_loss, name='fw_loss'),
        lbann.Metric(L_l2_x, name='inverse loss'),
        lbann.Metric(L_cyc_y, name='output cycle loss'),
        lbann.Metric(L_cyc_x, name='param cycle loss'),
    ]

    callbacks = [
        lbann.CallbackPrint(),
        lbann.CallbackSaveModel(dir=dump_models),
        lbann.CallbackLoadModel(dirs=str(pretrained_dir)),
        lbann.CallbackTimer(),
    ]

    # LTFB is optional; it is scored on the 'fw_loss' metric, lower wins.
    if ltfb_batch_interval > 0:
        ltfb = lbann.CallbackLTFB(batch_interval=ltfb_batch_interval,
                                  metric='fw_loss',
                                  low_score_wins=True,
                                  exchange_hyperparameters=True)
        callbacks.append(ltfb)

    # Construct model
    return lbann.Model(num_epochs,
                       weights=weights,
                       layers=layers,
                       metrics=metrics,
                       objective_function=obj,
                       callbacks=callbacks)
def construct_jag_wae_model(ydim, zdim, mcf, useCNN, dump_models,
                            ltfb_batch_interval, num_epochs):
    """Construct LBANN model.

    JAG Wasserstein autoencoder model

    Args:
        ydim: Width of the first input slice (the autoencoder target),
            also passed to `MACCWAE`.
        zdim: Latent dimension. Passed to `MACCWAE` and used as the size
            of the Gaussian sample fed to the model.
        mcf: `cf` argument for `MACCWAE`.
        useCNN: `use_CNN` argument for `MACCWAE`.
        dump_models: Directory given to `CallbackSaveModel`.
        ltfb_batch_interval: When positive, an LTFB callback with this
            batch interval is appended.
        num_epochs: Number of epochs handed to `lbann.Model`.

    Returns:
        The constructed `lbann.Model`.

    """

    # Layer graph
    input = lbann.Input(data_field='samples', name='inp_data')
    # data is 64*64*4 images + 15 scalar + 5 param
    # (an earlier hard-coded form used slice points "0 16399 16404")
    inp_slice = lbann.Slice(input,
                            axis=0,
                            slice_points=str_list([0, ydim, ydim + 5]),
                            name='inp_slice')
    # Order matters: gt_y is attached to the slice first, gt_x second.
    gt_y = lbann.Identity(inp_slice, name='gt_y')
    gt_x = lbann.Identity(inp_slice, name='gt_x')  # param not used

    zero = lbann.Constant(value=0.0, num_neurons='1', name='zero')
    one = lbann.Constant(value=1.0, num_neurons='1', name='one')

    # Gaussian sample sized by the latent dimension. This was hard-coded
    # to "20" regardless of `zdim`; it is identical for zdim == 20.
    # NOTE(review): assumes MACCWAE expects `z` to have `zdim` entries --
    # confirm against macc_network_architectures.MACCWAE.
    z = lbann.Gaussian(mean=0.0, stdev=1.0, neuron_dims=str(zdim))
    model = macc_network_architectures.MACCWAE(zdim,
                                               ydim,
                                               cf=mcf,
                                               use_CNN=useCNN)
    d1_real, d1_fake, d_adv, pred_y = model(z, gt_y)

    d1_real_bce = lbann.SigmoidBinaryCrossEntropy([d1_real, one],
                                                  name='d1_real_bce')
    d1_fake_bce = lbann.SigmoidBinaryCrossEntropy([d1_fake, zero],
                                                  name='d1_fake_bce')
    d_adv_bce = lbann.SigmoidBinaryCrossEntropy([d_adv, one],
                                                name='d_adv_bce')
    img_loss = lbann.MeanSquaredError([pred_y, gt_y])
    # Squared L2 norm of (pred_y - gt_y).
    rec_error = lbann.L2Norm2(
        lbann.WeightedSum([pred_y, gt_y], scaling_factors="1 -1"))

    layers = list(lbann.traverse_layer_graph(input))
    # Setup objective function
    weights = set()
    src_layers = []
    dst_layers = []
    for l in layers:
        if (l.weights and "disc0" in l.name and "instance1" in l.name):
            src_layers.append(l.name)
        # Freeze weights in layers whose name contains "disc1"; they are
        # the destination of the replace-weights callback below.
        if (l.weights and "disc1" in l.name):
            dst_layers.append(l.name)
            for w in l.weights:
                w.optimizer = lbann.NoOptimizer()
        # NOTE(review): collected once per layer, frozen or not -- nesting
        # reconstructed from flattened source; confirm.
        weights.update(l.weights)
    l2_reg = lbann.L2WeightRegularization(weights=weights, scale=1e-4)
    # Down-weight the adversarial term in the objective.
    d_adv_bce = lbann.LayerTerm(d_adv_bce, scale=0.01)
    obj = lbann.ObjectiveFunction(
        [d1_real_bce, d1_fake_bce, d_adv_bce, img_loss, rec_error, l2_reg])
    # Initialize check metric callback
    metrics = [lbann.Metric(img_loss, name='recon_error')]

    callbacks = [
        lbann.CallbackPrint(),
        lbann.CallbackTimer(),
        lbann.CallbackPrintModelDescription(),
        lbann.CallbackSaveModel(dir=dump_models),
        lbann.CallbackReplaceWeights(source_layers=list2str(src_layers),
                                     destination_layers=list2str(dst_layers),
                                     batch_interval=2)
    ]

    # LTFB is optional; it is scored on 'recon_error', lower wins.
    if (ltfb_batch_interval > 0):
        callbacks.append(
            lbann.CallbackLTFB(batch_interval=ltfb_batch_interval,
                               metric='recon_error',
                               low_score_wins=True,
                               exchange_hyperparameters=True))

    # Construct model
    return lbann.Model(num_epochs,
                       weights=weights,
                       layers=layers,
                       metrics=metrics,
                       objective_function=obj,
                       callbacks=callbacks)