def __init__(self, size, bias=True, weights=[], name=None,
             data_layout='data_parallel'):
    """Initialize LSTM cell.

    Args:
        size (int): Size of output tensor.
        bias (bool): Whether to apply biases after linearity.
        weights (`Weights` or iterator of `Weights`): Weights in
            fully-connected layer. There are at most two - a
            matrix ((4*size) x (input_size+size) dimensions) and a
            bias (4*size entries). If weights are not provided,
            the matrix and bias will be initialized in a similar
            manner as PyTorch (uniform random values from
            [-1/sqrt(size), 1/sqrt(size)]).
        name (str): Default name is in the form 'lstmcell<index>'.
        data_layout (str): Data layout.

    """
    super().__init__()
    LSTMCell.global_count += 1
    self.step = 0
    self.size = size
    self.name = (name
                 if name
                 else 'lstmcell{0}'.format(LSTMCell.global_count))
    self.data_layout = data_layout

    # Initial state
    self.last_output = lbann.Constant(value=0.0, num_neurons=str(size),
                                      name=self.name + '_init_output',
                                      data_layout=self.data_layout)
    self.last_cell = lbann.Constant(value=0.0, num_neurons=str(size),
                                    name=self.name + '_init_cell',
                                    data_layout=self.data_layout)

    # Weights
    self.weights = list(make_iterable(weights))
    if len(self.weights) > 2:
        raise ValueError('`LSTMCell` has at most two weights, '
                         'but got {0}'.format(len(self.weights)))
    # Note: the upper bound must be +1/sqrt(size); the original code
    # passed a negative value for both `min` and `max`.
    scale = 1 / sqrt(self.size)
    if len(self.weights) == 0:
        self.weights.append(
            lbann.Weights(initializer=lbann.UniformInitializer(min=-scale,
                                                               max=scale),
                          name=self.name + '_matrix'))
    if len(self.weights) == 1:
        self.weights.append(
            lbann.Weights(initializer=lbann.UniformInitializer(min=-scale,
                                                               max=scale),
                          name=self.name + '_bias'))

    # Linearity
    self.fc = FullyConnectedModule(4*size,
                                   bias=bias,
                                   weights=self.weights,
                                   name=self.name + '_fc',
                                   data_layout=self.data_layout)
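# A hypothetical usage sketch (identifiers are illustrative, not from the
# source): because `LSTMCell` accepts at most two `Weights`, two cells can
# share parameters by passing the same objects to both constructors.
# Assumes `import lbann` and `from math import sqrt` at module scope.
size = 256
scale = 1 / sqrt(size)
shared = [
    lbann.Weights(initializer=lbann.UniformInitializer(min=-scale, max=scale),
                  name='shared_lstm_matrix'),
    lbann.Weights(initializer=lbann.UniformInitializer(min=-scale, max=scale),
                  name='shared_lstm_bias'),
]
cell_a = LSTMCell(size, weights=shared, name='cell_a')
cell_b = LSTMCell(size, weights=shared, name='cell_b')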
def __init__(self, output_channels, num_layers=1, name=None,
             data_layout='data_parallel'):
    """Initialize GatedGraph layer

    Args:
        output_channels (int): The output size of the node features
        num_layers (int): Number of passes through the GRU (default: 1)
        name (str): Name of the layers and prefix to use for the layers.
        data_layout (str): Data layout (default: 'data_parallel')
    """
    super().__init__()

    ## Add Name for the components for the layer
    GatedGraphConv.global_count += 1
    self.name = (name
                 if name
                 else 'GatedGraphConv_{}'.format(GatedGraphConv.global_count))

    ## Add variables
    self.output_channels = output_channels
    self.rnn = lbann.modules.GRU(output_channels)
    self.num_layers = num_layers
    # data_layout was used below but missing from the signature; it is
    # now an explicit parameter, matching the docstring.
    self.data_layout = data_layout
    self.weights = []

    for i in range(num_layers):
        weight_init = lbann.Weights(initializer=lbann.UniformInitializer(
            min=-1 / (math.sqrt(output_channels)),
            max=1 / (math.sqrt(output_channels))))
        weight_layer = lbann.WeightsLayer(
            dims=str_list([output_channels, output_channels]),
            weights=weight_init,
            name=self.name + '_' + str(i) + '_weight',
            data_layout=self.data_layout)
        self.weights.append(weight_layer)
def __init__(
    self,
    num_vertices,
    motif_size,
    embed_dim,
    learn_rate,
):
    super().__init__()
    self.num_vertices = num_vertices
    self.embed_dim = embed_dim
    self.learn_rate = learn_rate

    # Initialize weights
    # Note: The discriminator's probability estimate is
    #   D = 1 - exp(-sum_j(prod_i(d_ij)))
    # Treating the embeddings as i.i.d. random variables:
    #   D = 1 - exp( -embed_dim * d^motif_size )
    #   log(d) = log( -log(1-D) / embed_dim ) / motif_size
    # We initialize the embeddings in log-space so that the
    # discriminator's initial probability estimates have mean 0.5.
    mean = math.log(-math.log(1-0.5) / embed_dim) / motif_size
    radius = math.log(-math.log(1-0.75) / embed_dim) / motif_size - mean
    self.log_embedding_weights = lbann.Weights(
        initializer=lbann.UniformInitializer(
            min=mean-radius, max=mean+radius),
        name='discriminator_log_embeddings',
    )
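# Sanity check of the log-space initialization above: a standalone sketch
# (embed_dim and motif_size are arbitrary illustrative values) that inverts
# the formula and confirms the discriminator's initial probabilities land
# at 0.5 in the middle of the init range and 0.75 at its upper edge.
import math

embed_dim, motif_size = 64, 3

def log_d(D):
    """Solve log(d) = log(-log(1-D)/embed_dim)/motif_size for a target D."""
    return math.log(-math.log(1 - D) / embed_dim) / motif_size

mean = log_d(0.5)            # center of the uniform init range
radius = log_d(0.75) - mean  # half-width of the range

def prob(log_d_value):
    """Recover D = 1 - exp(-embed_dim * d^motif_size) from log(d)."""
    d = math.exp(log_d_value)
    return 1 - math.exp(-embed_dim * d**motif_size)

print(prob(mean))           # ~0.5
print(prob(mean + radius))  # ~0.75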
def _xavier_uniform_init(fan_in, fan_out):
    """Xavier uniform initializer

    Args:
        fan_in (int): input size of the learning layer
        fan_out (int): output size of the learning layer
    Returns:
        (UniformInitializer): an lbann UniformInitializer object
    """
    a = math.sqrt(6 / (fan_in + fan_out))
    return lbann.UniformInitializer(min=-a, max=a)
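# For concreteness, the Xavier/Glorot bound a = sqrt(6/(fan_in + fan_out))
# can be evaluated directly; the sizes below are illustrative, not taken
# from the library.
import math

a = math.sqrt(6 / (128 + 64))  # layer mapping 128 features to 64
print(round(a, 4))             # 0.1768, so weights ~ U(-0.1768, 0.1768)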
def __init__(self, input_channels, output_channels, name=None):
    super().__init__()
    # Increment the count before naming so the first instance is
    # 'DenseGraph_1', consistent with the other graph modules.
    DenseGraphConv.global_count += 1
    self.name = (name
                 if name
                 else 'DenseGraph_{}'.format(DenseGraphConv.global_count))
    bounds = math.sqrt(6.0 / (input_channels + output_channels))
    self.weights_1 = lbann.Weights(initializer=lbann.UniformInitializer(
        min=-bounds, max=bounds),
        name=self.name + '_Weights_1')
    self.weights_2 = lbann.Weights(initializer=lbann.UniformInitializer(
        min=-bounds, max=bounds),
        name=self.name + '_Weights_2')
    self.W1 = lbann.WeightsLayer(dims=str_list(
        [input_channels, output_channels]),
        name=self.name + '_param_1',
        weights=self.weights_1)
    self.W2 = lbann.WeightsLayer(dims=str_list(
        [input_channels, output_channels]),
        name=self.name + '_param_2',
        weights=self.weights_2)
def __init__(self, num_channels, size, bias=True, weights=[], name=None):
    """Initialize GRU cell.

    Args:
        num_channels (int): The number of rows in the matrix to perform GRU
        size (int): Size of output tensor.
        bias (bool): Whether to apply biases after linearity.
        weights (`Weights` or iterator of `Weights`): Weights in
            fully-connected layer. There are at most four - two
            matrices ((3*size) x (input_size) and (3*size) x (size)
            dimensions) and two biases (3*size entries each). If
            weights are not provided, the matrices and biases will be
            initialized in a similar manner as PyTorch (uniform random
            values from [-1/sqrt(size), 1/sqrt(size)]).
        name (str): Default name is in the form 'gru<index>'.

    The data layout is fixed to 'data_parallel'.
    """
    super().__init__()
    ChannelwiseGRU.global_count += 1
    self.step = 0
    self.size = size
    self.num_channels = num_channels
    self.name = (name if name else f'gru{ChannelwiseGRU.global_count}')
    self.data_layout = 'data_parallel'
    scale = 1 / math.sqrt(self.size)

    self.weights = list(make_iterable(weights))
    if len(self.weights) > 4:
        raise ValueError('`ChannelwiseGRU` has at most 4 weights, '
                         'but got {0}'.format(len(self.weights)))
    weight_name = ['_ih_matrix', '_ih_bias', '_hh_matrix', '_hh_bias']
    for i in range(4):
        if len(self.weights) == i:
            self.weights.append(
                lbann.Weights(initializer=lbann.UniformInitializer(
                    min=-scale, max=scale),
                    name=self.name + weight_name[i]))
    self.ih_fc = ChannelwiseFullyConnectedModule(3 * size,
                                                 bias=bias,
                                                 weights=self.weights[:2],
                                                 name=self.name + '_ih_fc')
    self.hh_fc = ChannelwiseFullyConnectedModule(3 * size,
                                                 bias=bias,
                                                 weights=self.weights[2:],
                                                 name=self.name + '_hh_fc')
    self.ones = lbann.Constant(value=1.0,
                               num_neurons=str_list([num_channels, size]),
                               name=self.name + '_ones')
def __init__(
    self,
    hidden_size,
    num_layers=1,
    weights=[],
    name=None,
    device=None,
    datatype=None,
    weights_datatype=None,
):
    GRUModule.global_count += 1
    self.instance = 0
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.name = name if name else f'gru{GRUModule.global_count}'
    self.device = device
    self.datatype = datatype

    # Construct weights if needed
    self.weights = weights
    if not self.weights:
        scale = 1 / math.sqrt(self.hidden_size)
        init = lbann.UniformInitializer(min=-scale, max=scale)
        if weights_datatype is None:
            weights_datatype = self.datatype
        self.weights = []
        for i in range(self.num_layers):
            self.weights.extend(
                lbann.Weights(
                    initializer=init,
                    name=f'{self.name}_layer{i}_{weight_name}',
                    datatype=weights_datatype,
                )
                for weight_name in ('ih_matrix', 'hh_matrix',
                                    'ih_bias', 'hh_bias')
            )
    if self.weights and len(self.weights) != 4*self.num_layers:
        raise ValueError(
            f'expected {4*self.num_layers} weights, '
            f'but received {len(self.weights)}'
        )

    # Default initial hidden state
    self.zeros = lbann.Constant(
        value=0,
        num_neurons=str_list([num_layers, hidden_size]),
        name=f'{self.name}_zeros',
        device=self.device,
        datatype=self.datatype,
    )
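# A hypothetical sketch (identifiers are illustrative): a module with
# num_layers=2 expects 4*2 = 8 `Weights`, ordered per layer as
# ih_matrix, hh_matrix, ih_bias, hh_bias, so two modules can share
# parameters by receiving the same list.
scale = 1 / math.sqrt(256)
shared_weights = [
    lbann.Weights(
        initializer=lbann.UniformInitializer(min=-scale, max=scale),
        name=f'shared_layer{i}_{w}',
    )
    for i in range(2)
    for w in ('ih_matrix', 'hh_matrix', 'ih_bias', 'hh_bias')
]
encoder_gru = GRUModule(256, num_layers=2, weights=shared_weights)
decoder_gru = GRUModule(256, num_layers=2, weights=shared_weights)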
def __init__(self, input_channels, output_channels, name=None):
    super().__init__()
    DenseGCNConv.global_count += 1
    self.name = (name
                 if name
                 else 'Dense_GCN_{}'.format(DenseGCNConv.global_count))
    bounds = math.sqrt(6.0 / (input_channels + output_channels))
    self.weights = lbann.Weights(initializer=lbann.UniformInitializer(
        min=-bounds, max=bounds),
        name=self.name + '_Weights')
    self.W = lbann.WeightsLayer(dims=str_list(
        [input_channels, output_channels]),
        name=self.name + '_layer',
        weights=self.weights)
def __init__(self, input_channels, output_channels, num_nodes,
             num_layers=1, name=None):
    """Initialize GatedGraph layer

    Args:
        input_channels (int): The size of the input node features
        output_channels (int): The output size of the node features
        num_nodes (int): Number of vertices in the graph
        num_layers (int): Number of passes through the GRU (default: 1)
        name (str): Name of the layers and prefix to use for the layers.
    """
    super().__init__()

    ## Add Name for the components for the layer
    GatedGraphConv.global_count += 1
    self.name = (name
                 if name
                 else 'GatedGraphConv_{}'.format(GatedGraphConv.global_count))

    ## Add variables
    self.output_channel_size = output_channels
    self.input_channel_size = input_channels
    self.num_nodes = num_nodes
    self.rnn = lbann.modules.ChannelwiseGRU(num_nodes, output_channels)
    self.num_layers = num_layers
    self.nns = []

    scale = 1 / math.sqrt(output_channels)
    for i in range(num_layers):
        weights = lbann.Weights(initializer=lbann.UniformInitializer(
            min=-scale, max=scale))
        nn = ChannelwiseFullyConnectedModule(self.output_channel_size,
                                             bias=False,
                                             weights=[weights],
                                             name=f"{self.name}_nn_{i}")
        self.nns.append(nn)
def __init__(self, size, bias=True, weights=[], name=None,
             data_layout='data_parallel'):
    """Initialize GRU cell.

    Args:
        size (int): Size of output tensor.
        bias (bool): Whether to apply biases after linearity.
        weights (`Weights` or iterator of `Weights`): Weights in
            fully-connected layer. There are at most four - two
            matrices ((3*size) x (input_size) and (3*size) x (size)
            dimensions) and two biases (3*size entries each). If
            weights are not provided, the matrices and biases will be
            initialized in a similar manner as PyTorch (uniform random
            values from [-1/sqrt(size), 1/sqrt(size)]).
        name (str): Default name is in the form 'gru<index>'.
        data_layout (str): Data layout.

    """
    super().__init__()
    GRU.global_count += 1
    self.step = 0
    self.size = size
    self.name = (name
                 if name
                 else 'gru{0}'.format(GRU.global_count))
    self.data_layout = data_layout

    # Weights
    self.weights = list(make_iterable(weights))
    if len(self.weights) > 4:
        raise ValueError('`GRU` has at most 4 weights, '
                         'but got {0}'.format(len(self.weights)))
    scale = 1 / math.sqrt(self.size)
    weight_names = ('_ih_matrix', '_ih_bias', '_hh_matrix', '_hh_bias')
    for i in range(4):
        if len(self.weights) == i:
            self.weights.append(
                lbann.Weights(initializer=lbann.UniformInitializer(
                    min=-scale, max=scale),
                    name=self.name + weight_names[i]))

    # Linearity
    # Learnable input-hidden weights
    self.ih_fc = FullyConnectedModule(
        3*size, bias=bias,
        weights=self.weights[:2],
        name=self.name + '_ih_fc',
        data_layout=self.data_layout)
    # Learnable hidden-hidden weights
    self.hh_fc = FullyConnectedModule(
        3*size, bias=bias,
        weights=self.weights[2:],
        name=self.name + '_hh_fc',
        data_layout=self.data_layout)
    self.ones = lbann.Constant(
        value=1.0,
        num_neurons=str(size),
        data_layout=self.data_layout,
        name=self.name + '_ones')
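# The 1/sqrt(size) bound mirrors PyTorch's default recurrent-layer
# initialization, where parameters are drawn from U(-sqrt(k), sqrt(k))
# with k = 1/hidden_size. A quick cross-check (requires PyTorch; sizes
# are illustrative):
import math
import torch

hidden_size = 128
gru = torch.nn.GRU(input_size=64, hidden_size=hidden_size)

bound = math.sqrt(1 / hidden_size)
assert gru.weight_ih_l0.abs().max().item() <= bound
print(f'all input-hidden weights within the +/-{bound:.4f} bound')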
def __init__(self, input_channels, output_channels, num_nodes, bias=True,
             activation=lbann.Relu, name=None):
    """Initialize Graph layer

    Args:
        input_channels (int): The size of the input node features
        output_channels (int): The output size of the node features
        num_nodes (int): Number of vertices in the graph
        bias (bool): Whether to apply biases after weights transform
        activation (type): Activation layer for the node features. If
            None, then no activation is applied. (default: lbann.Relu)
        name (str): Default name of the layer is Graph_{number}
    """
    super().__init__()

    ## Add variables
    self.input_channel_size = input_channels
    self.output_channel_size = output_channels
    self.num_nodes = num_nodes

    ## Add Name for the components for the layer
    GraphConv.global_count += 1
    self.name = (name
                 if name
                 else 'Graph_{}'.format(GraphConv.global_count))

    ## Initialize weights for the matrix
    value = math.sqrt(6 / (input_channels + output_channels))
    mat_weights = []
    id_weights = []

    mat_weights.append(
        lbann.Weights(initializer=lbann.UniformInitializer(min=-value,
                                                           max=value),
                      name=self.name + '_Weights'))
    id_weights.append(
        lbann.Weights(initializer=lbann.UniformInitializer(min=-value,
                                                           max=value),
                      name=self.name + '_ID_Weights'))

    ## Initialize bias variables
    self.has_bias = bias
    if self.has_bias:
        mat_weights.append(
            lbann.Weights(initializer=lbann.ConstantInitializer(value=0.0),
                          name=self.name + '_bias_weights'))

    self.activation = None
    if activation:
        if isinstance(activation, type):
            self.activation = activation
        else:
            self.activation = type(activation)
        if not issubclass(self.activation, lbann.Layer):
            raise ValueError('activation must be a layer')

    self.id_nn = \
        ChannelwiseFullyConnectedModule(self.output_channel_size,
                                        bias=False,
                                        weights=id_weights,
                                        activation=self.activation,
                                        name=self.name + "_ID_FC_layer")
    self.mat_nn = \
        ChannelwiseFullyConnectedModule(self.output_channel_size,
                                        bias=self.has_bias,
                                        weights=mat_weights,
                                        activation=self.activation,
                                        name=self.name + "_Message_FC_layer")
def __init__(self, input_channels, output_channels, bias=True,
             activation=lbann.Relu, name=None,
             data_layout='data_parallel'):
    """Initialize Graph layer

    Args:
        input_channels (int): The size of the input node features
        output_channels (int): The output size of the node features
        bias (bool): Whether to apply biases after MatMul
        activation (type): Activation layer for the node features. If
            None, then no activation is applied. (default: lbann.Relu)
        name (str): Default name of the layer is Graph_{number}
        data_layout (str): Data layout
    """
    super().__init__()

    ## Add variables
    self.input_channels = input_channels
    self.output_channels = output_channels
    self.data_layout = data_layout

    ## Add Name for the components for the layer
    GraphConv.global_count += 1
    self.name = (name
                 if name
                 else 'Graph_{}'.format(GraphConv.global_count))

    ## Initialize weights for the matrix
    value = math.sqrt(6 / (input_channels + output_channels))
    self.mat_weights = lbann.Weights(initializer=lbann.UniformInitializer(
        min=-value, max=value),
        name=self.name + '_Weights')
    self.weights1 = lbann.WeightsLayer(dims=str_list(
        [input_channels, output_channels]),
        name=self.name + '_layer',
        weights=self.mat_weights)

    self.id_weights = lbann.Weights(initializer=lbann.UniformInitializer(
        min=-value, max=value),
        name=self.name + '_ID_Weights')
    self.weights2 = lbann.WeightsLayer(dims=str_list(
        [input_channels, output_channels]),
        name=self.name + '_ID_layer',
        weights=self.id_weights)

    ## Initialize bias variables
    self.has_bias = bias
    self.bias_weights = None
    self.bias = None
    if self.has_bias:
        self.bias_weights = lbann.Weights(
            initializer=lbann.ConstantInitializer(value=0.0),
            name=self.name + '_bias_weights')
        self.bias = lbann.WeightsLayer(dims=str_list([1, output_channels]),
                                       weights=self.bias_weights,
                                       name=self.name + '_bias_layer')

    self.activation = None
    if activation:
        if isinstance(activation, type):
            self.activation = activation
        else:
            self.activation = type(activation)
        if not issubclass(self.activation, lbann.Layer):
            raise ValueError('activation must be a layer')
def NNConvLayer(node_features,
                neighbor_features,
                edge_features,
                edge_index,
                in_channel,
                out_channel,
                edge_embedding_dim,
                NUM_NODES,
                NUM_EDGES,
                NUM_GROUPS=0):
    """Helper function to create a NNConvLayer with a 3-layer MLP kernel

    Args:
        node_features (Layer): Layer containing the node feature matrix
            of the graph (NUM_NODES, in_channel)
        neighbor_features (Layer): Layer containing the neighbor feature
            tensor of the graph of shape (NUM_EDGES, 1, in_channel)
        edge_features (Layer): Layer containing the edge feature matrix
            of the graph of shape (NUM_EDGES, EMBEDDED_EDGE_FEATURES)
        edge_index (Layer): Layer containing the source edge index vector
            of the graph of shape (NUM_EDGES)
        in_channel (int): The embedding dimensionality of the node
            feature vector
        out_channel (int): The dimensionality of the node feature vectors
            after graph convolutions
        edge_embedding_dim (int): The embedding dimensionality of the
            edge feature vector
        NUM_NODES (int): The number of nodes in the largest graph in the
            dataset (51 for LSC-PPQM4M)
        NUM_EDGES (int): The number of edges in the largest graph in the
            dataset (118 for LSC-PPQM4M)
        NUM_GROUPS (int): The number of channel groups for distconv
            channelwise fully connected layer (default: 0)
    """
    FC = ChannelwiseFullyConnectedModule
    k_1 = math.sqrt(1 / in_channel)
    k_2 = math.sqrt(1 / 64)
    k_3 = math.sqrt(1 / 32)
    nn_sq_1_weight = lbann.Weights(initializer=lbann.UniformInitializer(
        min=-k_1, max=k_1), name="gnn_weights_{}".format(0))
    nn_sq_2_weight = lbann.Weights(initializer=lbann.UniformInitializer(
        min=-k_2, max=k_2), name="gnn_weights_{}".format(1))
    nn_sq_3_weight = lbann.Weights(initializer=lbann.UniformInitializer(
        min=-k_3, max=k_3), name="gnn_weights_{}".format(2))

    FC1 = 64
    FC2 = 32
    FC3 = out_channel * in_channel
    if NUM_GROUPS > 0:
        FC1 = [1, FC1]
        FC2 = [1, FC2]
        FC3 = [1, FC3]

    sequential_nn = \
        [FC(FC1, weights=[nn_sq_1_weight], name="NN_SQ_1", bias=True,
            activation=lbann.Relu,
            parallel_strategy=create_parallel_strategy(NUM_GROUPS)),
         FC(FC2, weights=[nn_sq_2_weight], name="NN_SQ_2", bias=True,
            activation=lbann.Relu,
            parallel_strategy=create_parallel_strategy(NUM_GROUPS)),
         FC(FC3, weights=[nn_sq_3_weight], name="NN_SQ_3", bias=True,
            activation=lbann.Relu,
            parallel_strategy=create_parallel_strategy(NUM_GROUPS))]

    nn_conv = NNConv(sequential_nn,
                     NUM_NODES,
                     NUM_EDGES,
                     in_channel,
                     out_channel,
                     edge_embedding_dim)

    out = nn_conv(node_features,
                  neighbor_features,
                  edge_features,
                  edge_index)
    return out
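# Shape walk-through for the 3-layer MLP kernel above, with hypothetical
# sizes (not from the source): each edge feature is mapped through
# 64 -> 32 -> out_channel*in_channel units, and NNConv reshapes the final
# vector into an (in_channel, out_channel) matrix that transforms that
# edge's neighbor features. The k_i = sqrt(1/fan_in) bounds match the
# default uniform initialization of a PyTorch Linear layer.
in_channel, out_channel, edge_dim = 16, 32, 8

mlp_dims = [edge_dim, 64, 32, out_channel * in_channel]
for d_in, d_out in zip(mlp_dims, mlp_dims[1:]):
    print(f'{d_in:>4} -> {d_out}')  # 8 -> 64, 64 -> 32, 32 -> 512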
def __init__(self, input_channels, output_channels, num_nodes, bias=True,
             activation=lbann.Relu, name=None, parallel_strategy={}):
    """Initialize GCN layer

    Args:
        input_channels (int): The size of the input node features
        output_channels (int): The output size of the node features
        num_nodes (int): Number of vertices in the graph
        bias (bool): Whether to apply biases after weights transform
        activation (type): Activation layer for the node features. If
            None, then no activation is applied. (default: lbann.Relu)
        name (str): Default name of the layer is GCN_{number}
        parallel_strategy (dict): Data partitioning scheme.
    """
    super().__init__()

    ## Add variables
    self.input_channel_size = input_channels
    self.output_channel_size = output_channels
    self.num_nodes = num_nodes
    self.parallel_strategy = parallel_strategy
    self.instance = 0
    self.is_distconv = False

    if parallel_strategy:
        # Note: dict_values is not subscriptable; convert to a list
        # before indexing (the original indexed before converting).
        if list(parallel_strategy.values())[0] > 0:
            self.is_distconv = True

    ## Add Name for the components for the layer
    GCNConv.global_count += 1
    self.name = (name
                 if name
                 else 'GCN_{}'.format(GCNConv.global_count))

    weights = []

    ## Initialize weights for the matrix
    value = math.sqrt(6 / (input_channels + output_channels))
    weights.append(
        lbann.Weights(initializer=lbann.UniformInitializer(min=-value,
                                                           max=value),
                      name=self.name + '_weights'))

    ## Initialize bias variables
    self.has_bias = bias
    if self.has_bias:
        weights.append(
            lbann.Weights(initializer=lbann.ConstantInitializer(value=0.0),
                          name=self.name + '_bias_weights'))

    self.activation = None
    if activation:
        if isinstance(activation, type):
            self.activation = activation
        else:
            self.activation = type(activation)
        if not issubclass(self.activation, lbann.Layer):
            raise ValueError('activation must be a layer')

    # Distconv channelwise fully connected expects 3D tensors as input
    # and output. This check adds an extra dimension to enable
    # channel-wise data partitioning
    self.output_channels = self.output_channel_size
    if self.is_distconv:
        self.output_channels = [1, self.output_channel_size]

    self.nn = \
        ChannelwiseFullyConnectedModule(self.output_channels,
                                        bias=self.has_bias,
                                        weights=weights,
                                        activation=self.activation,
                                        name=self.name + "_FC_layer",
                                        parallel_strategy=self.parallel_strategy)
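# Standalone sketch of the corrected distconv detection above; the
# 'channel_groups' key is a hypothetical example of what a
# parallel-strategy dict might contain.
def uses_distconv(parallel_strategy):
    """True when the first strategy value requests more than 0 groups."""
    if not parallel_strategy:
        return False
    return list(parallel_strategy.values())[0] > 0

print(uses_distconv({}))                     # False
print(uses_distconv({'channel_groups': 0}))  # False
print(uses_distconv({'channel_groups': 4}))  # True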