def __init__(self, out_dim, hidden_dim=16, n_layers=4,
             n_atom_types=MAX_ATOMIC_NUM, concat_hidden=False,
             weight_tying=True):
    super(GGNN, self).__init__()
    n_readout_layer = n_layers if concat_hidden else 1
    n_message_layer = 1 if weight_tying else n_layers
    with self.init_scope():
        # Update
        self.embed = EmbedAtomID(out_size=hidden_dim, in_size=n_atom_types)
        self.message_layers = chainer.ChainList(*[
            GraphLinear(hidden_dim, self.NUM_EDGE_TYPE * hidden_dim)
            for _ in range(n_message_layer)
        ])
        self.update_layer = links.GRU(2 * hidden_dim, hidden_dim)
        # Readout
        self.i_layers = chainer.ChainList(*[
            GraphLinear(2 * hidden_dim, out_dim)
            for _ in range(n_readout_layer)
        ])
        self.j_layers = chainer.ChainList(*[
            GraphLinear(hidden_dim, out_dim)
            for _ in range(n_readout_layer)
        ])
    self.out_dim = out_dim
    self.hidden_dim = hidden_dim
    self.n_layers = n_layers
    self.concat_hidden = concat_hidden
    self.weight_tying = weight_tying
def __init__(self, in_channels, out_channels, nobias=True):
    super(RelGCNReadout, self).__init__()
    with self.init_scope():
        self.sig_linear = GraphLinear(
            in_channels, out_channels, nobias=nobias)
        self.tanh_linear = GraphLinear(
            in_channels, out_channels, nobias=nobias)
def __init__(self, out_dim=1, hidden_dim=32):
    super(SchNetReadout, self).__init__()
    with self.init_scope():
        self.linear1 = GraphLinear(hidden_dim)
        self.linear2 = GraphLinear(out_dim)
    self.out_dim = out_dim
    self.hidden_dim = hidden_dim
def __init__(self, hidden_dim=16, dropout_ratio=0.5):
    super(GINUpdate, self).__init__()
    with self.init_scope():
        # two Linear + ReLU
        self.linear_g1 = GraphLinear(hidden_dim, hidden_dim)
        self.linear_g2 = GraphLinear(hidden_dim, hidden_dim)
    # end with
    self.dropout_ratio = dropout_ratio
def __init__(self, in_channels, out_channels, num_edge_type=4):
    super(RelGCNUpdate, self).__init__()
    with self.init_scope():
        self.graph_linear_self = GraphLinear(in_channels, out_channels)
        self.graph_linear_edge = GraphLinear(
            in_channels, out_channels * num_edge_type)
    self.num_edge_type = num_edge_type
    self.in_channels = in_channels
    self.out_channels = out_channels
def __init__(self, in_channel, out_channel, num_edge_type=4):
    super(AtomEmbedRGCNUpdate, self).__init__()
    with self.init_scope():
        self.graph_linear_edge = GraphLinear(
            in_channel, out_channel * num_edge_type)
        self.graph_linear_self = GraphLinear(in_channel, out_channel)
    self.num_edge_type = num_edge_type
    self.in_channel = in_channel
    self.out_channel = out_channel
def __init__(self, out_dim, hidden_dim=16, n_layers=4,
             n_atom_types=MAX_ATOMIC_NUM, concat_hidden=False,
             dropout_rate=0.0, layer_aggr=None,
             batch_normalization=False, weight_tying=True,
             update_tying=True):
    super(GGNN, self).__init__()
    n_readout_layer = n_layers if concat_hidden else 1
    n_message_layer = 1 if weight_tying else n_layers
    n_update_layer = 1 if update_tying else n_layers
    self.n_readout_layer = n_readout_layer
    self.n_message_layer = n_message_layer
    self.out_dim = out_dim
    self.hidden_dim = hidden_dim
    self.n_layers = n_layers
    self.concat_hidden = concat_hidden
    self.dropout_rate = dropout_rate
    self.batch_normalization = batch_normalization
    self.weight_tying = weight_tying
    self.update_tying = update_tying
    self.layer_aggr = layer_aggr
    with self.init_scope():
        # Update
        self.embed = EmbedAtomID(out_size=hidden_dim, in_size=n_atom_types)
        self.message_layers = chainer.ChainList(*[
            GraphLinear(hidden_dim, self.NUM_EDGE_TYPE * hidden_dim)
            for _ in range(n_message_layer)
        ])
        self.update_layer = chainer.ChainList(*[
            links.Linear(2 * hidden_dim, hidden_dim)
            for _ in range(n_update_layer)
        ])
        # self.update_layer = links.GRU(2 * hidden_dim, hidden_dim)
        # Layer Aggregation
        self.aggr = select_aggr(layer_aggr, 1, hidden_dim, hidden_dim)
        # Readout
        self.i_layers = chainer.ChainList(*[
            GraphLinear(2 * hidden_dim, out_dim)
            for _ in range(n_readout_layer)
        ])
        self.j_layers = chainer.ChainList(*[
            GraphLinear(hidden_dim, out_dim)
            for _ in range(n_readout_layer)
        ])
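# Construction sketch (illustrative only; the hyperparameter values below are
# assumptions, not settings used anywhere in this repository):
#
#     model = GGNN(out_dim=10, hidden_dim=16, n_layers=4,
#                  concat_hidden=False, weight_tying=True,
#                  update_tying=True, layer_aggr=None)
#
# With weight_tying / update_tying enabled, a single message / update link is
# reused across all n_layers steps; otherwise one link per step is created.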
def __init__(self, out_dim, hidden_dim=16, nobias=False,
             activation=functions.identity,
             activation_agg=functions.identity):
    super(GGNNReadout, self).__init__()
    with self.init_scope():
        self.i_layer = GraphLinear(None, out_dim, nobias=nobias)
        self.j_layer = GraphLinear(None, out_dim, nobias=nobias)
    self.out_dim = out_dim
    self.hidden_dim = hidden_dim
    self.nobias = nobias
    self.activation = activation
    self.activation_agg = activation_agg
def __init__(self, out_dim, hidden_dim=16, n_layers=4,
             n_atom_types=MAX_ATOMIC_NUM, concat_hidden=False,
             dropout_rate=0.0, batch_normalization=False,
             weight_tying=True, output_atoms=True):
    super(GGNN, self).__init__()
    n_readout_layer = n_layers if concat_hidden else 1
    n_message_layer = 1 if weight_tying else n_layers
    self.n_readout_layer = n_readout_layer
    self.n_message_layer = n_message_layer
    self.out_dim = out_dim
    self.hidden_dim = hidden_dim
    self.n_layers = n_layers
    self.concat_hidden = concat_hidden
    self.dropout_rate = dropout_rate
    self.batch_normalization = batch_normalization
    self.weight_tying = weight_tying
    self.output_atoms = output_atoms
    with self.init_scope():
        # Update
        self.embed = EmbedAtomID(out_size=hidden_dim, in_size=n_atom_types)
        self.message_layers = chainer.ChainList(*[
            GraphLinear(hidden_dim, self.NUM_EDGE_TYPE * hidden_dim)
            for _ in range(n_message_layer)
        ])
        self.update_layer = links.GRU(2 * hidden_dim, hidden_dim)
        # Readout
        self.i_layers = chainer.ChainList(*[
            GraphLinear(2 * hidden_dim, out_dim)
            for _ in range(n_readout_layer)
        ])
        self.j_layers = chainer.ChainList(*[
            GraphLinear(hidden_dim, out_dim)
            for _ in range(n_readout_layer)
        ])
    if self.output_atoms:
        self.atoms_list = []
        self.g_vec_list = []
def __init__(self, hidden_dim=64):
    super(SchNetUpdate, self).__init__()
    with self.init_scope():
        self.linear = chainer.ChainList(
            *[GraphLinear(hidden_dim) for _ in range(3)])
        self.cfconv = CFConv(hidden_dim=hidden_dim)
    self.hidden_dim = hidden_dim
def __init__(self, hidden_dim, out_dim, head,
             activation=functions.tanh, weight_tying=True):
    """
    :param hidden_dim: dimension of atom representation
    :param out_dim: dimension of molecular representation
    :param head: number of heads in attention mechanism
    """
    super(ParallelCoattention, self).__init__()
    n_entities = 1 if weight_tying else 2
    with self.init_scope():
        self.energy_layers = chainer.ChainList(*[
            links.Bilinear(hidden_dim, out_dim, head)
            for _ in range(n_entities)
        ])
        self.j_layer = GraphLinear(hidden_dim, out_dim)
    self.hidden_dim = hidden_dim
    self.out_dim = out_dim
    self.head = head
    self.activation = activation
    self.weight_tying = weight_tying
def __init__(self, hidden_dim_super=16, hidden_dim=16, n_heads=8,
             dropout_ratio=-1, activation=functions.tanh):
    super(GraphTransmitterUnit, self).__init__()
    hdim_n = hidden_dim * n_heads
    with self.init_scope():
        self.V_super = GraphLinear(hidden_dim, hdim_n)
        self.W_super = links.Linear(hdim_n, hidden_dim_super)
        self.B = GraphLinear(hidden_dim, n_heads * hidden_dim_super)
    self.hidden_dim = hidden_dim
    self.hidden_dim_super = hidden_dim_super
    self.dropout_ratio = dropout_ratio
    self.n_heads = n_heads
    self.activation = activation
def __init__(self, in_channels, out_channels):
    super(RSGCNUpdate, self).__init__()
    with self.init_scope():
        self.graph_linear = GraphLinear(
            in_channels, out_channels, nobias=True)
    self.in_channels = in_channels
    self.out_channels = out_channels
def construct_layer_aggregator(self):
    """
    self.layer_aggregator can be one of the following:
        concat: concatenation of the hidden states of different layers
            for each node
        max-pool: element-wise max-pooling of the hidden states of
            different layers for each node
        attn: attention mechanism implemented by a single-layered
            neural network
        lstm-attn:
        gru-attn:
        lstm:
        gru:
    """
    if self.layer_aggregator == 'concat':
        input_dim = self.n_layers * self.hidden_dim
        del self.i_layers, self.j_layers
        self.i_layers = chainer.ChainList(*[
            GraphLinear(in_size=2 * input_dim, out_size=self.out_dim)
            for _ in range(self.n_readout_layer)
        ])
        self.j_layers = chainer.ChainList(*[
            GraphLinear(in_size=input_dim, out_size=self.out_dim)
            for _ in range(self.n_readout_layer)
        ])
    elif self.layer_aggregator == 'max-pool':
        # max-pooling keeps the hidden dimension, so the readout layers
        # built in __init__ can be reused as-is
        input_dim = self.hidden_dim
        if input_dim == self.hidden_dim:
            return
    elif self.layer_aggregator == 'attn':
        # attention aggregation also keeps the hidden dimension
        input_dim = self.hidden_dim
        if input_dim == self.hidden_dim:
            return
    elif self.layer_aggregator in ('lstm-attn', 'gru-attn'):
        # the bidirectional RNN doubles the dimension, so rebuild the
        # readout layers with the widened input size
        input_dim = 2 * self.hidden_dim
        if input_dim == self.hidden_dim:
            return
        else:
            del self.i_layers, self.j_layers
            self.i_layers = chainer.ChainList(*[
                GraphLinear(in_size=2 * input_dim, out_size=self.out_dim)
                for _ in range(self.n_readout_layer)
            ])
            self.j_layers = chainer.ChainList(*[
                GraphLinear(in_size=input_dim, out_size=self.out_dim)
                for _ in range(self.n_readout_layer)
            ])
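# Dimension sketch for the 'concat' branch (illustrative values only):
# with n_layers = 4 and hidden_dim = 16, concatenating per-layer node states
# gives input_dim = 64, so the rebuilt readout links become
#
#     i_layer: GraphLinear(in_size=2 * 64, out_size=out_dim)
#     j_layer: GraphLinear(in_size=64, out_size=out_dim)
#
# The 'max-pool' and 'attn' aggregators keep input_dim == hidden_dim, which is
# why those branches return without rebuilding the readout layers.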
def __init__(self, hidden_dim, out_dim, head,
             activation=functions.identity):
    """
    :param hidden_dim: dimension of atom representation
    :param out_dim: dimension of molecular representation
    :param head: number of heads in attention mechanism
    """
    super(ExtremeDeepNieFineCoattention, self).__init__()
    self.n_lt_layers = 3
    with self.init_scope():
        self.energy_layer = links.Bilinear(hidden_dim, hidden_dim, 1)
        self.attention_layer_1 = GraphLinear(head, 1, nobias=True)
        self.attention_layer_2 = GraphLinear(head, 1, nobias=True)
        # self.prev_lt_layer_1 = GraphLinear(hidden_dim, hidden_dim, nobias=False)
        self.prev_lt_layers_1 = chainer.ChainList(*[
            GraphLinear(hidden_dim, hidden_dim, nobias=False)
            for _ in range(self.n_lt_layers)
        ])
        # self.prev_lt_layer_2 = GraphLinear(hidden_dim, hidden_dim, nobias=False)
        self.prev_lt_layers_2 = chainer.ChainList(*[
            GraphLinear(hidden_dim, hidden_dim, nobias=False)
            for _ in range(self.n_lt_layers)
        ])
        self.lt_layer_1 = GraphLinear(hidden_dim, head, nobias=True)
        self.lt_layer_2 = GraphLinear(hidden_dim, head, nobias=True)
        self.j_layer = GraphLinear(hidden_dim, out_dim)  # modification for concat
    self.hidden_dim = hidden_dim
    self.out_dim = out_dim
    self.head = head
    self.activation = activation
def __init__(self, hidden_dim, out_dim, head,
             activation=functions.identity):
    """
    :param hidden_dim: dimension of atom representation
    :param out_dim: dimension of molecular representation
    :param head: number of heads in attention mechanism
    """
    super(DeepNieFineCoattention, self).__init__()
    with self.init_scope():
        self.energy_layer = links.Bilinear(hidden_dim, hidden_dim, 1)
        self.attention_layer_1 = GraphLinear(head, 1, nobias=True)
        self.attention_layer_2 = GraphLinear(head, 1, nobias=True)
        self.prev_lt_layer_1 = GraphLinear(hidden_dim, hidden_dim, nobias=False)
        self.prev_lt_layer_2 = GraphLinear(hidden_dim, hidden_dim, nobias=False)
        self.lt_layer_1 = GraphLinear(hidden_dim, head, nobias=True)
        self.lt_layer_2 = GraphLinear(hidden_dim, head, nobias=True)
        self.j_layer = GraphLinear(hidden_dim, out_dim)
    self.hidden_dim = hidden_dim
    self.out_dim = out_dim
    self.head = head
    self.activation = activation
def __init__(self, n_layers, in_size, out_size, dropout):
    super(BiGRUAggregator, self).__init__()
    with self.init_scope():
        self.bigru_layer = links.NStepBiGRU(
            n_layers, in_size, out_size, dropout)
        self.out_layer = GraphLinear(2 * out_size, out_size)
    self.n_layers = n_layers
    self.in_size = in_size
    self.out_size = out_size
    self.dropout = dropout
def __init__(self, hidden_dim, out_dim, weight_tying=True):
    super(GlobalCoattention, self).__init__()
    n_entities = 1 if weight_tying else 2
    with self.init_scope():
        self.att_layers = chainer.ChainList(*[
            L.Linear(2 * hidden_dim, out_dim)
            for _ in range(n_entities)
        ])
        self.lt_layer = GraphLinear(hidden_dim, out_dim)
    self.hidden_dim = hidden_dim
    self.out_dim = out_dim
    self.weight_tying = weight_tying
def __init__(self, hidden_dim, out_dim, activation=functions.tanh):
    """
    :param hidden_dim: dimension of atom representation
    :param out_dim: dimension of molecular representation
    """
    super(LinearTransformFineCoattention, self).__init__()
    with self.init_scope():
        self.energy_layer = links.Bilinear(hidden_dim, hidden_dim, 1)
        self.j_layer = GraphLinear(hidden_dim, out_dim)
    self.hidden_dim = hidden_dim
    self.out_dim = out_dim
    self.activation = activation
def __init__(self, num_layer=3):
    super(SchNet, self).__init__()
    self.num_layer = num_layer
    with self.init_scope():
        self.gn = GraphLinear(512)
        for l in range(self.num_layer):
            self.add_link('sch{}'.format(l), SchNetUpdateBN(512))
        self.interaction1 = L.Linear(128)
        self.interaction2 = L.Linear(128)
        self.interaction3 = L.Linear(4)
def __init__(self, hidden_dim, out_dim, activation=functions.tanh):
    """
    :param hidden_dim: dimension of atom representation
    :param out_dim: dimension of molecular representation
    """
    super(PoolingFineCoattention, self).__init__()
    with self.init_scope():
        self.energy_layer = links.Bilinear(hidden_dim, hidden_dim, 1)
        self.j_layer = GraphLinear(hidden_dim, out_dim)
    self.hidden_dim = hidden_dim
    self.out_dim = out_dim
    self.activation = activation
def __init__(self, hidden_dim, out_dim, head, weight_tying=False):
    """
    :param hidden_dim: dimension of atom representation
    :param out_dim: dimension of molecular representation
    :param head: number of heads in attention mechanism
    :param weight_tying: whether the weights are shared between the two
        attention computations
    """
    super(AlternatingCoattention, self).__init__()
    self.n_entities = 1 if weight_tying else 2
    with self.init_scope():
        self.energy_layers_1 = chainer.ChainList(*[
            GraphLinear(hidden_dim + out_dim, head)
            for _ in range(self.n_entities)
        ])
        self.energy_layers_2 = chainer.ChainList(*[GraphLinear(head, 1)])
        self.j_layer = GraphLinear(
            hidden_dim, out_dim)  # shared by different molecules
    self.hidden_dim = hidden_dim
    self.out_dim = out_dim
    self.head = head
    self.weight_tying = weight_tying
def __init__(self, hidden_dim, out_dim, activation=F.relu,
             weight_tying=True):
    super(NeuralCoattention, self).__init__()
    n_entities = 1 if weight_tying else 2
    with self.init_scope():
        self.att_layers = chainer.ChainList(*[
            GraphLinear(hidden_dim, out_dim)
            for _ in range(n_entities)
        ])
    self.hidden_dim = hidden_dim
    self.out_dim = out_dim
    self.activation = activation
    self.weight_tying = weight_tying
def __init__(self, out_dim, hidden_dim=16, n_layers=4,
             concat_hidden=False, nobias=False,
             activation=functions.tanh):
    super(GGNNReadout, self).__init__()
    n_layer = n_layers if concat_hidden else 1
    with self.init_scope():
        self.i_layers = chainer.ChainList(*[
            GraphLinear(None, out_dim, nobias=nobias)
            for _ in range(n_layer)
        ])
        self.j_layers = chainer.ChainList(*[
            GraphLinear(None, out_dim, nobias=nobias)
            for _ in range(n_layer)
        ])
    self.out_dim = out_dim
    self.hidden_dim = hidden_dim
    self.n_layers = n_layers
    self.concat_hidden = concat_hidden
    self.nobias = nobias
    self.activation = activation
def __init__(self, weave_channels=None, hidden_dim=512, n_sub_layer=1,
             n_atom_types=MAX_ATOMIC_NUM, readout_mode='sum'):
    weave_channels = weave_channels or WEAVENET_DEFAULT_WEAVE_CHANNELS
    weave_module = [
        WeaveModuleBN(c, n_sub_layer, readout_mode=readout_mode)
        for c in weave_channels
    ]
    super(WeaveNet, self).__init__()
    with self.init_scope():
        self.weave_module = chainer.ChainList(*weave_module)
        self.gn = GraphLinear(hidden_dim)
        self.interaction1 = L.Linear(512)
        self.interaction2 = L.Linear(512)
        self.interaction3 = L.Linear(512)
        self.interaction4 = L.Linear(512)
        self.interaction5 = L.Linear(512)
        self.interaction6 = L.Linear(4)
def __init__(self, out_channels=64, num_edge_type=4, ch_list=None,
             n_atom_types=MAX_ATOMIC_NUM, input_type='int',
             scale_adj=False, activation=F.tanh):
    super(RelGCN, self).__init__()
    ch_list = ch_list or [16, 128, 64]
    # ch_list = [in_channels] + ch_list
    with self.init_scope():
        if input_type == 'int':
            self.embed = EmbedAtomID(out_size=ch_list[0],
                                     in_size=n_atom_types)
        elif input_type == 'float':
            self.embed = GraphLinear(None, ch_list[0])
        else:
            raise ValueError(
                "[ERROR] Unexpected value input_type={}".format(input_type))
        self.rgcn_convs = chainer.ChainList(*[
            RelGCNUpdate(ch_list[i], ch_list[i + 1], num_edge_type)
            for i in range(len(ch_list) - 1)
        ])
        self.rgcn_readout = RelGCNReadout(ch_list[-1], out_channels)
    # self.num_relations = num_edge_type
    self.input_type = input_type
    self.scale_adj = scale_adj
    self.activation = activation
def __init__(self, out_dim=64, hidden_channels=None, n_update_layers=None,
             n_atom_types=MAX_ATOMIC_NUM, n_edge_types=4,
             input_type='int', scale_adj=False):
    super(RelGCN, self).__init__()
    if hidden_channels is None:
        hidden_channels = [16, 128, 64]
    elif isinstance(hidden_channels, int):
        if not isinstance(n_update_layers, int):
            raise ValueError(
                'Must specify n_update_layers when hidden_channels is int')
        hidden_channels = [hidden_channels] * n_update_layers
    with self.init_scope():
        if input_type == 'int':
            self.embed = EmbedAtomID(out_size=hidden_channels[0],
                                     in_size=n_atom_types)
        elif input_type == 'float':
            self.embed = GraphLinear(None, hidden_channels[0])
        else:
            raise ValueError(
                "[ERROR] Unexpected value input_type={}".format(input_type))
        self.rgcn_convs = chainer.ChainList(*[
            RelGCNUpdate(hidden_channels[i], hidden_channels[i + 1],
                         n_edge_types)
            for i in range(len(hidden_channels) - 1)
        ])
        self.rgcn_readout = GGNNReadout(
            out_dim=out_dim, in_channels=hidden_channels[-1],
            nobias=True, activation=functions.tanh)
    # self.num_relations = num_edge_type
    self.input_type = input_type
    self.scale_adj = scale_adj
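# Construction sketch (illustrative only; all values are assumptions). With a
# list of hidden_channels, one RelGCNUpdate is built per consecutive pair:
#
#     model = RelGCN(out_dim=64, hidden_channels=[16, 128, 64],
#                    n_edge_types=4, input_type='int')
#
# Passing an int instead requires n_update_layers, e.g.
# RelGCN(hidden_channels=32, n_update_layers=3, ...), which expands to
# [32, 32, 32] and therefore yields two update layers.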
def __init__(self, hidden_channels=300, update_layer=GGNNUpdate,
             n_update_layers=3, super_node_dim=248):
    super(ggnn_gwm, self).__init__()
    hidden_channels = [hidden_channels for _ in range(n_update_layers + 1)]
    in_channels_list = hidden_channels[:-1]
    out_channels_list = hidden_channels[1:]
    assert len(in_channels_list) == n_update_layers
    assert len(out_channels_list) == n_update_layers
    with self.init_scope():
        self.embed = GraphLinear(None, out_size=hidden_channels[0])
        self.update_layers = chainer.ChainList(*[
            update_layer(in_channels=in_channels_list[i],
                         out_channels=out_channels_list[i],
                         n_edge_types=6)
            for i in range(n_update_layers)
        ])
        self.gwm = GWM(hidden_dim=hidden_channels[0],
                       hidden_dim_super=super_node_dim,
                       n_layers=n_update_layers)
        self.embed_super = links.Linear(None, out_size=super_node_dim)
    self.n_update_layers = n_update_layers
def __init__(self, hidden_channels, out_dim, update_layer,
             # readout_layer,
             n_update_layers=None, out_channels=None, super_node_dim=None,
             n_atom_types=MAX_ATOMIC_NUM, n_edge_types=4,
             dropout_ratio=-1.0, with_gwm=True, concat_hidden=False,
             sum_hidden=False, weight_tying=False, scale_adj=False,
             activation=None, use_batchnorm=False, n_activation=None,
             update_kwargs=None, readout_kwargs=None, gwm_kwargs=None):
    super(GWMGraphConvModel, self).__init__()
    # General: length of hidden_channels must be n_update_layers + 1
    if isinstance(hidden_channels, int):
        if n_update_layers is None:
            raise ValueError('n_update_layers is None')
        else:
            hidden_channels = [hidden_channels
                               for _ in range(n_update_layers + 1)]
    elif isinstance(hidden_channels, list):
        if out_channels is None:
            n_update_layers = len(hidden_channels) - 1
        else:
            n_update_layers = len(hidden_channels)
    else:
        raise TypeError('Unexpected value for hidden_channels {}'
                        .format(hidden_channels))

    # if readout_layer == GeneralReadout and hidden_channels[-1] != out_dim:
    #     # When using GWM, all hidden channels must be the same. But
    #     # GeneralReadout cannot change the dimension, so when using
    #     # GeneralReadout together with GWM, the hidden channel and
    #     # out_dim must be the same.
    #     if with_gwm:
    #         raise ValueError('Unsupported combination.')
    #     else:
    #         hidden_channels[-1] = out_dim

    # When the with_gwm, concat_hidden, sum_hidden or weight_tying option is
    # used, all hidden_channels must be the same
    if with_gwm or concat_hidden or sum_hidden or weight_tying:
        if not all([in_dim == hidden_channels[0]
                    for in_dim in hidden_channels]):
            raise ValueError(
                'hidden_channels must be same but different {}'
                .format(hidden_channels))

    if with_gwm and super_node_dim is None:
        print('[WARNING] super_node_dim is None, set to {}'
              .format(hidden_channels[0]))
        super_node_dim = hidden_channels[0]

    if out_channels is None:
        in_channels_list = hidden_channels[:-1]
        out_channels_list = hidden_channels[1:]
    else:
        # For RelGAT concat_heads option
        in_channels_list = hidden_channels
        out_channels_list = out_channels
    assert len(in_channels_list) == n_update_layers
    assert len(out_channels_list) == n_update_layers

    n_use_update_layers = 1 if weight_tying else n_update_layers
    n_readout_layers = \
        n_use_update_layers if concat_hidden or sum_hidden else 1
    n_activation = \
        n_use_update_layers if n_activation is None else n_activation

    if update_kwargs is None:
        update_kwargs = {}
    if readout_kwargs is None:
        readout_kwargs = {}
    if gwm_kwargs is None:
        gwm_kwargs = {}

    with self.init_scope():
        self.embed = GraphLinear(None, out_size=hidden_channels[0])
        self.update_layers = chainer.ChainList(*[
            update_layer(in_channels=in_channels_list[i],
                         out_channels=out_channels_list[i],
                         n_edge_types=n_edge_types, **update_kwargs)
            for i in range(n_use_update_layers)
        ])
        # When the weight_tying option is used, hidden_channels must all be
        # the same, so we can use the -1 index
        # self.readout_layers = chainer.ChainList(
        #     *[readout_layer(out_dim=out_dim,
        #                     # in_channels=hidden_channels[-1],
        #                     in_channels=None,
        #                     **readout_kwargs)
        #       for _ in range(n_readout_layers)])
        if with_gwm:
            self.gwm = GWM(hidden_dim=hidden_channels[0],
                           hidden_dim_super=super_node_dim,
                           n_layers=n_use_update_layers, **gwm_kwargs)
            self.embed_super = links.Linear(None, out_size=super_node_dim)
            self.linear_for_concat_super = links.Linear(in_size=None,
                                                        out_size=out_dim)
        if use_batchnorm:
            self.bnorms = chainer.ChainList(*[
                GraphBatchNormalization(out_channels_list[i])
                for i in range(n_use_update_layers)
            ])

    # self.readout_layer = readout_layer
    self.update_layer = update_layer
    self.weight_tying = weight_tying
    self.with_gwm = with_gwm
    self.concat_hidden = concat_hidden
    self.sum_hidden = sum_hidden
    self.scale_adj = scale_adj
    self.activation = activation
    self.dropout_ratio = dropout_ratio
    self.use_batchnorm = use_batchnorm
    self.n_activation = n_activation
    self.n_update_layers = n_update_layers
    self.n_edge_types = n_edge_types
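# Construction sketch (illustrative only; GGNNUpdate is the update link
# referenced above, and all sizes are example values):
#
#     model = GWMGraphConvModel(hidden_channels=32, out_dim=1,
#                               update_layer=GGNNUpdate, n_update_layers=3)
#
# hidden_channels may also be given as a list, e.g. [32, 32, 32, 32]; when
# with_gwm, concat_hidden, sum_hidden or weight_tying is set, every entry
# must be identical, otherwise the constructor raises ValueError.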
def __init__(self, hidden_dim=16, hidden_dim_super=16, n_layers=4,
             n_heads=8, dropout_ratio=0.5, concat_hidden=False,
             tying_flag=False, gpu=-1):
    super(GWM, self).__init__()
    num_layer = n_layers
    if tying_flag:
        num_layer = 1

    with self.init_scope():
        #
        # for Transmitter unit
        #
        self.F_super = chainer.ChainList(*[
            L.Linear(in_size=hidden_dim_super, out_size=hidden_dim_super)
            for _ in range(num_layer)
        ])
        self.V_super = chainer.ChainList(*[
            L.Linear(hidden_dim * n_heads, hidden_dim * n_heads)
            for _ in range(num_layer)
        ])
        self.W_super = chainer.ChainList(*[
            L.Linear(hidden_dim * n_heads, hidden_dim_super)
            for _ in range(num_layer)
        ])
        self.B = chainer.ChainList(*[
            GraphLinear(n_heads * hidden_dim, n_heads * hidden_dim_super)
            for _ in range(num_layer)
        ])

        #
        # for Warp Gate unit
        #
        self.gate_dim = hidden_dim
        self.H_local = chainer.ChainList(*[
            GraphLinear(in_size=hidden_dim, out_size=self.gate_dim)
            for _ in range(num_layer)
        ])
        self.G_local = chainer.ChainList(*[
            GraphLinear(in_size=hidden_dim_super, out_size=self.gate_dim)
            for _ in range(num_layer)
        ])

        self.gate_dim_super = hidden_dim_super
        self.H_super = chainer.ChainList(*[
            L.Linear(in_size=hidden_dim, out_size=self.gate_dim_super)
            for _ in range(num_layer)
        ])
        self.G_super = chainer.ChainList(*[
            L.Linear(in_size=hidden_dim_super, out_size=self.gate_dim_super)
            for _ in range(num_layer)
        ])

        # GRUs: not layer-wise (recurrent through layers)
        self.GRU_local = L.GRU(in_size=hidden_dim, out_size=hidden_dim)
        self.GRU_super = L.GRU(in_size=hidden_dim_super,
                               out_size=hidden_dim_super)
    # end init_scope-with

    self.hidden_dim = hidden_dim
    self.hidden_dim_super = hidden_dim_super
    self.n_layers = n_layers
    self.n_heads = n_heads
    self.dropout_ratio = dropout_ratio
    self.concat_hidden = concat_hidden
    self.tying_flag = tying_flag
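# Construction sketch (illustrative only; sizes are example values):
#
#     gwm = GWM(hidden_dim=16, hidden_dim_super=16, n_layers=4, n_heads=8,
#               dropout_ratio=0.5, tying_flag=False)
#
# With tying_flag=True a single set of Transmitter / Warp Gate links is shared
# across all layers, while the two GRUs are always shared: they are recurrent
# through layers regardless of the flag.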