def __init__(
    self,
    channels: int = 4,
    units: int = 10,
    activation: typing.Union[str, typing.Type[nn.Module], nn.Module] = 'tanh',
    recurrent_activation: typing.Union[
        str, typing.Type[nn.Module], nn.Module] = 'sigmoid',
    direction: str = 'lt'
):
    """:class:`SpatialGRU` constructor."""
    super().__init__()
    # Cache configuration on the instance.
    self._units = units
    self._activation = parse_activation(activation)
    self._recurrent_activation = parse_activation(recurrent_activation)
    self._direction = direction
    self._channels = channels

    # Only the left-top ('lt') and right-bottom ('rb') scan orders
    # are supported.
    if self._direction not in ('lt', 'rb'):
        raise ValueError(
            f"Invalid direction. "
            f"`{self._direction}` received. "
            f"Must be in `lt`, `rb`."
        )

    # Gate input: the interaction channels plus three neighbouring
    # hidden states.
    input_dim = channels + units * 3
    self._input_dim = input_dim

    self._wr = nn.Linear(input_dim, units * 3)
    self._wz = nn.Linear(input_dim, units * 4)
    self._w_ij = nn.Linear(channels, units)
    self._U = nn.Linear(units * 3, units, bias=False)
    self.reset_parameters()
def build(self):
    """Build model structure."""
    self.embedding = self._make_default_embedding_layer()

    # N-gram convolution blocks; each block object is appended to both
    # sides, so query and document convolutions share weights.
    self.q_convs = nn.ModuleList()
    self.d_convs = nn.ModuleList()
    for n in range(self._params['max_ngram']):
        block = nn.Sequential(
            # Right-pad so an (n+1)-gram conv keeps the sequence length.
            nn.ConstantPad1d((0, n), 0),
            nn.Conv1d(
                in_channels=self._params['embedding_output_dim'],
                out_channels=self._params['filters'],
                kernel_size=n + 1
            ),
            parse_activation(self._params['conv_activation_func'])
        )
        self.q_convs.append(block)
        self.d_convs.append(block)

    # Gaussian kernels spread over [-1, 1]; the kernel whose mu would
    # exceed 1 becomes the exact-match kernel pinned at mu = 1.
    self.kernels = nn.ModuleList()
    kernel_num = self._params['kernel_num']
    for k in range(kernel_num):
        mu = 1. / (kernel_num - 1) + (2. * k) / (kernel_num - 1) - 1.0
        if mu > 1.0:
            self.kernels.append(GaussianKernel(
                mu=1.0, sigma=self._params['exact_sigma']))
        else:
            self.kernels.append(GaussianKernel(
                mu=mu, sigma=self._params['sigma']))

    # One kernel-pooled feature per (query n-gram, doc n-gram) pair.
    dim = self._params['max_ngram'] ** 2 * self._params['kernel_num']
    self.out = self._make_output_layer(dim)
def build(self):
    """Build model structure.

    Combines a local (exact-match) sub-network and a distributed
    (representation) sub-network, each producing a scalar score.
    """
    # Local model: 1-D convolution over the interaction matrix,
    # followed by an MLP scorer.
    self.lm_conv1d = nn.Conv1d(
        in_channels=self._params['right_length'],
        # Consistency fix: read via `self._params` like every other
        # hyper-parameter here (was `self.params`, a property alias).
        out_channels=self._params['lm_filters'],
        kernel_size=1,
        stride=1
    )
    lm_mlp_size = self._params['left_length'] * self._params['lm_filters']
    self.lm_mlp = self._make_multi_layer_perceptron_layer(lm_mlp_size)
    self.lm_linear = self._make_perceptron_layer(
        in_features=self._params['mlp_num_fan_out'],
        out_features=1)

    # Distributed model: separate conv stacks for the left and right
    # texts over their vocab-sized encodings.
    self.dm_conv_activation_func = parse_activation(
        self._params['dm_conv_activation_func'])
    self.dm_conv_left = nn.Conv1d(
        self._params['vocab_size'],
        self._params['dm_filters'],
        self._params['dm_kernel_size'])
    self.dm_mlp_left = self._make_perceptron_layer(
        in_features=self._params['dm_filters'],
        out_features=self._params['dm_filters'])
    self.dm_conv1_right = nn.Conv1d(
        self._params['vocab_size'],
        self._params['dm_filters'],
        self._params['dm_kernel_size'])
    self.dm_conv2_right = nn.Conv1d(
        self._params['dm_filters'],
        self._params['dm_filters'],
        1)
    # Flattened size of the right branch: valid conv shrinks the length
    # by (kernel_size - 1), pooling divides it, filters multiply it.
    dm_mp_size = ((self._params['right_length']
                   - self._params['dm_kernel_size'] + 1)
                  // (self._params['dm_right_pool_size'])
                  * self._params['dm_filters'])
    self.dm_mlp = self._make_multi_layer_perceptron_layer(dm_mp_size)
    self.dm_linear = self._make_perceptron_layer(
        in_features=self._params['mlp_num_fan_out'],
        out_features=1)

    self.dropout = nn.Dropout(self._params['dropout_rate'])
    self.out = self._make_output_layer(1)
def _create_base_network(self) -> nn.Module:
    """
    Apply conv and maxpooling operation towards to each letter-ngram.

    The input shape is `fixed_text_length`*`number of letter-ngram`, as
    described in the paper, `n` is 3, `number of letter-trigram` is
    about 30,000 according to their observation.

    :return: A :class:`nn.Module` of CDSSM network, tensor in tensor out.
    """
    stages = [
        # Right-pad so the conv output keeps the input length.
        nn.ConstantPad1d((0, self._params['kernel_size'] - 1), 0),
        nn.Conv1d(
            in_channels=self._params['vocab_size'],
            out_channels=self._params['filters'],
            kernel_size=self._params['kernel_size']
        ),
        parse_activation(self._params['conv_activation_func']),
        nn.Dropout(p=self._params['dropout_rate']),
        # Global max-pool to a single position, then drop that axis.
        nn.AdaptiveMaxPool1d(1),
        Squeeze(),
        self._make_multi_layer_perceptron_layer(self._params['filters']),
    ]
    return nn.Sequential(*stages)
def build(self):
    """
    Build model structure.

    MatchPyramid text matching as image recognition.
    """
    self.embedding = self._make_default_embedding_layer()

    # Word-level interaction "image".
    self.matching = Matching(matching_type='dot')

    # Stacked 2-D conv blocks; channel counts chain from 1 upward.
    activation = parse_activation(self._params['activation'])
    out_channels = self._params['kernel_count']
    in_channels = [1, *out_channels[:-1]]
    blocks = []
    for ic, oc, ks in zip(in_channels, out_channels,
                          self._params['kernel_size']):
        blocks.append(self._make_conv_pool_block(ic, oc, ks, activation))
    self.conv2d = nn.Sequential(*blocks)

    # Dynamic pooling maps variable-sized feature maps to a fixed grid.
    self.dpool_layer = nn.AdaptiveAvgPool2d(self._params['dpool_size'])
    self.dropout = nn.Dropout(p=self._params['dropout_rate'])

    pooled_rows = self._params['dpool_size'][0]
    pooled_cols = self._params['dpool_size'][1]
    self.out = self._make_output_layer(
        pooled_rows * pooled_cols * out_channels[-1])
def build(self):
    """
    Build model structure.

    ArcI uses a Siamese architecture.
    """
    self.embedding = self._make_default_embedding_layer()

    activation = parse_activation(self._params['conv_activation_func'])

    def stacked_convs(filters, kernel_sizes, pool_sizes):
        # Channel counts chain from the embedding dim through `filters`.
        in_channels = [self._params['embedding_output_dim'], *filters[:-1]]
        blocks = [
            self._make_conv_pool_block(ic, oc, ks, activation, ps)
            for ic, oc, ks, ps in zip(
                in_channels, filters, kernel_sizes, pool_sizes)
        ]
        return nn.Sequential(*blocks)

    self.conv_left = stacked_convs(
        self._params['left_filters'],
        self._params['left_kernel_sizes'],
        self._params['left_pool_sizes'])
    self.conv_right = stacked_convs(
        self._params['right_filters'],
        self._params['right_kernel_sizes'],
        self._params['right_pool_sizes'])

    self.dropout = nn.Dropout(p=self._params['dropout_rate'])

    # Remaining sequence length after every pooling stage.
    left_length = self._params['left_length']
    for ps in self._params['left_pool_sizes']:
        left_length //= ps
    right_length = self._params['right_length']
    for ps in self._params['right_pool_sizes']:
        right_length //= ps

    mlp_in = (left_length * self._params['left_filters'][-1]
              + right_length * self._params['right_filters'][-1])
    self.mlp = self._make_multi_layer_perceptron_layer(mlp_in)
    self.out = self._make_output_layer(self._params['mlp_num_fan_out'])
def build(self):
    """
    Build model structure.

    ArcII has the desirable property of letting two sentences meet
    before their own high-level representations mature.
    """
    self.embedding = self._make_default_embedding_layer()

    def phrase_conv():
        # Right-pad so the conv output keeps the sequence length.
        return nn.Sequential(
            nn.ConstantPad1d((0, self._params['kernel_1d_size'] - 1), 0),
            nn.Conv1d(
                in_channels=self._params['embedding_output_dim'],
                out_channels=self._params['kernel_1d_count'],
                kernel_size=self._params['kernel_1d_size']))

    # Phrase level representations (separate weights per side).
    self.conv1d_left = phrase_conv()
    self.conv1d_right = phrase_conv()

    # Interaction
    self.matching = Matching(matching_type='plus')

    # Stacked 2-D conv/pool blocks over the interaction tensor.
    activation = parse_activation(self._params['activation'])
    out_channels_2d = self._params['kernel_2d_count']
    in_channels_2d = [
        self._params['kernel_1d_count'], *out_channels_2d[:-1]
    ]
    self.conv2d = nn.Sequential(*[
        self._make_conv_pool_block(ic, oc, ks, activation, ps)
        for ic, oc, ks, ps in zip(
            in_channels_2d, out_channels_2d,
            self._params['kernel_2d_size'],
            self._params['pool_2d_size'])
    ])

    self.dropout = nn.Dropout(p=self._params['dropout_rate'])

    # Feature-map size after all 2-D poolings (rows / cols shrink by
    # the respective pool factors).
    left_length = self._params['left_length']
    right_length = self._params['right_length']
    for ps in self._params['pool_2d_size']:
        left_length //= ps[0]
        right_length //= ps[1]

    self.out = self._make_output_layer(
        left_length * right_length * out_channels_2d[-1])
def _make_output_layer(self, in_features: int = 0) -> nn.Module:
    """:return: a correctly shaped torch module for model output."""
    task = self._params['task']
    if isinstance(task, tasks.Classification):
        out_features = task.num_classes
    elif isinstance(task, tasks.Ranking):
        # Ranking scores are a single scalar per example.
        out_features = 1
    else:
        raise ValueError(f"{task} is not a valid task type. "
                         f"Must be in `Ranking` and `Classification`.")
    linear = nn.Linear(in_features, out_features)
    out_activation = self._params['out_activation_func']
    if not out_activation:
        return linear
    return nn.Sequential(linear, parse_activation(out_activation))
def _make_multi_layer_perceptron_layer(self, in_features) -> nn.Module:
    """:return: a multiple layer perceptron.

    :param in_features: input size of the first perceptron layer.
    :raises AttributeError: if `with_multi_layer_perceptron` is not set.
    """
    if not self._params['with_multi_layer_perceptron']:
        # Fixed message typo: the parameter checked above is named
        # `with_multi_layer_perceptron` (message said `...perception`).
        raise AttributeError(
            'Parameter `with_multi_layer_perceptron` not set.')
    activation = parse_activation(self._params['mlp_activation_func'])
    # Layer widths: in_features -> num_units (repeated num_layers
    # times) -> fan_out.
    mlp_sizes = [
        in_features,
        *self._params['mlp_num_layers'] * [self._params['mlp_num_units']],
        self._params['mlp_num_fan_out']
    ]
    mlp = [
        self._make_perceptron_layer(in_f, out_f, activation)
        for in_f, out_f in zip(mlp_sizes, mlp_sizes[1:])
    ]
    return nn.Sequential(*mlp)
def _make_output_layer(
    self,
    in_features: int = 0,
    activation: typing.Union[str, nn.Module] = None
) -> nn.Module:
    """:return: a correctly shaped torch module for model output."""
    task = self._params['task']
    if isinstance(task, tasks.Classification):
        # Classification emits per-class probabilities.
        return nn.Sequential(
            nn.Linear(in_features, task.num_classes),
            nn.Softmax(dim=-1))
    if isinstance(task, tasks.Ranking):
        linear = nn.Linear(in_features, 1)
        if not activation:
            return linear
        return nn.Sequential(linear, parse_activation(activation))
    raise ValueError(f"{task} is not a valid task type. "
                     f"Must be in `Ranking` and `Classification`.")
def build(self):
    """
    Build model structure.

    aNMM: Ranking Short Answer Texts with Attention-Based Neural
    Matching Model.
    """
    self.embedding = self._make_default_embedding_layer()

    # QA Matching
    self.matching = Matching(matching_type='dot', normalize=True)

    # Value-shared weighting: a small MLP from bin features down to a
    # scalar, one (Linear, activation) pair per step.
    activation = parse_activation(self._params['activation'])
    widths = [self._params['num_bins'], *self._params['hidden_sizes'], 1]
    self.hidden_layers = nn.Sequential(*[
        nn.Sequential(nn.Linear(n_in, n_out), activation)
        for n_in, n_out in zip(widths, widths[1:])
    ])

    # Query Attention
    self.q_attention = Attention(self._params['embedding_output_dim'])
    self.dropout = nn.Dropout(p=self._params['dropout_rate'])

    # Build output
    self.out = self._make_output_layer(1)