def __init__(self,
             input_dim: int,
             hidden_dim: int,
             projection_dim: int,
             feedforward_hidden_dim: int,
             num_layers: int,
             num_attention_heads: int,
             use_positional_encoding: bool = True,
             dropout_prob: float = 0.1,
             residual_dropout_prob: float = 0.2,
             attention_dropout_prob: float = 0.1) -> None:
    super(StackedSelfAttentionEncoder, self).__init__()

    self._use_positional_encoding = use_positional_encoding
    self._attention_layers: List[MultiHeadSelfAttention] = []
    self._feedforward_layers: List[FeedForward] = []
    self._layer_norm_layers: List[LayerNorm] = []
    self._feed_forward_layer_norm_layers: List[LayerNorm] = []

    feedforward_input_dim = input_dim
    for i in range(num_layers):
        feedforward = FeedForward(feedforward_input_dim,
                                  activations=[Activation.by_name('relu')(),
                                               Activation.by_name('linear')()],
                                  hidden_dims=[feedforward_hidden_dim, hidden_dim],
                                  num_layers=2,
                                  dropout=dropout_prob)

        # Note: Please use `ModuleList` in new code. It provides better
        # support for running on multiple GPUs. We've kept `add_module` here
        # solely for backwards compatibility with existing serialized models.
        self.add_module(f"feedforward_{i}", feedforward)
        self._feedforward_layers.append(feedforward)

        feedforward_layer_norm = LayerNorm(feedforward.get_output_dim())
        self.add_module(f"feedforward_layer_norm_{i}", feedforward_layer_norm)
        self._feed_forward_layer_norm_layers.append(feedforward_layer_norm)

        self_attention = MultiHeadSelfAttention(num_heads=num_attention_heads,
                                                input_dim=hidden_dim,
                                                attention_dim=projection_dim,
                                                values_dim=projection_dim,
                                                attention_dropout_prob=attention_dropout_prob)
        self.add_module(f"self_attention_{i}", self_attention)
        self._attention_layers.append(self_attention)

        layer_norm = LayerNorm(self_attention.get_output_dim())
        self.add_module(f"layer_norm_{i}", layer_norm)
        self._layer_norm_layers.append(layer_norm)

        feedforward_input_dim = hidden_dim

    self.dropout = Dropout(residual_dropout_prob)
    self._input_dim = input_dim
    self._output_dim = self._attention_layers[-1].get_output_dim()
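# For context: the constructor above only registers sublayers. The following is
# a minimal, illustrative sketch of the residual wiring such a stack typically
# uses in `forward` -- an assumption based on the standard pattern, not the
# verbatim library implementation. `add_positional_features` is assumed to be
# AllenNLP's positional-encoding utility; everything else reuses names
# registered above.
def forward(self, inputs: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
    output = add_positional_features(inputs) if self._use_positional_encoding else inputs
    for i in range(len(self._attention_layers)):
        attention = getattr(self, f"self_attention_{i}")
        feedforward = getattr(self, f"feedforward_{i}")
        feedforward_layer_norm = getattr(self, f"feedforward_layer_norm_{i}")
        layer_norm = getattr(self, f"layer_norm_{i}")

        cached_input = output
        feedforward_output = self.dropout(feedforward(output))
        if feedforward_output.size() == cached_input.size():
            # Residual connection, applicable only once the dimensions match.
            feedforward_output = feedforward_layer_norm(feedforward_output + cached_input)
        attention_output = self.dropout(attention(feedforward_output, mask))
        output = layer_norm(attention_output + feedforward_output)
    return output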
def __init__(self,
             word_embeddings: TextFieldEmbedder,
             bin_count: int):
    super(DRMM, self).__init__()

    self.word_embeddings = word_embeddings
    self.cosine_module = CosineMatrixAttention()
    self.bin_count = bin_count

    # Scores each query term's histogram of cosine-similarity bins.
    self.matching_classifier = FeedForward(input_dim=bin_count,
                                           num_layers=2,
                                           hidden_dims=[bin_count, 1],
                                           activations=[Activation.by_name('tanh')(),
                                                        Activation.by_name('tanh')()])
    # Term-gating network: one scalar gate per query term.
    self.query_gate = FeedForward(input_dim=self.word_embeddings.get_output_dim(),
                                  num_layers=2,
                                  hidden_dims=[self.word_embeddings.get_output_dim(), 1],
                                  activations=[Activation.by_name('tanh')(),
                                               Activation.by_name('tanh')()])
    self.query_softmax = MaskedSoftmax()
def __init__(self,
             output_dim: int,
             word_embedder: Embeddings,
             char_embedder: Embeddings,
             filters: List[Tuple[int, int]],
             n_highway: int,
             activation: str):
    super(ConvTokenEmbedder, self).__init__(output_dim, word_embedder, char_embedder)

    self.emb_dim = 0
    if word_embedder is not None:
        self.emb_dim += word_embedder.n_d

    if char_embedder is not None:
        self.convolutions = []
        char_embed_dim = char_embedder.n_d
        for width, num in filters:
            conv = torch.nn.Conv1d(in_channels=char_embed_dim,
                                   out_channels=num,
                                   kernel_size=width,
                                   bias=True)
            self.convolutions.append(conv)
        self.convolutions = torch.nn.ModuleList(self.convolutions)

        self.n_filters = sum(f[1] for f in filters)
        self.n_highway = n_highway
        self.highways = Highway(self.n_filters, self.n_highway,
                                activation=Activation.by_name("relu")())
        self.emb_dim += self.n_filters

    self.activation = Activation.by_name(activation)()
    self.projection = torch.nn.Linear(self.emb_dim, self.output_dim, bias=True)
def __init__(self,
             word_embeddings: TextFieldEmbedder,
             encoder: Seq2VecEncoder,
             dropout_p: float,
             vocab: Vocabulary) -> None:
    super().__init__(vocab)
    self.word_embeddings = word_embeddings

    # Project embeddings to the encoder's expected input size.
    self.embedding2input = FeedForward(input_dim=word_embeddings.get_output_dim(),
                                       num_layers=1,
                                       hidden_dims=encoder.get_input_dim(),
                                       activations=Activation.by_name('relu')(),
                                       dropout=dropout_p)
    self.encoder = encoder
    self.hidden2intermediate = FeedForward(input_dim=encoder.get_output_dim(),
                                           num_layers=1,
                                           hidden_dims=encoder.get_output_dim() // 2,
                                           activations=Activation.by_name('relu')(),
                                           dropout=dropout_p)
    self.intermediate2tag = nn.Linear(in_features=encoder.get_output_dim() // 2,
                                      out_features=vocab.get_vocab_size('labels'))
    self.accuracy = CategoricalAccuracy()
    self.loss_function = torch.nn.CrossEntropyLoss()
def __init__(
    self,
    input_dim: int,
    hidden_dim: int,
    attention_projection_dim: int,
    feedforward_hidden_dim: int,
    num_convs: int,
    conv_kernel_size: int,
    num_attention_heads: int,
    use_positional_encoding: bool = True,
    dropout_prob: float = 0.1,
    layer_dropout_undecayed_prob: float = 0.1,
    attention_dropout_prob: float = 0,
) -> None:
    super().__init__()

    check_dimensions_match(input_dim, hidden_dim, "input_dim", "hidden_dim")

    self._use_positional_encoding = use_positional_encoding
    self._conv_norm_layers = torch.nn.ModuleList(
        [LayerNorm(hidden_dim) for _ in range(num_convs)]
    )
    self._conv_layers = torch.nn.ModuleList()
    for _ in range(num_convs):
        # Pad so the output length equals the input length, then apply a
        # depthwise convolution followed by a pointwise (1x1) convolution.
        padding = torch.nn.ConstantPad1d(
            (conv_kernel_size // 2, (conv_kernel_size - 1) // 2), 0
        )
        depthwise_conv = torch.nn.Conv1d(
            hidden_dim, hidden_dim, conv_kernel_size, groups=hidden_dim
        )
        pointwise_conv = torch.nn.Conv1d(hidden_dim, hidden_dim, 1)
        self._conv_layers.append(
            torch.nn.Sequential(
                padding, depthwise_conv, pointwise_conv, Activation.by_name("relu")()
            )
        )

    self.attention_norm_layer = LayerNorm(hidden_dim)
    self.attention_layer = MultiHeadSelfAttention(
        num_heads=num_attention_heads,
        input_dim=hidden_dim,
        attention_dim=attention_projection_dim,
        values_dim=attention_projection_dim,
        attention_dropout_prob=attention_dropout_prob,
    )
    self.feedforward_norm_layer = LayerNorm(hidden_dim)
    self.feedforward = FeedForward(
        hidden_dim,
        activations=[Activation.by_name("relu")(), Activation.by_name("linear")()],
        hidden_dims=[feedforward_hidden_dim, hidden_dim],
        num_layers=2,
        dropout=dropout_prob,
    )
    self.dropout = Dropout(dropout_prob)
    self.residual_with_layer_dropout = ResidualWithLayerDropout(layer_dropout_undecayed_prob)
    self._input_dim = input_dim
    self._output_dim = hidden_dim
def __init__(
    self,
    input_dim,
    hidden_dim,
    projection_dim,
    feedforward_hidden_dim,
    num_layers,
    num_attention_heads,
    use_positional_encoding=True,
    dropout_prob=0.2,
):
    super(MaskedStackedSelfAttentionEncoder, self).__init__()

    self._use_positional_encoding = use_positional_encoding
    self._attention_layers = []
    self._feedforward_layers = []
    self._layer_norm_layers = []
    self._feed_forward_layer_norm_layers = []

    feedforward_input_dim = input_dim
    for i in range(num_layers):
        feedforward = FeedForward(
            feedforward_input_dim,
            activations=[Activation.by_name("relu")(), Activation.by_name("linear")()],
            hidden_dims=[feedforward_hidden_dim, hidden_dim],
            num_layers=2,
            dropout=dropout_prob,
        )
        # `add_module` takes the submodule's name and the submodule itself.
        self.add_module("feedforward_{}".format(i), feedforward)
        self._feedforward_layers.append(feedforward)

        feedforward_layer_norm = LayerNorm(feedforward.get_input_dim())
        self.add_module("feedforward_layer_norm_{}".format(i), feedforward_layer_norm)
        self._feed_forward_layer_norm_layers.append(feedforward_layer_norm)

        self_attention = MaskedMultiHeadSelfAttention(
            num_heads=num_attention_heads,
            input_dim=hidden_dim,
            attention_dim=projection_dim,
            values_dim=projection_dim,
        )
        self.add_module("self_attention_{}".format(i), self_attention)
        self._attention_layers.append(self_attention)

        layer_norm = LayerNorm(self_attention.get_input_dim())
        self.add_module("layer_norm_{}".format(i), layer_norm)
        self._layer_norm_layers.append(layer_norm)

        feedforward_input_dim = hidden_dim

    self.dropout = torch.nn.Dropout(dropout_prob)
    self._input_dim = input_dim
    self._output_dim = self._attention_layers[-1].get_output_dim()
    self._output_layer_norm = LayerNorm(self._output_dim)
def __init__(self,
             in_channels: int,
             out_channels: int,
             kernel_size: int,
             activation: str = 'relu',
             dim: int = 1,
             bias: bool = True) -> None:
    super().__init__()

    if dim == 1:
        # Pad so the convolution preserves sequence length for both even and
        # odd kernel sizes.
        padding_left = kernel_size // 2
        padding_right = padding_left if kernel_size % 2 != 0 else padding_left - 1
        self.depthwise_conv = torch.nn.Sequential(
            torch.nn.ReflectionPad1d((padding_left, padding_right)),
            torch.nn.Conv1d(in_channels=in_channels,
                            out_channels=in_channels,
                            kernel_size=kernel_size,
                            groups=in_channels,
                            bias=bias))
        self.pointwise_conv = torch.nn.Conv1d(in_channels=in_channels,
                                              out_channels=out_channels,
                                              kernel_size=1,
                                              bias=bias)
    elif dim == 2:
        if isinstance(kernel_size, collections.abc.Iterable):
            kernel_1, kernel_2 = kernel_size
            padding_1_left = kernel_1 // 2
            padding_1_right = padding_1_left if kernel_1 % 2 != 0 else padding_1_left - 1
            padding_2_left = kernel_2 // 2
            padding_2_right = padding_2_left if kernel_2 % 2 != 0 else padding_2_left - 1
            padding = (padding_1_left, padding_1_right, padding_2_left, padding_2_right)
        else:
            padding_left = kernel_size // 2
            padding_right = padding_left if kernel_size % 2 != 0 else padding_left - 1
            padding = (padding_left, padding_right, padding_left, padding_right)
        self.depthwise_conv = torch.nn.Sequential(
            torch.nn.ReflectionPad2d(padding),
            torch.nn.Conv2d(in_channels=in_channels,
                            out_channels=in_channels,
                            kernel_size=kernel_size,
                            groups=in_channels,
                            bias=bias))
        self.pointwise_conv = torch.nn.Conv2d(in_channels=in_channels,
                                              out_channels=out_channels,
                                              kernel_size=1,
                                              bias=bias)
    else:
        raise ValueError(
            f"We currently only handle 1 and 2 dimensional convolutions here. You gave {dim}.")

    if activation is not None:
        self._activation = Activation.by_name(activation)()
    else:
        self._activation = Activation.by_name("linear")()
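# Why the depthwise/pointwise split above: a quick, self-contained comparison
# of parameter counts against a standard convolution (hypothetical sizes, for
# illustration only).
import torch

standard = torch.nn.Conv1d(128, 128, kernel_size=7)                # 114,816 params
depthwise = torch.nn.Conv1d(128, 128, kernel_size=7, groups=128)   # 1,024 params
pointwise = torch.nn.Conv1d(128, 128, kernel_size=1)               # 16,512 params

def count(module: torch.nn.Module) -> int:
    return sum(p.numel() for p in module.parameters())

assert count(standard) == 114_816
assert count(depthwise) + count(pointwise) == 17_536  # roughly 6.5x fewer parameters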
def __init__(self,
             input_dim: int,
             hidden_dim: int,
             attention_projection_dim: int,
             feedforward_hidden_dim: int,
             num_convs: int,
             conv_kernel_size: int,
             num_attention_heads: int,
             use_positional_encoding: bool = True,
             dropout_prob: float = 0.1,
             layer_dropout_undecayed_prob: float = 0.1,
             attention_dropout_prob: float = 0) -> None:
    super().__init__()

    check_dimensions_match(input_dim, hidden_dim, 'input_dim', 'hidden_dim')

    self._use_positional_encoding = use_positional_encoding
    self._conv_norm_layers = torch.nn.ModuleList(
        [LayerNorm(hidden_dim) for _ in range(num_convs)])
    self._conv_layers = torch.nn.ModuleList([
        DepthwiseSeparableConv(hidden_dim, hidden_dim, conv_kernel_size,
                               activation="relu", dim=1)
        for _ in range(num_convs)
    ])

    self.attention_norm_layer = LayerNorm(hidden_dim)
    self.attention_layer = MemoryEfficientMultiHeadSelfAttention(
        num_heads=num_attention_heads,
        input_dim=hidden_dim,
        attention_dim=attention_projection_dim,
        values_dim=attention_projection_dim,
        attention_dropout_prob=attention_dropout_prob)
    self.feedforward_norm_layer = LayerNorm(hidden_dim)
    self.feedforward = FeedForward(
        hidden_dim,
        activations=[Activation.by_name('relu')(), Activation.by_name('linear')()],
        hidden_dims=[feedforward_hidden_dim, hidden_dim],
        num_layers=2,
        dropout=dropout_prob)

    self.dropout = Dropout(dropout_prob)
    self.residual_with_layer_dropout = ResidualWithLayerDropout(layer_dropout_undecayed_prob)
    self._input_dim = input_dim
    self._output_dim = hidden_dim
def __init__(self,
             iterations: int = 1,
             dropout: float = 0.3,
             straight_through: bool = False,
             hidden_dim: int = 40,
             corruption_rate: float = 0.1,
             corruption_iterations: int = 1,
             testing_iterations: int = 5,
             gumbel_t: float = 0.0,
             weight_tie: bool = False,
             activation: str = "sigmoid",
             use_predicate_rep: bool = False,
             subtract_gold: bool = False,
             fw_update: float = 0,
             graph_type: int = 0) -> None:
    super(SRLRefiner, self).__init__()

    self.gumbel_t = gumbel_t
    self.straight_through = straight_through
    self.iterations = iterations
    self.hidden_dim = hidden_dim
    self._dropout = Dropout(dropout)
    self.testing_iterations = testing_iterations
    self.dropout = dropout
    self.corruption_rate = corruption_rate
    # Samples a Bernoulli mask over all but the last dimension: each position
    # is kept with probability 1 - corruption_rate.
    self._corrupt_mask = lambda x: torch.bernoulli(
        x.data.new(x.data.size()[:-1]).fill_(1 - self.corruption_rate)).unsqueeze(-1)
    self.corruption_iterations = corruption_iterations
    self.weight_tie = weight_tie
    self.activation = Activation.by_name(activation)()
    self.use_predicate_rep = use_predicate_rep
    self.subtract_gold = subtract_gold
    self.graph_type = graph_type
    self.fw_update = fw_update
def __init__(self,
             input_dim: int,
             hidden_dim: int,
             activation: Activation = None) -> None:
    super().__init__()
    self._weight_matrix = nn.Linear(input_dim, hidden_dim)
    self._activation = activation or Activation.by_name('relu')()
def __init__(self,
             vocab_size,
             embed_size,
             projection,
             hidden_size=None,
             activation: Activation = Activation.by_name('linear')(),
             pre_embed=None):
    super(EncoderAverage, self).__init__()

    self.vocab_size = vocab_size
    self.embed_size = embed_size

    if pre_embed is not None:
        print("Setting Embedding")
        weight = torch.Tensor(pre_embed)
        weight[0, :].zero_()
        self.embedding = nn.Embedding(vocab_size, embed_size,
                                      _weight=weight, padding_idx=0)
    else:
        self.embedding = nn.Embedding(vocab_size, embed_size, padding_idx=0)

    if projection:
        self.projection = nn.Linear(embed_size, hidden_size)
        self.output_size = hidden_size
    else:
        self.projection = lambda s: s  # identity when no projection is used
        self.output_size = embed_size

    self.activation = activation
def __init__(self,
             num_layers: int,
             in_channel: int,
             hidden_channel: int,
             kernel_size: int = 3,
             stride: int = 2,
             nonlinearity: Activation = Activation.by_name('linear')()):
    super(CNN, self).__init__()

    self._in_channel = in_channel
    self._hidden_channel = hidden_channel
    self._kernel_size = kernel_size
    self._stride = stride
    self._num_layers = num_layers

    layers = []
    for l in range(num_layers):
        in_channel = self._in_channel if l == 0 else self._hidden_channel
        conv = LengthAwareWrapper(nn.Conv2d(in_channel,
                                            self._hidden_channel,
                                            self._kernel_size,
                                            stride=self._stride,
                                            padding=1))
        bn = LengthAwareWrapper(nn.BatchNorm2d(self._hidden_channel), pass_through=True)
        layers.append((f"conv{l}", conv))
        layers.append((f"bn{l}", bn))
        layers.append((f"nonlinear{l}", LengthAwareWrapper(nonlinearity, pass_through=True)))
    self.module = nn.Sequential(OrderedDict(layers))

    # The overall downsampling rate is the product of the strides of all
    # stride-bearing layers.
    strides = [self.module[idx].stride
               for idx in range(len(self.module))
               if hasattr(self.module[idx], "stride")]
    self._downsample_rate = reduce(lambda x, y: x * y, strides)
def __init__(self,
             input_dim: int,
             activation: Optional[Activation] = None,
             normalise: bool = True) -> None:
    super().__init__(normalise)
    self._weights = torch.nn.Linear(in_features=input_dim, out_features=input_dim)
    self._activation = activation or Activation.by_name('relu')()
@classmethod
def from_params(cls, params: Params) -> 'LinearMatrixAttention':
    tensor_1_dim = params.pop_int("tensor_1_dim")
    tensor_2_dim = params.pop_int("tensor_2_dim")
    combination = params.pop("combination", "x,y")
    activation = Activation.by_name(params.pop("activation", "linear"))()
    params.assert_empty(cls.__name__)
    return cls(tensor_1_dim=tensor_1_dim,
               tensor_2_dim=tensor_2_dim,
               combination=combination,
               activation=activation)
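# Hypothetical usage of the `from_params` constructor above, assuming
# AllenNLP's `Params` container; the dimensions and combination string are
# made up for illustration.
from allennlp.common import Params

attention = LinearMatrixAttention.from_params(Params({
    "tensor_1_dim": 200,
    "tensor_2_dim": 200,
    "combination": "x,y,x*y",   # elementwise product appended to the two inputs
    "activation": "tanh",
}))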
def initialize_network(self, n_tags: int, sense_dim: int, rep_dim: int):
    self.n_tags = n_tags
    self._arc_tag_arg_enc = Linear(rep_dim, self.hidden_dim)
    if self.use_predicate_rep:
        self._arc_tag_pred_enc = Linear(rep_dim, self.hidden_dim)
    if self.graph_type != 2:
        self._arc_tag_sense_enc = Linear(sense_dim, self.hidden_dim)

    # Graph types 1 and 2 share the same tag-encoder input size.
    if self.graph_type in (1, 2):
        self._arc_tag_tags_enc = Linear(n_tags + 1, self.hidden_dim)
    else:
        self._arc_tag_tags_enc = Linear(2 * n_tags + 1, self.hidden_dim)

    if self.weight_tie:
        # Tie the refiners' output projections to the encoders' weights.
        self.arc_tag_refiner = lambda x: x.matmul(
            self._arc_tag_tags_enc.weight[:, :n_tags + 1])
        if self.graph_type != 2:
            self.predicate_linear = Linear(rep_dim + n_tags + sense_dim, self.hidden_dim)
        else:
            self.predicate_linear = Linear(rep_dim + sense_dim, self.hidden_dim)
        self.predicate_refiner = lambda x: self._dropout(
            self.activation(self.predicate_linear(x))).matmul(
                self.predicate_linear.weight[:, :sense_dim])
    else:
        self.arc_tag_refiner = FeedForward(self.hidden_dim, 1, n_tags + 1,
                                           Activation.by_name("linear")(),
                                           dropout=self.dropout)
        self.predicate_refiner = FeedForward(rep_dim + n_tags + sense_dim, 2,
                                             [self.hidden_dim, sense_dim],
                                             [self.activation, Activation.by_name("linear")()],
                                             dropout=self.dropout)
def __init__(self,
             tensor_1_dim: int,
             tensor_2_dim: int,
             combination: str = 'x,y',
             activation: Activation = None) -> None:
    super().__init__()
    self._combination = combination
    combined_dim = util.get_combined_dim(combination, [tensor_1_dim, tensor_2_dim])
    self._weight_vector = Parameter(torch.Tensor(combined_dim))
    self._bias = Parameter(torch.Tensor(1))
    self._activation = activation or Activation.by_name('linear')()
    self.reset_parameters()
@classmethod
def from_params(cls, params: Params) -> 'Attention':
    tensor_1_dim = params.pop_int("tensor_1_dim")
    tensor_2_dim = params.pop_int("tensor_2_dim")
    combination = params.pop("combination", "x,y")
    activation = Activation.by_name(params.pop("activation", "linear"))()
    normalize = params.pop_bool('normalize', True)
    params.assert_empty(cls.__name__)
    return cls(normalize=normalize,
               tensor_1_dim=tensor_1_dim,
               tensor_2_dim=tensor_2_dim,
               combination=combination,
               activation=activation)
def __init__(self,
             vocab: Vocabulary,
             encoder_dim: int,
             label_dim: int,
             edge_dim: int,
             activation: Activation = None) -> None:
    """
    Parameters
    ----------
    vocab : ``Vocabulary``, required
        A Vocabulary, required in order to compute sizes for input/output projections.
    encoder_dim : ``int``, required.
        The output dimension of the encoder.
    label_dim : ``int``, required.
        The dimension of the hidden layer of the MLP used for predicting the edge labels.
    edge_dim : ``int``, required.
        The dimension of the hidden layer of the MLP used for predicting edge existence.
    activation : ``Activation``, optional, (default = tanh).
        The activation function used in the MLPs.
    """
    super(KGEdges, self).__init__(vocab)
    self._encoder_dim = encoder_dim

    if activation is None:
        self.activation = Activation.by_name("tanh")()
    else:
        self.activation = activation

    # Edge existence: these two matrices together form the feedforward network
    # that takes the vectors of the two words in question and makes predictions
    # from that. This is the trick described by Kiperwasser and Goldberg to
    # make training faster; see the sketch after this snippet.
    self.head_arc_feedforward = torch.nn.Linear(encoder_dim, edge_dim)
    # Bias is already added by head_arc_feedforward.
    self.child_arc_feedforward = torch.nn.Linear(encoder_dim, edge_dim, bias=False)
    # K&G don't use a bias for the output layer.
    self.arc_out_layer = torch.nn.Linear(edge_dim, 1, bias=False)

    # Edge labels, using the same trick again:
    num_labels = vocab.get_vocab_size("head_tags")  # = edge labels
    self.head_label_feedforward = torch.nn.Linear(encoder_dim, label_dim)
    self.child_label_feedforward = torch.nn.Linear(encoder_dim, label_dim, bias=False)
    # Output layer for edge labels; its input is the label_dim-sized hidden layer.
    self.label_out_layer = torch.nn.Linear(label_dim, num_labels)
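# Illustrative sketch of the Kiperwasser & Goldberg trick referenced above:
# because the first MLP layer is linear in the concatenation [head; child], it
# can be split into the two projections registered above and combined by a
# broadcast sum, scoring all head/child pairs at once. This method is an
# assumption about the unshown `forward`, not the author's code.
def _edge_scores(self, encoded: torch.Tensor) -> torch.Tensor:
    # encoded: (batch, seq_len, encoder_dim)
    head = self.head_arc_feedforward(encoded)    # (batch, seq_len, edge_dim)
    child = self.child_arc_feedforward(encoded)  # (batch, seq_len, edge_dim)
    # Broadcast sum yields one edge_dim vector per (head, child) pair.
    pairwise = self.activation(head.unsqueeze(2) + child.unsqueeze(1))
    return self.arc_out_layer(pairwise).squeeze(-1)  # (batch, seq_len, seq_len)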
def __init__(self, hdim: int = 768, nlayers: int = 2, dropout_prob: float = 0.1):
    super(GCNNet, self).__init__()

    self._gcn_layers = []
    self._feedforward_layers: List[FeedForward] = []
    self._layer_norm_layers: List[LayerNorm] = []
    self._feed_forward_layer_norm_layers: List[LayerNorm] = []

    feedforward_input_dim, feedforward_hidden_dim, hidden_dim = hdim, hdim, hdim
    for i in range(nlayers):
        feedforward = FeedForward(feedforward_input_dim,
                                  activations=[Activation.by_name('relu')(),
                                               Activation.by_name('linear')()],
                                  hidden_dims=[feedforward_hidden_dim, hidden_dim],
                                  num_layers=2,
                                  dropout=dropout_prob)

        # Note: Please use `ModuleList` in new code. It provides better
        # support for running on multiple GPUs. We've kept `add_module` here
        # solely for backwards compatibility with existing serialized models.
        self.add_module(f"feedforward_{i}", feedforward)
        self._feedforward_layers.append(feedforward)

        feedforward_layer_norm = LayerNorm(feedforward.get_output_dim())
        self.add_module(f"feedforward_layer_norm_{i}", feedforward_layer_norm)
        self._feed_forward_layer_norm_layers.append(feedforward_layer_norm)

        gcn = GCN(hdim, hdim, F.relu)
        self.add_module(f"gcn_{i}", gcn)
        self._gcn_layers.append(gcn)

        layer_norm = LayerNorm(hdim)
        self.add_module(f"layer_norm_{i}", layer_norm)
        self._layer_norm_layers.append(layer_norm)

        feedforward_input_dim = hidden_dim

    self.dropout = Dropout(dropout_prob)
    self._input_dim = hdim
    self._output_dim = hdim
def __init__(self,
             tensor_1_dim,
             tensor_2_dim,
             combination='x,y',
             activation=None):
    super(LinearMatrixAttention, self).__init__()
    self._combination = combination
    combined_dim = util.get_combined_dim(combination, [tensor_1_dim, tensor_2_dim])
    self._weight_vector = Parameter(torch.Tensor(combined_dim))
    self._bias = Parameter(torch.Tensor(1))
    self._activation = activation or Activation.by_name('linear')()
    self.reset_parameters()
def __init__(self,
             u_input_dim: int,
             v_input_dim: int,
             projection_dim: int,
             activation: Optional[Activation] = None) -> None:
    super(HeterogenousSequenceAttention, self).__init__()
    self._output_dim = projection_dim
    self._u_input_dim = u_input_dim
    self._v_input_dim = v_input_dim
    self._u_projection = torch.nn.Linear(in_features=u_input_dim, out_features=projection_dim)
    self._v_projection = torch.nn.Linear(in_features=v_input_dim, out_features=projection_dim)
    self._activation = activation or Activation.by_name('relu')()
def __init__(self, params: Params, vocab: Vocabulary) -> None:
    super().__init__(vocab=vocab)

    enc_hidden_dim = params.pop_int('enc_hidden_dim', 300)
    disc_hidden_dim = params.pop_int('disc_hidden_dim', 1200)
    disc_num_layers = params.pop_int('disc_num_layers', 1)
    emb_dropout = params.pop_float('emb_dropout', 0.0)
    disc_dropout = params.pop_float('disc_dropout', 0.0)
    l2_weight = params.pop_float('l2_weight', 0.0)

    self.emb_dropout = nn.Dropout(emb_dropout)
    self.disc_dropout = nn.Dropout(disc_dropout)
    self._l2_weight = l2_weight

    self._token_embedder = Embedding.from_params(vocab=vocab,
                                                 params=params.pop('token_embedder'))
    self._discriminator_encoder = PytorchSeq2VecWrapper(
        nn.LSTM(input_size=self._token_embedder.get_output_dim(),
                hidden_size=enc_hidden_dim,
                batch_first=True))
    # disc_num_layers ReLU hidden layers, plus a final linear output layer.
    self._discriminator = FeedForward(
        input_dim=4 * self._discriminator_encoder.get_output_dim(),
        hidden_dims=[disc_hidden_dim] * disc_num_layers + [self._NUM_LABELS],
        num_layers=disc_num_layers + 1,
        activations=[Activation.by_name('relu')()] * disc_num_layers
                    + [Activation.by_name('linear')()])

    # Metrics
    self._metrics = {
        'labeled': {
            'discriminator_entropy': ScalarMetric(),
            'discriminator_accuracy': CategoricalAccuracy(),
            'loss': ScalarMetric(),
        }
    }
def __init__(self,
             word_embeddings: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             dropout_p: float,
             vocab: Vocabulary) -> None:
    super().__init__(vocab)
    self.word_embeddings = word_embeddings
    self.embedding2input = FeedForward(input_dim=word_embeddings.get_output_dim(),
                                       num_layers=1,
                                       hidden_dims=encoder.get_input_dim(),
                                       activations=Activation.by_name('relu')(),
                                       dropout=dropout_p)
    self.encoder = encoder
    self.hidden2intermediate = FeedForward(input_dim=encoder.get_output_dim(),
                                           num_layers=1,
                                           hidden_dims=encoder.get_output_dim() // 2,
                                           activations=Activation.by_name('relu')(),
                                           dropout=dropout_p)
    self.intermediate2tag = nn.Linear(in_features=encoder.get_output_dim() // 2,
                                      out_features=vocab.get_vocab_size('labels'))

    # Compute micro-averaged F1 over all labels except 'O' and 'OR'.
    label_vocab = vocab.get_token_to_index_vocabulary('labels').copy()
    for label in ['O', 'OR']:
        label_vocab.pop(label)
    labels_for_metric = list(label_vocab.values())
    self.accuracy = CustomFBetaMeasure(beta=1.0,
                                       average='micro',
                                       labels=labels_for_metric)
def __init__(
    self,
    tensor_1_dim: int,
    tensor_2_dim: int,
    combination: str = "x,y",
    activation: Activation = None,
    normalize: bool = True,
) -> None:
    super().__init__(normalize)
    self._combination = combination
    combined_dim = util.get_combined_dim(combination, [tensor_1_dim, tensor_2_dim])
    self._weight_vector = Parameter(torch.Tensor(combined_dim))
    self._bias = Parameter(torch.Tensor(1))
    self._activation = activation or Activation.by_name("linear")()
    self.reset_parameters()
def __init__(self,
             vocab: Vocabulary,
             title_embedder: TextFieldEmbedder,
             abstract_embedder: TextFieldEmbedder,
             dense_dim=75) -> None:
    super().__init__(vocab)
    self.title_embedder = title_embedder
    self.abstract_embedder = abstract_embedder

    self.intermediate_dim = 6
    self.n_layers = 3
    # Two ELU hidden layers of size dense_dim, then a single sigmoid output unit.
    self.layer_dims = [dense_dim for _ in range(self.n_layers - 1)]
    self.layer_dims.append(1)
    self.activations = [Activation.by_name("elu")(),
                        Activation.by_name("elu")(),
                        Activation.by_name("sigmoid")()]
    self.layers = FeedForward(input_dim=self.intermediate_dim,
                              num_layers=self.n_layers,
                              hidden_dims=self.layer_dims,
                              activations=self.activations)
def __init__(self,
             embedding_dim: int,
             pooling: str = "sum",
             projection_dim: Optional[int] = None,
             activation: Optional[str] = None) -> None:
    super().__init__()
    self._embedding_dim = embedding_dim
    self._pooling = pooling
    self._projection_dim = projection_dim
    self._activation = Activation.by_name(activation)() if activation else None
    # Fall back to the identity when no projection dimension is given.
    self._projection = (torch.nn.Linear(self._embedding_dim, self._projection_dim)
                        if projection_dim else lambda x: x)
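# Self-contained illustration of the "sum" pooling configured above, with a
# padding mask and hypothetical shapes (the embedder's own forward is not shown).
import torch

embedded = torch.randn(2, 7, 300)        # (batch, num_tokens, embedding_dim)
mask = torch.ones(2, 7).unsqueeze(-1)    # 1 for real tokens, 0 for padding
summed = (embedded * mask).sum(dim=1)    # (batch, embedding_dim)
assert summed.shape == (2, 300)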
def __init__(
    self,
    hidden_dim: int,
    tag_dim: int,
    activation: str,
    embedding_dim: int,
):
    super(ContinuousEncoder, self).__init__()

    self.hidden_dim = hidden_dim
    self.tag_dim = tag_dim
    self.activation = Activation.by_name(activation)()
    self.embedding_dim = embedding_dim

    # ============= Covariance matrix & Mean vector ================
    interm_layer_size = (self.embedding_dim + self.hidden_dim) // 2
    self.linear_layer = nn.Linear(self.embedding_dim, interm_layer_size)
    self.linear_layer3 = nn.Linear(interm_layer_size, self.hidden_dim)

    self.hidden2mean = nn.Linear(self.hidden_dim, self.tag_dim)
    self.hidden2std = nn.Linear(self.hidden_dim, self.tag_dim)
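# The mean/std heads above suggest a Gaussian reparameterization step; this is
# a speculative, minimal sketch of how such heads are commonly used (the
# class's actual forward is not shown, so treat every line as an assumption).
def _sample_tag_representation(self, hidden: torch.Tensor) -> torch.Tensor:
    mean = self.hidden2mean(hidden)                              # (batch, ..., tag_dim)
    std = torch.nn.functional.softplus(self.hidden2std(hidden))  # keep std positive
    return mean + std * torch.randn_like(std)                    # reparameterized sample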
def __init__(self,
             vocab_size,
             embed_size,
             hidden_size,
             kernel_sizes,
             activation: Activation = Activation.by_name('relu')(),
             pre_embed=None):
    super(EncoderCNN, self).__init__()

    self.vocab_size = vocab_size
    self.embed_size = embed_size

    if pre_embed is not None:
        print("Setting Embedding")
        weight = torch.Tensor(pre_embed)
        weight[0, :].zero_()
        self.embedding = nn.Embedding(vocab_size, embed_size,
                                      _weight=weight, padding_idx=0)
    else:
        self.embedding = nn.Embedding(vocab_size, embed_size, padding_idx=0)

    self.hidden_size = hidden_size
    # One convolution per kernel size, padded to preserve sequence length.
    convs = {}
    for i, kernel_size in enumerate(kernel_sizes):
        convs[str(i)] = nn.Conv1d(embed_size, hidden_size, kernel_size,
                                  padding=(kernel_size - 1) // 2)
    self.convolutions = nn.ModuleDict(convs)
    self.activation = activation
    self.output_size = hidden_size * len(kernel_sizes)
def __init__(self,
             output_dim: int,
             embeddings: Embeddings,
             filters: List[Tuple[int, int]],
             n_highway: int,
             activation: str,
             use_cuda: bool,
             input_field_name: str = None):
    super(ConvTokenEmbedder, self).__init__(input_field_name)

    self.embeddings = embeddings
    self.output_dim = output_dim
    self.use_cuda = use_cuda
    self.filters = filters

    convolutions = []
    for width, num in filters:
        conv = torch.nn.Conv1d(in_channels=embeddings.n_d,
                               out_channels=num,
                               kernel_size=width,
                               bias=True)
        convolutions.append(conv)
    self.convolutions = torch.nn.ModuleList(convolutions)

    self.n_filters = sum(f[1] for f in filters)
    self.n_highway = n_highway
    self.highways = Highway(self.n_filters, self.n_highway,
                            activation=torch.nn.functional.relu)

    self.activation = Activation.by_name(activation)()
    self.projection = torch.nn.Linear(self.n_filters, output_dim, bias=True)
    self.reset_parameters()