def __init__(self, state_space, channels, action_space, epsilon=0.99,
             epsilon_min=0.01, epsilon_decay=0.99, gamma=0.9, learning_rate=0.01):
    super(Agent, self).__init__()
    self.action_space = action_space
    self.state_space = state_space
    self.channels = channels
    self.learning_rate = learning_rate
    self.epsilon = epsilon
    self.epsilon_min = epsilon_min
    self.epsilon_decay = epsilon_decay
    self.gamma = gamma
    self.conv1 = Conv2d(self.channels, 32, 8)
    self.conv2 = Conv2d(32, 64, 4)
    self.conv3 = Conv2d(64, 128, 3)
    self.fc1 = Linear(128 * 52 * 52, 64)
    self.fc2 = Linear(64, 32)
    self.output = Linear(32, action_space)
    self.loss_fn = MSELoss()
    self.optimizer = Adam(self.parameters(), lr=self.learning_rate)
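# A plausible forward pass for the Agent above, shown as a sketch only: the ReLU
# activations and the flatten size 128 * 52 * 52 are assumptions consistent with the
# layer definitions in __init__, not the original author's code.
def forward(self, x):
    x = F.relu(self.conv1(x))          # (N, 32, H1, W1)
    x = F.relu(self.conv2(x))          # (N, 64, H2, W2)
    x = F.relu(self.conv3(x))          # (N, 128, 52, 52) for the expected input size
    x = x.view(x.size(0), -1)          # flatten to (N, 128 * 52 * 52)
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    return self.output(x)              # one Q-value per action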
def __init__(self, vocab: Vocabulary,
             sentence_encoder: SentenceEncoder,
             qarg_ffnn: FeedForward,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None):
    super(ClauseAndSpanToAnswerSlotModel, self).__init__(vocab, regularizer)
    self._sentence_encoder = sentence_encoder
    self._qarg_ffnn = qarg_ffnn
    self._clause_embedding = Embedding(
        vocab.get_vocab_size("abst-clause-labels"),
        self._qarg_ffnn.get_input_dim())
    self._span_extractor = EndpointSpanExtractor(
        input_dim=self._sentence_encoder.get_output_dim(),
        combination="x,y")
    self._span_hidden = TimeDistributed(
        Linear(2 * self._sentence_encoder.get_output_dim(),
               self._qarg_ffnn.get_input_dim()))
    self._predicate_hidden = Linear(
        self._sentence_encoder.get_output_dim(),
        self._qarg_ffnn.get_input_dim())
    self._qarg_predictor = Linear(self._qarg_ffnn.get_output_dim(),
                                  self.vocab.get_vocab_size("qarg-labels"))
    self._metric = BinaryF1()
def __init__(self, vocab: Vocabulary,
             sentence_encoder: SentenceEncoder,
             tan_ffnn: FeedForward,
             inject_predicate: bool = False,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None):
    super(SpanToTanModel, self).__init__(vocab, regularizer)
    self._sentence_encoder = sentence_encoder
    self._tan_ffnn = tan_ffnn
    self._inject_predicate = inject_predicate
    self._span_extractor = EndpointSpanExtractor(
        input_dim=self._sentence_encoder.get_output_dim(),
        combination="x,y")
    prediction_input_dim = (3 * self._sentence_encoder.get_output_dim()
                            if self._inject_predicate
                            else 2 * self._sentence_encoder.get_output_dim())
    self._tan_pred = TimeDistributed(
        Sequential(
            Linear(prediction_input_dim, self._tan_ffnn.get_input_dim()),
            ReLU(),
            self._tan_ffnn,
            Linear(self._tan_ffnn.get_output_dim(),
                   self.vocab.get_vocab_size("tan-string-labels"))))
    self._metric = BinaryF1()
def __init__(self, input_dim=13, num_classes=9, d_model=64, n_head=2,
             n_layers=5, d_inner=128, activation="relu",
             dropout=0.017998950510888446, max_len=200):
    super(PETransformerModel, self).__init__()
    self.modelname = f"PeTransformerEncoder_input-dim={input_dim}_num-classes={num_classes}_" \
                     f"d-model={d_model}_d-inner={d_inner}_n-layers={n_layers}_n-head={n_head}_" \
                     f"dropout={dropout}"
    encoder_layer = TransformerEncoderLayer(d_model, n_head, d_inner, dropout, activation)
    encoder_norm = LayerNorm(d_model)
    self.inlinear = Linear(input_dim, d_model)
    self.relu = ReLU()
    self.transformerencoder = TransformerEncoder(encoder_layer, n_layers, encoder_norm)
    self.flatten = Flatten()
    self.outlinear = Linear(d_model, num_classes)
    self.pe = PositionalEncoding(d_model, max_len=max_len)
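# A plausible forward pass for PETransformerModel, shown as a sketch only: the
# max-pooling over time and the log-softmax output are assumptions consistent with
# the layers defined above (self.flatten is left unused in this sketch).
def forward(self, x):
    # x: (N, T, input_dim)
    x = self.relu(self.inlinear(x))        # project each timestep to d_model
    x = x.transpose(0, 1)                  # (T, N, d_model) for the encoder
    x = self.pe(x)                         # add positional encodings
    x = self.transformerencoder(x)
    x = x.transpose(0, 1)                  # back to (N, T, d_model)
    x = x.max(dim=1)[0]                    # pool over the time dimension
    logits = self.outlinear(x)
    return F.log_softmax(logits, dim=-1)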
def __init__(self, vocab: Vocabulary,
             sentence_encoder: SentenceEncoder,
             question_encoder: SlotSequenceEncoder,
             span_selector: PruningSpanSelector,
             classify_invalids: bool = True,
             invalid_hidden_dim: int = 100,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None):
    super(QuestionToSpanModel, self).__init__(vocab, regularizer)
    self._sentence_encoder = sentence_encoder
    self._question_encoder = question_encoder
    self._span_selector = span_selector
    self._classify_invalids = classify_invalids
    self._invalid_hidden_dim = invalid_hidden_dim
    injected_embedding_dim = (self._sentence_encoder.get_output_dim()
                              + self._question_encoder.get_output_dim())
    extra_input_dim = self._span_selector.get_extra_input_dim()
    if injected_embedding_dim != extra_input_dim:
        raise ConfigurationError(
            "Sum of pred rep and question embedding dim %s did not match "
            "span selector injection dim of %s"
            % (injected_embedding_dim, extra_input_dim))
    if self._classify_invalids:
        self._invalid_pred = Sequential(
            Linear(extra_input_dim, self._invalid_hidden_dim),
            ReLU(),
            Linear(self._invalid_hidden_dim, 1))
        self._invalid_metric = BinaryF1()
def __init__(self):
    super(Net, self).__init__()
    self.conv1 = Conv2d(1, 10, kernel_size=5)
    self.conv2 = Conv2d(10, 20, kernel_size=5)
    self.conv2_drop = Dropout2d()
    self.fc1 = Linear(320, 50)
    self.fc2 = Linear(50, 10)
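# A forward pass consistent with these layer shapes (the classic PyTorch MNIST
# example network); added as an illustrative sketch, not part of the original snippet.
def forward(self, x):
    x = F.relu(F.max_pool2d(self.conv1(x), 2))                   # 28x28 -> 24x24 -> 12x12
    x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))  # 12x12 -> 8x8 -> 4x4
    x = x.view(-1, 320)                                           # 20 * 4 * 4 = 320
    x = F.relu(self.fc1(x))
    x = F.dropout(x, training=self.training)
    x = self.fc2(x)
    return F.log_softmax(x, dim=1)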
def __init__(self, d: int, q: int, layers: int, activation=ReLU,
             init_f=xavier_normal_, bias=False):
    """
    :param d: Dimension of the input data
    :param q: Number of hidden neurons in layer 1
    :param layers: Number of layers
    :param activation: Activation function
    :param init_f: Initialization function
    """
    super(FCNetwork, self).__init__()
    self._init_f = init_f
    self._bias = bias
    self._fc_layers = []
    self._activation_layers = []
    for i in range(layers):
        layer = Linear(d if i == 0 else q, q, bias=bias)
        activation_layer = activation()
        self._fc_layers.append(layer)
        self._activation_layers.append(activation_layer)
        self.add_module("Dense layer {}".format(i), layer)
        self.add_module("Activation layer {}".format(i), activation_layer)
    self._last_fc = Linear(q, 2)
    self._softmax = Softmax()
    self._initialize_weights()
    self.float()
    self.to(utils.get_device())
def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, use_gate=False):
    # Reorders the layer operations as done in https://arxiv.org/pdf/1910.06764.pdf (GTrXL).
    # d_model: dimension of the embedding for each input
    super(StableTransformerLayer, self).__init__()
    self.use_gate = use_gate
    self.gate_mha = GRUGate(d_model)
    self.gate_mlp = GRUGate(d_model)
    self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)
    self.linear1 = Linear(d_model, dim_feedforward)
    self.dropout = Dropout(dropout)
    self.linear2 = Linear(dim_feedforward, d_model)
    self.norm1 = LayerNorm(d_model)
    self.norm2 = LayerNorm(d_model)
    self.dropout1 = Dropout(dropout)
    self.dropout2 = Dropout(dropout)
    self.activation = F.relu
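# A sketch of a pre-norm, gated forward pass in the style of the GTrXL paper cited
# above; the exact gating placement is an assumption, not the original implementation.
def forward(self, src, src_mask=None, src_key_padding_mask=None):
    # Attention sub-block: LayerNorm applied before attention (pre-norm).
    src2 = self.norm1(src)
    src2 = self.self_attn(src2, src2, src2, attn_mask=src_mask,
                          key_padding_mask=src_key_padding_mask)[0]
    src = self.gate_mha(src, F.relu(src2)) if self.use_gate else src + self.dropout1(src2)
    # Feedforward sub-block, also pre-norm, with its own gate.
    src2 = self.norm2(src)
    src2 = self.linear2(self.dropout(self.activation(self.linear1(src2))))
    src = self.gate_mlp(src, F.relu(src2)) if self.use_gate else src + self.dropout2(src2)
    return src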
def __init__(self):
    super(Net, self).__init__()
    self.conv1 = Conv2d(3, 6, 5)
    self.pool = MaxPool2d(2, 2)
    self.conv2 = Conv2d(6, 16, 5)
    self.fc1 = Linear(16 * 5 * 5, 120)
    self.fc2 = Linear(120, 84)
    self.fc3 = Linear(84, 10)
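# A forward pass matching these layer shapes (as in the PyTorch CIFAR-10 tutorial
# network); added as an illustrative sketch, not part of the original snippet.
def forward(self, x):
    x = self.pool(F.relu(self.conv1(x)))   # 32x32 -> 28x28 -> 14x14
    x = self.pool(F.relu(self.conv2(x)))   # 14x14 -> 10x10 -> 5x5
    x = x.view(-1, 16 * 5 * 5)
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    return self.fc3(x)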
def __init__(self, state_feats, max_actions, hidden=16):
    super().__init__()
    self.in_dim = state_feats
    self.hidden = hidden
    self.max_actions = max_actions
    self.lin = Sequential(Linear(self.in_dim, self.hidden), Tanh())
    self.out = Linear(self.hidden, self.max_actions)
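# Likely forward pass for the network above (a sketch; the original forward is not shown):
def forward(self, state):
    return self.out(self.lin(state))   # one score/logit per action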
def __init__(self, embA_size: int, embB_size: int, hidden_dim: int):
    super(SpanRepAssembly, self).__init__()
    self.embA_size = embA_size
    self.embB_size = embB_size
    self.hidden_dim = hidden_dim
    self.hiddenA = TimeDistributed(Linear(embA_size, hidden_dim))
    self.hiddenB = TimeDistributed(Linear(embB_size, hidden_dim, bias=False))
def __init__(self, state_feats, action_feats, hidden=16, layers=1):
    super().__init__()
    self.in_dim = state_feats + action_feats
    self.hidden = hidden
    self.lin = Sequential(Linear(self.in_dim, self.hidden), Tanh(),
                          Linear(self.hidden, self.hidden // 10), Tanh())
    self.out = Linear(self.hidden // 10, 1)
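# Likely usage for the critic above (a sketch; it assumes the state and action
# features are concatenated along the last dimension before the first Linear layer):
def forward(self, state, action):
    x = torch.cat([state, action], dim=-1)   # (N, state_feats + action_feats)
    return self.out(self.lin(x))             # scalar value per (state, action) pair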
def __init__(self, vocab: Vocabulary,
             source_embedder: TextFieldEmbedder,
             sentence_encoder: Seq2VecEncoder,
             sentence_accumulator: Seq2SeqEncoder,
             use_salience: bool,
             use_pos_embedding: bool,
             use_output_bias: bool,
             use_novelty: bool,
             dropout: float = 0.3,
             pos_embedding_num: int = 50,
             pos_embedding_size: int = 128) -> None:
    super(SummaRuNNer, self).__init__(vocab)
    self._source_embedder = source_embedder
    self._sentence_encoder = sentence_encoder
    self._se_output_dim = self._sentence_encoder.get_output_dim()
    self._sentence_accumulator = sentence_accumulator
    self._h_sentence_dim = self._sentence_accumulator.get_output_dim()
    self._dropout_layer = Dropout(dropout)
    self._content_projection_layer = Linear(self._h_sentence_dim, 1)
    self._use_salience = use_salience
    if use_salience:
        self._document_linear_layer = Linear(self._h_sentence_dim,
                                             self._h_sentence_dim, bias=True)
        self._salience_linear_layer = Linear(self._h_sentence_dim,
                                             self._h_sentence_dim, bias=False)
    self._use_pos_embedding = use_pos_embedding
    if use_pos_embedding:
        self._pos_embedding_num = pos_embedding_num
        self._pos_embedding_size = pos_embedding_size
        self._pos_embedding_layer = Embedding(pos_embedding_num, pos_embedding_size)
        self._pos_projection_layer = Linear(pos_embedding_size, 1)
    self._use_output_bias = use_output_bias
    if use_output_bias:
        self._output_bias = Parameter(torch.zeros(1).uniform_(-0.1, 0.1),
                                      requires_grad=True)
    self._use_novelty = use_novelty
    if use_novelty:
        self._novelty_linear_layer = Linear(self._h_sentence_dim,
                                            self._h_sentence_dim, bias=False)
def __init__(self, hidden_dim_size, apsect_dim_size):
    super(Attention, self).__init__()
    self.hidden_dim_size = hidden_dim_size
    self.apsect_dim_size = apsect_dim_size
    # (d, d)
    self.W_h = Linear(self.hidden_dim_size, self.hidden_dim_size)
    # (d_a, d_a)
    self.W_v = Linear(self.apsect_dim_size, self.apsect_dim_size)
    # (1, d + d_a)
    self.w = Linear(self.hidden_dim_size + self.apsect_dim_size, 1)
    # projection parameters W_p and W_x
    self.W_p = Linear(self.hidden_dim_size, self.hidden_dim_size)
    self.W_x = Linear(self.hidden_dim_size, self.hidden_dim_size)
def __init__(self, embed_dim, num_heads, dropout=0.):
    super(RelationAwareMultiheadAttention, self).__init__()
    self.embed_dim = embed_dim
    self.num_heads = num_heads
    self.head_dim = embed_dim // num_heads
    assert self.head_dim * num_heads == self.embed_dim, "embed_dim must be divisible by num_heads"
    self.dropout = nn.Dropout(dropout)
    self.W_Q = Linear(embed_dim, embed_dim, bias=False)
    self.W_K = Linear(embed_dim, embed_dim, bias=False)
    self.W_V = Linear(embed_dim, embed_dim, bias=False)
    # The number of different relations is 33. We add another dummy relation for
    # pairs that are in fact padding; this is necessary to batch multiple input
    # sequences of varying lengths together.
    self.relation_bias = nn.Embedding(34, embed_dim)
def __init__(self, vocab: Vocabulary,
             source_embedder: TextFieldEmbedder,
             sentence_encoder: Seq2VecEncoder,
             sentence_accumulator: Seq2SeqEncoder,
             use_salience: bool,
             use_pos_embedding: bool,
             use_output_bias: bool,
             use_novelty: bool,
             dropout: float = 0.3,
             pos_embedding_num: int = 50,
             pos_embedding_size: int = 128) -> None:
    super().__init__(vocab)
    self._source_embedder = source_embedder
    self._sentence_encoder = sentence_encoder
    self._se_output_dim = self._sentence_encoder.get_output_dim()
    self._sentence_accumulator = sentence_accumulator
    self._h_sentence_dim = self._sentence_accumulator.get_output_dim()
    self._dropout_layer = Dropout(dropout)
    self._content_projection_layer = Linear(self._h_sentence_dim, 1)
    # options to train the system on
    self._use_pos_embedding = use_pos_embedding
    self._use_salience = use_salience
    self._use_novelty = use_novelty
    if use_salience:
        self._document_linear_layer = Linear(self._h_sentence_dim,
                                             self._h_sentence_dim, bias=True)
        self._salience_linear_layer = Linear(self._h_sentence_dim,
                                             self._h_sentence_dim, bias=False)
    if use_pos_embedding:
        self._pos_embedding_num = pos_embedding_num
        self._pos_embedding_size = pos_embedding_size
        self._pos_embedding_layer = Embedding(pos_embedding_num, pos_embedding_size)
        self._pos_projection_layer = Linear(pos_embedding_size, 1)
    if use_novelty:
        self._novelty_linear_layer = Linear(self._h_sentence_dim,
                                            self._h_sentence_dim, bias=False)
def __init__(self, vocab: Vocabulary,
             bert_model: Union[str, BertModel],
             embedding_dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             label_smoothing: float = None,
             ignore_span_metric: bool = False,
             srl_eval_path: str = DEFAULT_SRL_EVAL_PATH) -> None:
    super().__init__(vocab, regularizer)
    if isinstance(bert_model, str):
        self.bert_model = BertModel.from_pretrained(bert_model)
    else:
        self.bert_model = bert_model
    self.num_classes = self.vocab.get_vocab_size("labels")
    if srl_eval_path is not None:
        # For the span based evaluation, we don't want to consider labels
        # for verb, because the verb index is provided to the model.
        self.span_metric = SrlEvalScorer(srl_eval_path, ignore_classes=["V"])
    else:
        self.span_metric = None
    self.tag_projection_layer = Linear(self.bert_model.config.hidden_size,
                                       self.num_classes)
    self.embedding_dropout = Dropout(p=embedding_dropout)
    self._label_smoothing = label_smoothing
    self.ignore_span_metric = ignore_span_metric
    initializer(self)
def __init__(self, d_model, nhead, num_encoder_layers, num_decoder_layers,
             dim_feedforward, dropout, activation, src_vocab_size, tgt_vocab_size):
    super(TransformerModel, self).__init__()
    self.pos_encoder = PositionalEncoding(d_model=d_model, dropout=0.1)  # , max_len=100
    encoder_layer = TransformerEncoderLayer(d_model, nhead, dim_feedforward,
                                            dropout, activation)
    encoder_norm = LayerNorm(d_model)
    self.encoder = TransformerEncoder(encoder_layer, num_encoder_layers, encoder_norm)
    decoder_layer = TransformerDecoderLayer(d_model, nhead, dim_feedforward,
                                            dropout, activation)
    decoder_norm = LayerNorm(d_model)
    self.decoder = TransformerDecoder(decoder_layer, num_decoder_layers, decoder_norm)
    self.d_model = d_model
    self.nhead = nhead
    self.linear = Linear(d_model, tgt_vocab_size)
    self.transformer = Transformer(d_model=d_model, nhead=nhead,
                                   num_encoder_layers=num_encoder_layers,
                                   num_decoder_layers=num_decoder_layers,
                                   dim_feedforward=dim_feedforward,
                                   dropout=dropout, activation=activation)
    self.encoder_embedding = nn.Embedding(src_vocab_size, d_model)
    self.decoder_embedding = nn.Embedding(tgt_vocab_size, d_model)
    self._reset_parameters()
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             binary_feature_dim: int,
             embedding_dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             label_smoothing: float = None,
             ignore_span_metric: bool = False) -> None:
    super(SemanticRoleLabeler, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")
    # For the span based evaluation, we don't want to consider labels
    # for verb, because the verb index is provided to the model.
    self.span_metric = SpanBasedF1Measure(vocab, tag_namespace="labels",
                                          ignore_classes=["V"])
    self.encoder = encoder
    # There are exactly 2 binary features for the verb predicate embedding.
    self.binary_feature_embedding = Embedding(2, binary_feature_dim)
    self.tag_projection_layer = TimeDistributed(
        Linear(self.encoder.get_output_dim(), self.num_classes))
    self.embedding_dropout = Dropout(p=embedding_dropout)
    self._label_smoothing = label_smoothing
    self.ignore_span_metric = ignore_span_metric
    check_dimensions_match(text_field_embedder.get_output_dim() + binary_feature_dim,
                           encoder.get_input_dim(),
                           "text embedding dim + verb indicator embedding dim",
                           "encoder input dim")
    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             stacked_encoder: Seq2SeqEncoder,
             binary_feature_dim: int,
             initializer: InitializerApplicator,
             embedding_dropout: float = 0.0) -> None:
    super(SemanticRoleLabeler, self).__init__(vocab)
    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")
    # For the span based evaluation, we don't want to consider labels
    # for verb, because the verb index is provided to the model.
    self.span_metric = SpanBasedF1Measure(vocab, tag_namespace="labels",
                                          ignore_classes=["V"])
    self.stacked_encoder = stacked_encoder
    # There are exactly 2 binary features for the verb predicate embedding.
    self.binary_feature_embedding = Embedding(2, binary_feature_dim)
    self.tag_projection_layer = TimeDistributed(
        Linear(self.stacked_encoder.get_output_dim(), self.num_classes))
    self.embedding_dropout = Dropout(p=embedding_dropout)
    initializer(self)
    if (text_field_embedder.get_output_dim()
            + binary_feature_dim != stacked_encoder.get_input_dim()):
        raise ConfigurationError(
            "The SRL Model uses a binary verb indicator feature, meaning "
            "the input dimension of the stacked_encoder must be equal to "
            "the output dimension of the text_field_embedder plus the "
            "binary feature embedding dimension.")
def main(seed: int):
    print("seed =", seed)
    for N in (1, 64, 4096):
        torch.manual_seed(seed)
        myvgg = vgg.vgg16()
        assert isinstance(myvgg.classifier[0], Linear)
        assert isinstance(myvgg.classifier[3], Linear)
        if N == 1:
            # sequential
            myvgg.classifier[0] = SequentialLinear(myvgg.classifier[0])
            myvgg.classifier[3] = SequentialLinear(myvgg.classifier[3])
        elif N == 64:
            # semi-synchronous
            myvgg.classifier[0] = SemisyncLinear(myvgg.classifier[0])
            myvgg.classifier[3] = SemisyncLinear(myvgg.classifier[3])
        myvgg.classifier[-1] = Linear(4096, 10)
        # Rebuild the classifier without the Dropout layers.
        myvgg.classifier = nn.Sequential(
            myvgg.classifier[0],  # Linear (semi-sync)
            myvgg.classifier[1],  # ReLU
            myvgg.classifier[3],  # Linear (semi-sync)
            myvgg.classifier[4],  # ReLU
            myvgg.classifier[6],  # Linear
        )
        print(myvgg)
        myvgg.to(device)
        record = conduct(myvgg, *(preprocess.cifar_10_for_vgg_loaders()))
        write_final_record(record, N)
def __init__(self, input_size, output_size, convs_configs, dropout=.5, normalize=True):
    super(MultiCNN, self).__init__()
    concat_cnn_output_size = 0
    self._normalize = normalize
    self.cnn_modules = nn.ModuleList()
    for configs in convs_configs:
        channel_size = configs['channel_size']
        kernel_size = configs['kernel_size']
        padding = configs['padding']
        concat_cnn_output_size += channel_size
        module = Conv1d(input_size, channel_size,
                        kernel_size=kernel_size, padding=padding)
        self.cnn_modules.append(module)
    self.batch_normlize = BatchNorm1d(concat_cnn_output_size)
    self.dropout = Dropout(dropout)
    self.output_linear = Linear(concat_cnn_output_size, output_size)
def __init__(self, input_size, embed_size, hidden_size, aspect_size,
             num_class, embedding=None):
    super(ATAELSTM, self).__init__()
    self.embed_size = embed_size
    self.aspect_size = aspect_size
    self.num_class = num_class
    # embedding
    if embedding is not None:
        self.embeding = Embedding.from_pretrained(torch.Tensor(embedding))
        self.embeding.weight.requires_grad = False
    else:
        self.embeding = Embedding(input_size, embed_size, padding_idx=0)
    # (batch size, N, embedding size)
    self.apect_embeding = Embedding(aspect_size, embed_size)
    self.rnn = LSTM(input_size=embed_size, hidden_size=hidden_size,
                    bidirectional=True, batch_first=True, num_layers=1)
    self.att = Attention(hidden_size * 2, aspect_size)
    self.fc = Linear(hidden_size * 2, num_class, bias=True)
def __init__(self, vocab: Vocabulary,
             sentence_encoder: SentenceEncoder,
             clause_embedding_dim: int,
             slot_embedding_dim: int,
             span_selector: SpanSelector,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None):
    super(ClauseAnsweringModel, self).__init__(vocab, regularizer)
    self._sentence_encoder = sentence_encoder
    self._clause_embedding_dim = clause_embedding_dim
    self._slot_embedding_dim = slot_embedding_dim
    self._span_selector = span_selector
    self._question_embedding_dim = span_selector.get_extra_input_dim()
    self._clause_embedding = Embedding(
        vocab.get_vocab_size("clause-template-labels"), clause_embedding_dim)
    self._slot_embedding = Embedding(
        vocab.get_vocab_size("answer-slot-labels"), slot_embedding_dim)
    self._combined_embedding_dim = (self._sentence_encoder.get_output_dim()
                                    + self._clause_embedding_dim
                                    + self._slot_embedding_dim)
    self._question_projection = Linear(self._combined_embedding_dim,
                                       self._question_embedding_dim)
    if self._question_embedding_dim == 0:
        raise ConfigurationError(
            "Question embedding dim (span selector extra input dim) cannot be 0")
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             # binary_feature_dim: int,
             embedding_dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(LstmSwag, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.encoder = encoder
    self.embedding_dropout = Dropout(p=embedding_dropout)
    self.output_prediction = Linear(self.encoder.get_output_dim(), 1, bias=False)
    check_dimensions_match(text_field_embedder.get_output_dim(),
                           encoder.get_input_dim(),
                           "text embedding dim", "eq encoder input dim")
    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             stacked_encoder: Seq2SeqEncoder,
             predicate_feature_dim: int,
             dim_hidden: int = 100,
             embedding_dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None):
    super(SpanDetector, self).__init__(vocab, regularizer)
    self.dim_hidden = dim_hidden
    self.text_field_embedder = text_field_embedder
    self.predicate_feature_embedding = Embedding(2, predicate_feature_dim)
    self.embedding_dropout = Dropout(p=embedding_dropout)
    self.threshold_metric = ThresholdMetric()
    self.stacked_encoder = stacked_encoder
    self.span_hidden = SpanRepAssembly(
        self.stacked_encoder.get_output_dim(),
        self.stacked_encoder.get_output_dim(),
        self.dim_hidden)
    self.pred = TimeDistributed(Linear(self.dim_hidden, 1))
def __init__(self, vocab: Vocabulary,
             embedder: TextFieldEmbedder,
             encoder: Seq2VecEncoder,
             feedforward: Optional[FeedForward] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             dropout: float = 0.0,
             label_name: str = 'target-sentiment-labels') -> None:
    super().__init__(vocab, regularizer)
    '''
    :param vocab: A Vocabulary, required in order to compute sizes for
                  input/output projections.
    :param embedder: Used to embed the text.
    :param encoder: Encodes the sentence/text, e.g. an LSTM.
    :param feedforward: An optional feed forward layer to apply after the encoder.
    :param initializer: Used to initialize the model parameters.
    :param regularizer: If provided, will be used to calculate the regularization
                        penalty during training.
    :param dropout: Dropout applied after each layer apart from the last layer.
                    All dropout applied to time-based data will be `variational
                    dropout`_; all else will be standard dropout.
    :param label_name: Name of the label namespace.

    This is based on the LSTM model by `Tang et al. 2016
    <https://www.aclweb.org/anthology/C16-1311.pdf>`_.
    '''
    self.label_name = label_name
    self.embedder = embedder
    self.encoder = encoder
    self.num_classes = self.vocab.get_vocab_size(self.label_name)
    self.feedforward = feedforward
    if feedforward is not None:
        output_dim = self.feedforward.get_output_dim()
    else:
        output_dim = self.encoder.get_output_dim()
    self.label_projection = Linear(output_dim, self.num_classes)
    self.metrics = {"accuracy": CategoricalAccuracy()}
    self.f1_metrics = {}
    # F1 scores
    label_index_name = self.vocab.get_index_to_token_vocabulary(self.label_name)
    for label_index, _label_name in label_index_name.items():
        _label_name = f'F1_{_label_name.capitalize()}'
        self.f1_metrics[_label_name] = F1Measure(label_index)
    self._variational_dropout = InputVariationalDropout(dropout)
    self._naive_dropout = Dropout(dropout)
    check_dimensions_match(embedder.get_output_dim(), encoder.get_input_dim(),
                           'Embedding', 'Encoder')
    if self.feedforward is not None:
        check_dimensions_match(encoder.get_output_dim(),
                               feedforward.get_input_dim(),
                               'Encoder', 'FeedForward')
    initializer(self)
def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, activation="relu"):
    super(TransformerEncoderLayer, self).__init__()
    self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)
    # Implementation of the feedforward model
    self.linear1 = Linear(d_model, dim_feedforward)
    self.dropout = Dropout(dropout)
    self.linear2 = Linear(dim_feedforward, d_model)
    self.norm1 = LayerNorm(d_model)
    self.norm2 = LayerNorm(d_model)
    self.dropout1 = Dropout(dropout)
    self.dropout2 = Dropout(dropout)
    self.activation = _get_activation_fn(activation)
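# The usual post-norm forward for this layer, mirroring torch.nn.TransformerEncoderLayer;
# added as an illustrative sketch, not part of the original snippet.
def forward(self, src, src_mask=None, src_key_padding_mask=None):
    # Self-attention block with residual connection and LayerNorm.
    src2 = self.self_attn(src, src, src, attn_mask=src_mask,
                          key_padding_mask=src_key_padding_mask)[0]
    src = self.norm1(src + self.dropout1(src2))
    # Position-wise feedforward block with residual connection and LayerNorm.
    src2 = self.linear2(self.dropout(self.activation(self.linear1(src))))
    src = self.norm2(src + self.dropout2(src2))
    return src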
def __init__(self, bert_model_type: str):
    super(BertMlp, self).__init__()
    bert_output_size = 768
    self.bert_layer = BertModel.from_pretrained(bert_model_type)
    for param in self.bert_layer.parameters():
        param.requires_grad = False
    self.dence = Linear(bert_output_size, 5)
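# A plausible forward for BertMlp, shown as a sketch only: the exact BERT call
# signature and the use of the pooled [CLS] output depend on the library version
# used in the original code, which is an assumption here.
def forward(self, input_ids, attention_mask=None):
    outputs = self.bert_layer(input_ids, attention_mask=attention_mask)
    pooled = outputs[1]          # pooled [CLS] representation, shape (N, 768)
    return self.dence(pooled)    # 5-way classification logits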
def setUp(self) -> None:
    torch.manual_seed(0)
    base_layer = Linear(2, 4)
    base_layer.weight = Parameter(
        tensor([[1.7, 0.4, 1, 2.2],
                [1.8, -1, 0.9, -0.2]], requires_grad=True).t())
    base_layer.bias = Parameter(
        tensor([0.0, 0.0, 0.0, 0.0], requires_grad=True))
    self.layer = RotationalLinear(base_layer).to(device)