示例#1
0
    def __init__(self,
                 vocab: Vocabulary,
                 task: str,
                 encoder: Seq2SeqEncoder,
                 png_params_dim: int,
                 label_smoothing: float = 0.0,
                 dropout: float = 0.0,
                 adaptive: bool = False,
                 features: List[str] = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        """Set up the output layers and metrics of a tag decoder.

        Args:
            vocab: Vocabulary used to size the output layers; the number of
                classes is ``vocab.get_vocab_size(task)``.
            task: Name of the vocabulary namespace holding this task's labels.
            encoder: Encoder whose ``get_output_dim()`` fixes the input size
                of every output layer.
            png_params_dim: Forwarded as the first argument of every
                ``LinearWithPGN`` (presumably the size of the
                parameter-generation input -- confirm against that class).
            label_smoothing: Stored on the instance; not used here.
            dropout: Probability handed to ``torch.nn.Dropout``.
            adaptive: When true, the task head is an
                ``AdaptiveLogSoftmaxWithLoss`` instead of a PGN linear layer.
            features: Optional extra feature names; every ``[`` and ``]`` in
                a name is replaced by ``_`` before use.
            initializer: Applied to the fully built module as the last step.
            regularizer: Passed through to the parent constructor.
        """
        super(TagDecoder, self).__init__(vocab, regularizer)

        # Plain (non-module) configuration.
        self.task = task
        self.label_smoothing = label_smoothing
        self.adaptive = adaptive

        # Sub-modules are registered in a fixed order: dropout, then encoder.
        self.dropout = torch.nn.Dropout(p=dropout)
        self.encoder = encoder
        self.output_dim = encoder.get_output_dim()
        self.num_classes = self.vocab.get_vocab_size(task)

        # Brackets are stripped from feature names before they are used as
        # keys of the ModuleDict below.
        cleaned_features = []
        for raw_name in features or []:
            cleaned_features.append(
                raw_name.replace('[', '_').replace(']', '_'))
        self.features = cleaned_features

        # Only plain accuracy is tracked; a top-3 variant is not enabled.
        self.metrics = {"acc": CategoricalAccuracy()}

        if not self.adaptive:
            task_projection = LinearWithPGN(png_params_dim, self.output_dim,
                                            self.num_classes)
            self.task_output = TimeDistributedPGN(task_projection)
        else:
            # TODO: cutoffs are a fixed heuristic (1/15 and 3/15 of the
            # label set).
            first_cutoff = round(self.num_classes / 15)
            self.task_output = AdaptiveLogSoftmaxWithLoss(
                self.output_dim,
                self.num_classes,
                cutoffs=[first_cutoff, 3 * first_cutoff],
                div_value=4.0)

        # One PGN output head per extra feature, sized by its vocabulary.
        self.feature_outputs = torch.nn.ModuleDict()
        for feature in self.features:
            feature_size = vocab.get_vocab_size(feature)
            self.feature_outputs[feature] = TimeDistributedPGN(
                LinearWithPGN(png_params_dim, self.output_dim, feature_size))
        self.features_metrics = {
            feature: {"acc": CategoricalAccuracy()}
            for feature in self.features
        }

        initializer(self)
示例#2
0
 def get_calc(context):
     """Build an adaptive log-softmax layer sized from the entity embeddings.

     ``context`` is accepted but not used. ``self`` is not a parameter; it is
     taken from the enclosing scope.

     The number of classes and the input feature size are read from the two
     dimensions of ``self.entity_embeds.weight``; the cutoffs come from
     ``self.model_params.adaptive_softmax_cutoffs``.

     Raises:
         NotImplementedError: if ``use_hardcoded_cutoffs`` is falsy.
     """
     params = self.model_params
     if not params.use_hardcoded_cutoffs:
         # Only hard-coded cutoffs are supported.
         raise NotImplementedError

     embed_shape = self.entity_embeds.weight.shape
     n_classes = embed_shape[0]
     cutoffs = params.adaptive_softmax_cutoffs
     in_features = embed_shape[1]

     adaptive_softmax = AdaptiveLogSoftmaxWithLoss(in_features,
                                                   n_classes,
                                                   cutoffs,
                                                   div_value=1.0)
     return adaptive_softmax.to(self.device)
示例#3
0
    def __init__(self, num_embeddings, embedding_dim, padding_idx,
                 conv_filters, n_highways, projection_size, vocab_size):
        """Create the character embedder, four LSTMs, four projections and
        the adaptive-softmax loss of the network.

        Args:
            num_embeddings, embedding_dim, padding_idx, conv_filters,
            n_highways: Stored on the instance and forwarded, in this order,
                to ``CharEmbedding``.
            projection_size: Last argument of ``CharEmbedding``; also the
                input size of every LSTM, the output size of every linear
                layer and the input size of the adaptive softmax.
            vocab_size: Number of classes of the adaptive softmax.
        """
        super(ELMoNet, self).__init__()

        self.num_embeddings = num_embeddings
        self.embedding_dim = embedding_dim
        self.padding_idx = padding_idx
        self.conv_filters = conv_filters
        self.n_highways = n_highways
        self.projection_size = projection_size

        self.char_embedding = CharEmbedding(
            num_embeddings, embedding_dim, padding_idx,
            conv_filters, n_highways, projection_size)

        # LSTM hidden width is fixed.
        self.hidden_size = 2048
        hidden = self.hidden_size

        def make_lstm():
            # Single-layer, batch-first LSTM: projection_size -> hidden.
            return nn.LSTM(projection_size, hidden, 1, batch_first=True)

        def make_projection():
            # Maps an LSTM output back down to projection_size.
            return nn.Linear(hidden, projection_size)

        # Creation order is kept fixed (it determines parameter order and
        # how the random initialisation is consumed).
        self.lstm1f = make_lstm()
        self.lstm2f = make_lstm()
        self.lstm1r = make_lstm()
        self.lstm2r = make_lstm()

        self.linear1f = make_projection()
        self.linear1r = make_projection()
        self.linear2f = make_projection()
        self.linear2r = make_projection()

        softmax_cutoffs = [10, 100, 1000]
        self.adap_loss = AdaptiveLogSoftmaxWithLoss(
            projection_size, vocab_size, softmax_cutoffs)
示例#4
0
    def __init__(self,
                 input_dim,
                 num_classes,
                 label_smoothing: float = 0.03,
                 adaptive: bool = False) -> None:
        """Create the task output layer of the decoder.

        Args:
            input_dim: Size of the feature vectors fed to the output layer.
            num_classes: Number of output classes.
            label_smoothing: Stored on the instance; not used here.
            adaptive: When true, use ``AdaptiveLogSoftmaxWithLoss`` with
                cutoffs at ``round(num_classes / 15)`` and three times that
                value; otherwise use a plain ``Linear`` layer.
        """
        super(TagDecoder, self).__init__()

        self.label_smoothing = label_smoothing
        self.num_classes = num_classes
        self.adaptive = adaptive

        if self.adaptive:
            first_cutoff = round(self.num_classes / 15)
            adaptive_cutoffs = [first_cutoff, 3 * first_cutoff]
            self.task_output = AdaptiveLogSoftmaxWithLoss(input_dim,
                                                          self.num_classes,
                                                          cutoffs=adaptive_cutoffs,
                                                          div_value=4.0)
        else:
            # Use input_dim here: this constructor never assigns
            # self.output_dim, so reading it would raise AttributeError.
            self.task_output = Linear(input_dim, self.num_classes)
示例#5
0
    def __init__(self,
                 vocab: Vocabulary,
                 task: str,
                 encoder: Seq2SeqEncoder,
                 lang_embed_dim: int = None,
                 use_lang_feedforward: bool = False,
                 lang_feedforward: FeedForward = None,
                 label_smoothing: float = 0.0,
                 dropout: float = 0.0,
                 adaptive: bool = False,
                 features: List[str] = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        """Set up the output layers, optional language embedding and metrics.

        Args:
            vocab: Vocabulary used to size the embedding and output layers.
            task: Vocabulary namespace holding this task's labels.
            encoder: Encoder whose ``get_output_dim()`` fixes the input size
                of the output layers.
            lang_embed_dim: If not ``None``, an ``Embedding`` over the
                ``"langs"`` namespace with this dimension is created.
            use_lang_feedforward: If true and a language embedding exists, a
                feed-forward layer is attached as ``self.lang_feedforward``.
            lang_feedforward: Feed-forward module to use; when falsy, a
                one-layer ELU ``FeedForward`` of width ``output_dim`` is
                built instead.
            label_smoothing: Stored on the instance; not used here.
            dropout: Probability handed to ``torch.nn.Dropout``.
            adaptive: When true, the task head is an
                ``AdaptiveLogSoftmaxWithLoss`` instead of a linear layer.
            features: Optional extra feature namespaces, each given its own
                linear output head and accuracy metric.
            initializer: Applied to the fully built module as the last step.
            regularizer: Passed through to the parent constructor.
        """
        super(TagDecoder, self).__init__(vocab, regularizer)

        # Optional language embedding (registered before the other modules).
        self.lang_embedding = (
            Embedding(self.vocab.get_vocab_size("langs"), lang_embed_dim)
            if lang_embed_dim is not None else None
        )

        self.dropout = torch.nn.Dropout(p=dropout)

        self.task = task
        self.encoder = encoder
        self.output_dim = encoder.get_output_dim()
        self.label_smoothing = label_smoothing
        self.num_classes = self.vocab.get_vocab_size(task)
        self.adaptive = adaptive
        self.features = features or []

        self.use_lang_feedforward = use_lang_feedforward
        if use_lang_feedforward and self.lang_embedding is not None:
            if not lang_feedforward:
                # No module supplied: fall back to a single ELU layer that
                # keeps the encoder's output width.
                lang_feedforward = FeedForward(self.output_dim, 1,
                                               self.output_dim,
                                               Activation.by_name("elu")())
            self.lang_feedforward = lang_feedforward

        # Only plain accuracy is tracked; a top-3 variant is not enabled.
        self.metrics = {"acc": CategoricalAccuracy()}

        if not self.adaptive:
            self.task_output = TimeDistributed(
                Linear(self.output_dim, self.num_classes))
        else:
            # TODO: cutoffs are a fixed heuristic (1/15 and 3/15 of the
            # label set).
            first_cutoff = round(self.num_classes / 15)
            self.task_output = AdaptiveLogSoftmaxWithLoss(
                self.output_dim,
                self.num_classes,
                cutoffs=[first_cutoff, 3 * first_cutoff],
                div_value=4.0)

        # One linear output head per extra feature, sized by its vocabulary.
        self.feature_outputs = torch.nn.ModuleDict()
        for feature in self.features:
            feature_size = vocab.get_vocab_size(feature)
            self.feature_outputs[feature] = TimeDistributed(
                Linear(self.output_dim, feature_size))
        self.features_metrics = {
            feature: {"acc": CategoricalAccuracy()}
            for feature in self.features
        }

        initializer(self)
示例#6
0
    def __init__(self,
                 vocab: Vocabulary,
                 task: str,
                 encoder: Seq2SeqEncoder,
                 prev_task: str,
                 prev_task_embed_dim: int = None,
                 label_smoothing: float = 0.0,
                 dropout: float = 0.0,
                 adaptive: bool = False,
                 features: List[str] = None,
                 metric: str = "acc",
                 loss_weight: float = 1.0,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        """Set up the output layers, optional previous-task embedding and
        evaluation metric of a tag decoder.

        Args:
            vocab: Vocabulary used to size the embedding and output layers.
                It may be extended in place with the ``dep_encoded``
                namespace (see below).
            task: Vocabulary namespace holding this task's labels.
            encoder: Encoder whose ``get_output_dim()`` fixes the input size
                of the output layers.
            prev_task: Name of the preceding task, or ``None``. Selects the
                namespace that sizes the previous-task embedding.
            prev_task_embed_dim: Dimension of the previous-task embedding;
                ``None`` or ``0`` disables it.
            label_smoothing: Stored on the instance; not used here.
            dropout: Probability handed to ``torch.nn.Dropout``.
            adaptive: When true, the task head is an
                ``AdaptiveLogSoftmaxWithLoss`` instead of a linear layer.
            features: Optional extra feature namespaces, each given its own
                linear output head and accuracy metric.
            metric: ``"acc"`` or ``"span_f1"``; anything else logs a warning
                and falls back to accuracy.
            loss_weight: Stored on the instance; not used here.
            initializer: Applied to the fully built module as the last step.
            regularizer: Passed through to the parent constructor.
        """
        super(TagDecoder, self).__init__(vocab, regularizer)

        self.task = task
        self.dropout = torch.nn.Dropout(p=dropout)
        self.encoder = encoder
        self.output_dim = encoder.get_output_dim()
        self.label_smoothing = label_smoothing
        self.num_classes = self.vocab.get_vocab_size(task)
        self.adaptive = adaptive
        self.features = features if features else []
        self.metric = metric
        self.loss_weight = loss_weight

        # Add every possible relative encoding to the vocabulary.
        # NOTE(review): the lookup uses the default namespace and treats
        # index 1 as "token not present yet" -- presumably the OOV index;
        # confirm against the Vocabulary implementation.
        if self.vocab.get_token_index('100,root') == 1:
            for head in self.vocab.get_token_to_index_vocabulary('head_tags'):
                all_encodings = get_all_relative_encodings(head)
                self.vocab.add_tokens_to_namespace(tokens=all_encodings, namespace='dep_encoded')
            # Make sure the end token '100,root' is present as well.
            self.vocab.add_token_to_namespace(token='100,root', namespace='dep_encoded')

        self.prev_task_tag_embedding = None
        # Compare by value: an identity test against the literal 0 depends on
        # interpreter int caching and lets 0.0 through.
        if prev_task_embed_dim is not None and prev_task_embed_dim != 0 and prev_task is not None:
            # NOTE(review): 'rependency' is kept verbatim; confirm it is the
            # intended task name and not a misspelling of 'dependency'.
            if prev_task != 'rependency':
                self.prev_task_tag_embedding = Embedding(self.vocab.get_vocab_size(prev_task), prev_task_embed_dim)
            else:
                self.prev_task_tag_embedding = Embedding(self.vocab.get_vocab_size('dep_encoded'), prev_task_embed_dim)

        # Choose the metric to use for the evaluation (from the defined
        # "metric" value of the task). If not specified, default to accuracy.
        if self.metric == "acc":
            self.metrics = {"acc": CategoricalAccuracy()}
        elif self.metric == "span_f1":
            self.metrics = {"span_f1": SpanBasedF1Measure(
                self.vocab, tag_namespace=self.task, label_encoding="BIO")}
        else:
            logger.warning(f"ERROR. Metric: {self.metric} unrecognized. Using accuracy instead.")
            self.metrics = {"acc": CategoricalAccuracy()}

        if self.adaptive:
            # TODO: cutoffs are a fixed heuristic (1/15 and 3/15 of the
            # label set).
            adaptive_cutoffs = [round(self.num_classes / 15), 3 * round(self.num_classes / 15)]
            self.task_output = AdaptiveLogSoftmaxWithLoss(self.output_dim,
                                                          self.num_classes,
                                                          cutoffs=adaptive_cutoffs,
                                                          div_value=4.0)
        else:
            self.task_output = TimeDistributed(Linear(self.output_dim, self.num_classes))

        # One linear output head and accuracy metric per extra feature.
        self.feature_outputs = torch.nn.ModuleDict()
        self.features_metrics = {}
        for feature in self.features:
            self.feature_outputs[feature] = TimeDistributed(Linear(self.output_dim,
                                                                   vocab.get_vocab_size(feature)))
            self.features_metrics[feature] = {
                "acc": CategoricalAccuracy(),
            }

        initializer(self)
示例#7
0
def train(train_dataset, train_dataset_reverse, config_path, char_lexicon,
          word_lexicon, batch_size, learning_rate, device, max_epoch,
          output_dir):
    """Train an ``ELMoNet`` with an adaptive-softmax loss on paired
    forward/backward datasets.

    Args:
        train_dataset: Dataset yielding ``(batch, label)`` pairs for the
            forward direction.
        train_dataset_reverse: Dataset yielding ``(batch, label)`` pairs for
            the backward direction; iterated in lockstep with the forward
            one.
        config_path: Path of a JSON file providing ``embedding_dim``,
            ``filters``, ``n_highways`` and ``projection_size``.
        char_lexicon: Mapping whose length is the number of character
            embeddings and whose ``'<pad>'`` entry is the padding index.
        word_lexicon: Collection whose length is the number of output
            classes of the adaptive softmax.
        batch_size: Batch size of both data loaders.
        learning_rate: Learning rate of the Adam optimizer.
        device: Device the model, the loss and the batches are moved to.
        max_epoch: Number of epochs to run.
        output_dir: Passed to ``save_model`` after every epoch and to
            ``save_log`` once at the end.
    """
    with open(config_path, 'r', encoding='utf-8') as f:
        config = json.load(f)

    # Both loaders keep dataset order (no shuffling) so that the forward and
    # backward batches stay aligned when zipped together.
    forward_loader = DataLoader(train_dataset,
                                batch_size=batch_size,
                                shuffle=False)
    backward_loader = DataLoader(train_dataset_reverse,
                                 batch_size=batch_size,
                                 shuffle=False)

    num_embeddings = len(char_lexicon)
    padding_idx = char_lexicon['<pad>']
    model = ELMoNet(num_embeddings, config['embedding_dim'], padding_idx,
                    config['filters'], config['n_highways'],
                    config['projection_size'])

    model.train(True)
    model.to(device)

    cutoffs = [100, 1000, 10000]
    word_lexicon_size = len(word_lexicon)

    adap_loss = AdaptiveLogSoftmaxWithLoss(config['projection_size'],
                                           word_lexicon_size, cutoffs)
    adap_loss = adap_loss.to(device)

    # The adaptive softmax owns trainable weights of its own; they must be
    # handed to the optimizer too, otherwise the output layer is never
    # updated and its gradients are never cleared by zero_grad().
    # NOTE(review): save_model() below only receives `model`, so these
    # weights are not part of the saved checkpoint -- confirm whether that
    # is intended.
    trainable_params = list(model.parameters()) + list(adap_loss.parameters())
    optimizer = torch.optim.Adam(trainable_params, lr=learning_rate)

    num_batches = len(forward_loader)
    # Accumulates one entry per epoch; written out once after training.
    epoch_log = {}

    for epoch in range(max_epoch):

        loss = 0

        # A fresh progress iterator is needed every epoch: the zip/enumerate
        # chain is exhausted after a single pass.
        trange = tqdm(enumerate(zip(forward_loader, backward_loader)),
                      total=num_batches,
                      desc='training',
                      ascii=True)

        for i, ((forward_batch, forward_label), (backward_batch,
                                                 backward_label)) in trange:

            optimizer.zero_grad()

            forward_feature, backward_feature = \
                model(forward_batch.to(device), backward_batch.to(device))

            # Flatten the first two dimensions of features and labels so
            # that each position becomes one row for the loss.
            forward_feature = forward_feature.view(-1,
                                                   forward_feature.size()[2])
            forward_label = forward_label.view(-1)
            _, forward_loss = \
                adap_loss(forward_feature, forward_label.to(device))

            backward_feature = backward_feature.view(
                -1,
                backward_feature.size()[2])
            backward_label = backward_label.view(-1)
            _, backward_loss = \
                adap_loss(backward_feature, backward_label.to(device))

            # One backward pass over the summed loss yields the same
            # gradients as two separate passes and cannot fail if the two
            # graphs share intermediate nodes.
            (forward_loss + backward_loss).backward()

            optimizer.step()

            loss += (forward_loss.item() + backward_loss.item()) / 2
            trange.set_postfix(loss=loss / (i + 1))

        # Mean loss per batch; skip the division for an empty loader.
        if num_batches:
            loss /= num_batches
        epoch_log["epoch{}".format(epoch)] = loss
        print("epoch=%d\n" % epoch)
        print("loss=%f\n" % loss)

        save_model(model, epoch, output_dir)
    save_log(epoch_log, output_dir)