def test_bidaf_trilinear_similarity(self):
    linear = LinearMatrixAttention(2, 2, combination="x,y,x*y")
    linear._weight_vector = Parameter(torch.FloatTensor([-0.3, 0.5, 2.0, -1.0, 1, 1]))
    linear._bias = Parameter(torch.FloatTensor([0.0]))
    output = linear(
        torch.FloatTensor([[[0, 0], [4, 5]], [[-7, -8], [10, 11]]]),
        torch.FloatTensor([[[1, 2], [4, 5]], [[7, 8], [10, 11]]]),
    )
    assert_almost_equal(
        output.data.numpy(),
        numpy.array(
            [
                [
                    [0 + 0 + 2 + -2 + 0 + 0, 0 + 0 + 8 + -5 + 0 + 0],
                    [-1.2 + 2.5 + 2 + -2 + 4 + 10, -1.2 + 2.5 + 8 + -5 + 16 + 25],
                ],
                [
                    [2.1 + -4 + 14 + -8 + -49 + -64, 2.1 + -4 + 20 + -11 + -70 + -88],
                    [-3 + 5.5 + 14 + -8 + 70 + 88, -3 + 5.5 + 20 + -11 + 100 + 121],
                ],
            ]
        ),
        decimal=2,
    )
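# A minimal hand-check (not part of the original test file) of the expected values
# above. With combination "x,y,x*y", the score for a pair (x, y) is
# w . [x; y; x*y] + b. For batch 0, x = [4, 5] scored against y = [4, 5] this gives
# -1.2 + 2.5 + 8 - 5 + 16 + 25 = 45.3, matching the test.
import numpy

w = numpy.array([-0.3, 0.5, 2.0, -1.0, 1.0, 1.0])
bias = 0.0
x = numpy.array([4.0, 5.0])
y = numpy.array([4.0, 5.0])
print(w @ numpy.concatenate([x, y, x * y]) + bias)  # 45.3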
def test_linear_similarity(self):
    linear = LinearMatrixAttention(3, 3)
    linear._weight_vector = Parameter(torch.FloatTensor([-.3, .5, 2.0, -1.0, 1, 1]))
    linear._bias = Parameter(torch.FloatTensor([.1]))
    output = linear(
        Variable(torch.FloatTensor([[[0, 0, 0], [4, 5, 6]], [[-7, -8, -9], [10, 11, 12]]])),
        Variable(torch.FloatTensor([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])),
    )
    assert_almost_equal(
        output.data.numpy(),
        numpy.array([[[4.1000, 7.1000], [17.4000, 20.4000]],
                     [[-9.8000, -6.8000], [36.6000, 39.6000]]]),
        decimal=2,
    )
def __init__(self,
             input_dim: int,
             output_dim: int,
             num_heads: int = 1,
             activation: Activation = None,
             input_dropout: float = 0.0,
             att_dropout: float = 0.0) -> None:
    super().__init__()
    self._hidden_dim = output_dim
    self._weight_vector = nn.Parameter(torch.FloatTensor(input_dim, self._hidden_dim))
    self.activation = activation or (lambda x: x)

    attention_dim = output_dim / num_heads
    assert attention_dim.is_integer(), "output dim must be divisible by number of heads"
    self._attention_dim = int(attention_dim)
    self._num_heads = num_heads

    self.matrix_attention = LinearMatrixAttention(tensor_1_dim=self._attention_dim,
                                                  tensor_2_dim=self._attention_dim,
                                                  combination='x,y')

    if input_dropout is not None and input_dropout > 0:
        self.input_dropout = nn.Dropout(input_dropout)
    else:
        self.input_dropout = lambda x: x

    if att_dropout is not None and att_dropout > 0:
        self.att_dropout = nn.Dropout(att_dropout)
    else:
        self.att_dropout = lambda x: x

    self.reset_parameters()
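# A minimal sketch of how the per-head attention above could be applied (an
# assumption for illustration, not the repository's actual forward pass): the
# hidden state is split into `num_heads` chunks of size `attention_dim`, and the
# shared LinearMatrixAttention scores each chunk against itself.
import torch
from allennlp.modules.matrix_attention import LinearMatrixAttention

batch, seq_len, output_dim, num_heads = 2, 5, 8, 4
attention_dim = output_dim // num_heads  # must divide evenly, as asserted above

attn = LinearMatrixAttention(tensor_1_dim=attention_dim,
                             tensor_2_dim=attention_dim,
                             combination='x,y')

x = torch.rand(batch, seq_len, output_dim)
# (batch, seq, heads, head_dim) -> (batch * heads, seq, head_dim)
per_head = (x.view(batch, seq_len, num_heads, attention_dim)
             .permute(0, 2, 1, 3)
             .reshape(batch * num_heads, seq_len, attention_dim))
scores = attn(per_head, per_head)
print(scores.shape)  # torch.Size([8, 5, 5])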
def __init__(self, config):
    super(RobertaForRopes, self).__init__(config)

    self.roberta = RobertaModel(config)
    self.find_object1 = MLP(config.hidden_size, 1)
    self.find_object2 = MLP(config.hidden_size, 1)
    self.find_TP = MLP(config.hidden_size, 1)
    self.bb_matrix_attention = LinearMatrixAttention(tensor_1_dim=config.hidden_size,
                                                     tensor_2_dim=config.hidden_size,
                                                     combination="x,y,x*y")
    self.bs_bilinear_imilairty = BilinearMatrixAttention(matrix_1_dim=config.hidden_size,
                                                         matrix_2_dim=config.hidden_size)
    self.ss_matrix_attention = LinearMatrixAttention(tensor_1_dim=config.hidden_size,
                                                     tensor_2_dim=config.hidden_size,
                                                     combination="x,y,x*y")
    self.rel_SPo1_SPo2 = Relevance(config)
    self.pol_TP_SP = MLP(config.hidden_size * 2, 2)
    # self.rel_TPo1_TPo2 = None
    # self.answer_according_to_question = None

    self.init_weights()
def __init__(self,
             span_dim,
             max_decoding_steps=5,
             predict_eos=True,
             cell='lstm',
             train_helper="sample",
             val_helper="beamsearch",
             beam_size=3,
             aux_input_dim=None,  # 200
             pass_label=False):
    super().__init__()
    self.evd_decoder = PointerNetDecoder(LinearMatrixAttention(span_dim, span_dim, "x,y,x*y"),
                                         memory_dim=span_dim,
                                         aux_input_dim=aux_input_dim,
                                         train_helper=train_helper,
                                         val_helper=val_helper,
                                         beam_size=beam_size,
                                         max_decoding_steps=max_decoding_steps,
                                         predict_eos=predict_eos,
                                         cell=cell)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             binary_feature_dim: int,
             embedding_dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             label_smoothing: float = None,
             ignore_span_metric: bool = False,
             srl_eval_path: str = DEFAULT_SRL_EVAL_PATH,
             image_embedding_size: int = 2052,
             lamb: float = 0.2) -> None:
    super().__init__(
        vocab,
        text_field_embedder,
        encoder,
        binary_feature_dim,
        embedding_dropout,
        initializer,
        regularizer,
        label_smoothing,
        ignore_span_metric,
        srl_eval_path,
    )
    self.image_embedding_size = image_embedding_size
    self.embed_dim = self.encoder.get_output_dim()
    self.img_enc = EncoderImagePrecomp(self.image_embedding_size,
                                       self.embed_dim,
                                       no_imgnorm=False)
    self.vse_loss = ContrastiveLoss(margin=0.2)
    self.attention = LinearMatrixAttention(self.embed_dim, self.embed_dim)  # tune it
    self.lamb = lamb
    self.lamb = torch.tensor(self.lamb)
    self.tag_projection_layer = TimeDistributed(Linear(self.embed_dim * 2, self.num_classes))
def build_model(vocab: Vocabulary) -> Model:
    print("Building the model")
    EMBEDDING_DIM = 300
    HIDDEN_DIM = 300
    NUM_FILTERS = 60
    NGRAM_FILTER_SIZES = (2, 3, 4, 5, 6)
    # out_dim for char = len(NGRAM_FILTER_SIZES) * NUM_FILTERS
    F_OUT = 200

    elmo_options_file = "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
    elmo_weight_file = "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
    elmo_embedding = ElmoTokenEmbedder(options_file=elmo_options_file,
                                       weight_file=elmo_weight_file)

    character_embedding = Embedding(vocab=vocab,
                                    embedding_dim=EMBEDDING_DIM,
                                    vocab_namespace='character_vocab')
    cnn_encoder = CnnEncoder(embedding_dim=EMBEDDING_DIM,
                             num_filters=NUM_FILTERS,
                             ngram_filter_sizes=NGRAM_FILTER_SIZES)
    token_encoder = TokenCharactersEncoder(character_embedding, cnn_encoder)

    pos_tag_embedding = Embedding(vocab=vocab,
                                  embedding_dim=EMBEDDING_DIM,
                                  vocab_namespace='pos_tag_vocab')
    ner_tag_embedding = Embedding(vocab=vocab,
                                  embedding_dim=EMBEDDING_DIM,
                                  vocab_namespace='ner_tag_vocab')
    word_embedding = Embedding(vocab=vocab,
                               embedding_dim=EMBEDDING_DIM,
                               vocab_namespace='token_vocab')

    utterance_embedder = BasicTextFieldEmbedder(
        token_embedders={
            'elmo_tokens': elmo_embedding,
            'token_characters': token_encoder,
            'pos_tags': pos_tag_embedding,
            'ner_tags': ner_tag_embedding
        })

    # slot embedder
    slot_embedder = BasicTextFieldEmbedder(
        token_embedders={
            'elmo_tokens': elmo_embedding,
            'token_characters': token_encoder,
        })

    utterance_lstm = PytorchSeq2SeqWrapper(
        torch.nn.LSTM(2 * EMBEDDING_DIM + 1024 + len(NGRAM_FILTER_SIZES) * NUM_FILTERS,
                      HIDDEN_DIM,
                      num_layers=2,
                      batch_first=True,
                      bidirectional=True))
    slot_lstm = PytorchSeq2SeqWrapper(
        torch.nn.LSTM(1024 + len(NGRAM_FILTER_SIZES) * NUM_FILTERS,
                      HIDDEN_DIM,
                      num_layers=2,
                      batch_first=True,
                      bidirectional=True))

    similarity = LinearMatrixAttention(tensor_1_dim=2 * HIDDEN_DIM,
                                       tensor_2_dim=2 * HIDDEN_DIM,
                                       combination="x,y,x*y",
                                       activation=Activation.by_name('tanh')())

    modeling_lstm = PytorchSeq2SeqWrapper(
        torch.nn.LSTM(2 * 5 * HIDDEN_DIM,  # bi-direction
                      HIDDEN_DIM,
                      num_layers=2,
                      batch_first=True,
                      bidirectional=True))

    # step-1 utterance
    utterance_embedder2 = BasicTextFieldEmbedder(
        token_embedders={
            'elmo_tokens': elmo_embedding,
            'token_characters': token_encoder,
            'pos_tags': pos_tag_embedding,
            'ner_tags': ner_tag_embedding
        })
    utterance_lstm2 = PytorchSeq2SeqWrapper(
        torch.nn.LSTM(2 * EMBEDDING_DIM + 1024 + len(NGRAM_FILTER_SIZES) * NUM_FILTERS,
                      HIDDEN_DIM,
                      num_layers=2,
                      batch_first=True,
                      bidirectional=True))

    # feed-forward layer that combines the two LSTM outputs
    final_linear_layer = FeedForward(2 * HIDDEN_DIM, 2, [HIDDEN_DIM, F_OUT],
                                     torch.nn.ReLU(), 0.3)

    # CRF model
    model = CrfTagger(vocab=vocab,
                      utterance_embedder=utterance_embedder,
                      utterance_embedder2=utterance_embedder2,
                      slot_embedder=slot_embedder,
                      utterance_encoder=utterance_lstm,
                      utterance_encoder2=utterance_lstm2,
                      slot_encoder=slot_lstm,
                      matrix_attention=similarity,
                      modeling_layer=modeling_lstm,
                      fc_ff_layer=final_linear_layer)
    return model
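# A small sanity check of the LSTM input sizes used above (an assumption that the
# concatenated embedder outputs add up as follows; ELMo's 1024-dim output and the
# char-CNN output are hard-coded in the original builder).
EMBEDDING_DIM = 300                   # pos_tag and ner_tag embedding dims
ELMO_DIM = 1024                       # 2x4096_512_2048cnn_2xhighway ELMo output
CHAR_DIM = len((2, 3, 4, 5, 6)) * 60  # CnnEncoder output: 5 ngram sizes * 60 filters

utterance_lstm_input = 2 * EMBEDDING_DIM + ELMO_DIM + CHAR_DIM  # 600 + 1024 + 300 = 1924
slot_lstm_input = ELMO_DIM + CHAR_DIM                           # 1024 + 300 = 1324
print(utterance_lstm_input, slot_lstm_input)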
def __init__(
    self,
    question_encoding_dim: int,
    passage_encoding_dim: int,
    passage_attention_to_span: Seq2SeqEncoder,
    passage_startend_predictor,
    question_attention_to_span: Seq2SeqEncoder,
    passage_attention_to_count: Seq2SeqEncoder,
    num_implicit_nums: int = None,
    passage_count_predictor=None,
    passage_count_hidden2logits=None,
    dropout: float = 0.0,
):
    super().__init__()

    self.num_counts = 10
    self.passage_attention_scalingvals = [1, 2, 5, 10]

    # Parameters for answer start/end prediction from PassageAttention
    self.passage_attention_to_span = passage_attention_to_span
    self.passage_startend_predictor = passage_startend_predictor
    # torch.nn.Linear(self.passage_attention_to_span.get_output_dim(), 2)

    # Parameters for answer start/end prediction directly from the passage encoding
    # (direct PassageSpanAnswer from a one-step program)
    self.oneshot_psa_startend_predictor = torch.nn.Linear(passage_encoding_dim, 2)

    self.question_attention_to_span = question_attention_to_span
    self.question_startend_predictor = torch.nn.Linear(
        self.question_attention_to_span.get_output_dim(), 2)

    self.passage_attention_to_count = passage_attention_to_count
    # self.passage_count_predictor = torch.nn.Linear(self.passage_attention_to_count.get_output_dim(),
    #                                                self.num_counts)
    self.passage_count_predictor = passage_count_predictor
    # Linear from self.passage_attention_to_count.output_dim --> 1
    self.passage_count_hidden2logits = passage_count_hidden2logits

    self.dotprod_matrix_attn = DotProductMatrixAttention()

    self.implicit_num_embeddings = torch.nn.Parameter(
        torch.FloatTensor(num_implicit_nums, passage_encoding_dim))
    torch.nn.init.normal_(self.implicit_num_embeddings, mean=0.0, std=0.001)
    self.implicitnum_bilinear_attention = BilinearMatrixAttention(
        matrix_1_dim=passage_encoding_dim, matrix_2_dim=passage_encoding_dim)

    # self.filter_matrix_attention = LinearMatrixAttention(
    #     tensor_1_dim=question_encoding_dim, tensor_2_dim=passage_encoding_dim, combination="x,y,x*y"
    # )
    self.filter_matrix_attention = LinearMatrixAttention(
        tensor_1_dim=question_encoding_dim,
        tensor_2_dim=passage_encoding_dim,
        combination="x,y,x*y")

    self._endpoint_span_extractor = EndpointSpanExtractor(
        input_dim=passage_encoding_dim, combination="x,y")

    # We will sum the passage-token-repr with the weighted-q-repr -- to use the x*y combination
    self.relocate_matrix_attention = LinearMatrixAttention(
        tensor_1_dim=passage_encoding_dim,
        tensor_2_dim=passage_encoding_dim,
        combination="x,y,x*y")

    # This computes a passage-to-passage attention, hopefully putting, for each token,
    # a weight on the date tokens that are related to it.
    self.passage_to_date_attention: MatrixAttention = BilinearMatrixAttention(
        matrix_1_dim=passage_encoding_dim, matrix_2_dim=passage_encoding_dim)
    self.passage_to_start_date_attention: MatrixAttention = BilinearMatrixAttention(
        matrix_1_dim=passage_encoding_dim, matrix_2_dim=passage_encoding_dim)
    self.passage_to_end_date_attention: MatrixAttention = BilinearMatrixAttention(
        matrix_1_dim=passage_encoding_dim, matrix_2_dim=passage_encoding_dim)

    # This computes a passage-to-passage attention, hopefully putting, for each token,
    # a weight on the number tokens that are related to it.
    self.passage_to_num_attention: MatrixAttention = BilinearMatrixAttention(
        matrix_1_dim=passage_encoding_dim, matrix_2_dim=passage_encoding_dim)

    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
    combination='x,y', activation=tanh)
output = attention(vector, matrix)
print('Output from LinearAttention:', output)

# MatrixAttention
sequence_length1 = 10
sequence_length2 = 15

# dot product attention only allows matrices of the same size
matrix1 = torch.rand((batch_size, sequence_length1, embedding_dim1))
matrix2 = torch.rand((batch_size, sequence_length2, embedding_dim1))

matrix_attention = DotProductMatrixAttention()
output = matrix_attention(matrix1, matrix2)
print('Output shape of DotProductMatrixAttention:', output.shape)

# bilinear & linear attention allows inputs of different sizes
matrix1 = torch.rand((1, sequence_length1, embedding_dim1))
matrix2 = torch.rand((1, sequence_length2, embedding_dim2))

matrix_attention = BilinearMatrixAttention(matrix_1_dim=embedding_dim1,
                                           matrix_2_dim=embedding_dim2)
output = matrix_attention(matrix1, matrix2)
print('Output shape of BilinearMatrixAttention:', output.shape)

matrix_attention = LinearMatrixAttention(tensor_1_dim=embedding_dim1,
                                         tensor_2_dim=embedding_dim2,
                                         combination='x,y',
                                         activation=tanh)
output = matrix_attention(matrix1, matrix2)
print('Output shape of LinearMatrixAttention:', output.shape)
def __init__(self,
             question_encoding_dim: int,
             passage_encoding_dim: int,
             passage_attention_to_span: Seq2SeqEncoder,
             question_attention_to_span: Seq2SeqEncoder,
             passage_attention_to_count: Seq2SeqEncoder,
             passage_count_predictor=None,
             passage_count_hidden2logits=None,
             dropout: float = 0.0):
    super().__init__()

    self.num_counts = 10
    self.passage_attention_scalingvals = [1, 2, 5, 10]

    self.passage_attention_to_span = passage_attention_to_span
    self.passage_startend_predictor = torch.nn.Linear(
        self.passage_attention_to_span.get_output_dim(), 2)

    self.question_attention_to_span = question_attention_to_span
    self.question_startend_predictor = torch.nn.Linear(
        self.question_attention_to_span.get_output_dim(), 2)

    self.passage_attention_to_count = passage_attention_to_count
    # self.passage_count_predictor = torch.nn.Linear(self.passage_attention_to_count.get_output_dim(),
    #                                                self.num_counts)
    self.passage_count_predictor = passage_count_predictor
    # Linear from self.passage_attention_to_count.output_dim --> 1
    self.passage_count_hidden2logits = passage_count_hidden2logits

    self.dotprod_matrix_attn = DotProductMatrixAttention()

    self.filter_matrix_attention = LinearMatrixAttention(
        tensor_1_dim=question_encoding_dim,
        tensor_2_dim=passage_encoding_dim,
        combination="x,y,x*y")

    # We will sum the passage-token-repr with the weighted-q-repr -- to use the x*y combination
    self.relocate_matrix_attention = LinearMatrixAttention(
        tensor_1_dim=passage_encoding_dim,
        tensor_2_dim=passage_encoding_dim,
        combination="x,y,x*y")

    # This computes a passage-to-passage attention, hopefully putting, for each token,
    # a weight on the date tokens that are related to it.
    self.passage_to_date_attention: MatrixAttention = BilinearMatrixAttention(
        matrix_1_dim=passage_encoding_dim, matrix_2_dim=passage_encoding_dim)
    self.passage_to_start_date_attention: MatrixAttention = BilinearMatrixAttention(
        matrix_1_dim=passage_encoding_dim, matrix_2_dim=passage_encoding_dim)
    self.passage_to_end_date_attention: MatrixAttention = BilinearMatrixAttention(
        matrix_1_dim=passage_encoding_dim, matrix_2_dim=passage_encoding_dim)

    # This computes a passage-to-passage attention, hopefully putting, for each token,
    # a weight on the number tokens that are related to it.
    self.passage_to_num_attention: MatrixAttention = BilinearMatrixAttention(
        matrix_1_dim=passage_encoding_dim, matrix_2_dim=passage_encoding_dim)

    if dropout > 0:
        self._dropout = torch.nn.Dropout(p=dropout)
    else:
        self._dropout = lambda x: x
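# A shape sketch for the question-to-passage filter attention above (hypothetical
# dims, not the model's actual configuration): it produces one score per
# (question token, passage token) pair, i.e. a (batch, question_len, passage_len)
# matrix. Note that the "x,y,x*y" combination requires the two input dims to match.
import torch
from allennlp.modules.matrix_attention import LinearMatrixAttention

q_dim = p_dim = 16
filter_attn = LinearMatrixAttention(tensor_1_dim=q_dim,
                                    tensor_2_dim=p_dim,
                                    combination="x,y,x*y")
question = torch.rand(2, 7, q_dim)   # (batch, question_len, q_dim)
passage = torch.rand(2, 30, p_dim)   # (batch, passage_len, p_dim)
print(filter_attn(question, passage).shape)  # torch.Size([2, 7, 30])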
def __init__(self,
             vocab: Vocabulary,
             text_encoder: Seq2SeqEncoder,
             word_embedder: TextFieldEmbedder,
             enable_training_log: bool = False,
             inp_drop_rate: float = 0.2,
             out_drop_rate: float = 0.2,
             loss_weights: List = (0.2, 0.4, 0.4),
             super_mode: str = 'before',
             backbone: str = 'unet',
             unet_down_channel: int = 256,
             feature_sel: int = 127):
    super(UnifiedFollowUp, self).__init__(vocab)
    self.text_encoder = text_encoder
    self.word_embedder = word_embedder

    """
    Define model arch choices
    """
    self.backbone = backbone

    # input dropout
    if inp_drop_rate > 0:
        self.var_inp_dropout = InputVariationalDropout(p=inp_drop_rate)
    else:
        self.var_inp_dropout = lambda x: x
    # output dropout
    if out_drop_rate > 0:
        self.var_out_dropout = InputVariationalDropout(p=out_drop_rate)
    else:
        self.var_out_dropout = lambda x: x

    self.hidden_size = text_encoder.get_output_dim() // 2 if text_encoder.is_bidirectional() \
        else text_encoder.get_output_dim()
    self.output_size = text_encoder.get_output_dim()

    # ele -> element-wise multiply
    # dot -> dot product
    # cos -> cosine similarity
    # emb_dot -> embedding dot product
    # emb_cos -> embedding cosine similarity
    # linear -> linear similarity
    # bilinear -> bilinear similarity
    feature_sel = feature_sel
    sel_arr = "{0:07b}".format(int(feature_sel))
    nni_choices = ['ele', 'dot', 'cos', 'emb_dot', 'emb_cos', 'linear', 'bilinear']

    self.segment_choices = [nni_choices[i] for i in range(7) if sel_arr[i] == '1']
    # if expanding bi-direction, we regard forward/backward as two channels
    self.expand_bidir = False

    self.similar_function = ModuleDict({
        'ele': ElementWiseMatrixAttention(),
        'dot': DotProductMatrixAttention(),
        'cos': CosineMatrixAttention(),
        'emb_dot': DotProductMatrixAttention(),
        'emb_cos': CosineMatrixAttention(),
        'bilinear': BilinearMatrixAttention(matrix_1_dim=self.output_size,
                                            matrix_2_dim=self.output_size),
        'linear': LinearMatrixAttention(tensor_1_dim=self.output_size,
                                        tensor_2_dim=self.output_size)
    })

    self.attn_channel = 0
    for choice in self.segment_choices:
        if choice == 'ele':
            self.attn_channel += self.output_size
        elif choice in ['dot', 'cos', 'emb_dot', 'emb_cos', 'bilinear', 'linear']:
            if self.expand_bidir:
                self.attn_channel += 2
            else:
                self.attn_channel += 1

    self.class_mapping: Dict[str, int] = get_class_mapping(super_mode=super_mode)

    # Here we have two choices for now: one is an MLP, the other is a UNet
    if self.backbone == 'unet':
        self.segmentation_net = AttentionUNet(input_channels=self.attn_channel,
                                              class_number=len(self.class_mapping.keys()),
                                              down_channel=unet_down_channel)
    else:
        raise Exception("Currently we do not support other arches.")

    class_zero_weight = loss_weights[0]
    class_one_weight = loss_weights[1]

    self.register_buffer('weight_tensor',
                         torch.tensor([class_zero_weight,
                                       class_one_weight,
                                       1 - class_zero_weight - class_one_weight]))
    self.loss = nn.CrossEntropyLoss(ignore_index=-1, weight=self.weight_tensor)

    # initialize metrics measurement
    self.metrics = {'ROUGE': BatchAverage(),
                    '_ROUGE1': BatchAverage(),
                    '_ROUGE2': BatchAverage(),
                    # TODO: You can speed up the code by disabling BLEU, since
                    # the corpus-based BLEU metric is very time-consuming.
                    'BLEU': CorpusBLEUMetric(),
                    'EM': BatchAverage(),
                    'F1': FScoreMetric(prefix="1"),
                    'F2': FScoreMetric(prefix="2"),
                    'F3': FScoreMetric(prefix="3")}

    parameter_num = count_parameters(self)
    print(parameter_num)

    self.min_width = 8
    self.min_height = 8
    self.enable_training_log = enable_training_log
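# A small illustration of the feature_sel bit mask above (it mirrors the list
# comprehension that builds self.segment_choices): the default 127 == 0b1111111
# enables all seven similarity channels.
nni_choices = ['ele', 'dot', 'cos', 'emb_dot', 'emb_cos', 'linear', 'bilinear']

def selected_choices(feature_sel: int):
    sel_arr = "{0:07b}".format(int(feature_sel))
    return [nni_choices[i] for i in range(7) if sel_arr[i] == '1']

print(selected_choices(127))  # all seven similarity functions
print(selected_choices(3))    # ['linear', 'bilinear'] -- only the last two bits set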