Example #1
0
 def __init__(self, model, tag_dict, options, lstm_dims=None):
     """Build the POS-tag MLP classifier.

     :type tag_dict: vocab_utils.Dictionary
     """
     super(POSTagClassification, self).__init__(model)
     tag_count = len(tag_dict)
     # Input is the concatenation of forward and backward LSTM states.
     input_dim = (lstm_dims or options.lstm_dims) * 2
     # A -1 in the configured hidden dims is a placeholder for the tag count.
     hidden_dims = [tag_count if dim == -1 else dim
                    for dim in options.tagger_mlp_dims]
     mlp_dims = [input_dim] + hidden_dims + [tag_count]
     self.mlp = nn.DenseLayers(self, mlp_dims, nn.activations[options.activation])
     self.options = options
Example #2
0
 def __init__(self, model, tag_dict, options):
     """Set up a bilinear transform followed by a dense tag classifier."""
     super().__init__(model)
     self.activation = nn.activations[options.activation]
     # Input: stacked span-LSTM states plus the category embedding.
     bilinear_input = (options.lstm_dims * 2 * options.span_lstm_layers
                       + options.category_embedding)
     self.bilinear_layer = nn.BiLinear(self, bilinear_input,
                                       options.edge_bilinear_dim)
     dense_dims = ([options.edge_bilinear_dim] + options.mlp_dims
                   + [len(tag_dict)])
     self.dense_layer = nn.DenseLayers(self, dense_dims, self.activation)
Example #3
0
    def __init__(self, model, tag_dict, options):
        """Create the label scorer: dense layers over stacked LSTM states."""
        super(LabelEvaluation, self).__init__(model)
        self.options = options
        self.activation = nn.activations[options.activation]
        self.label_count = len(tag_dict)

        input_dim = options.lstm_dims * 2 * options.span_lstm_layers
        dense_dims = [input_dim] + options.label_mlp_dims + [self.label_count]
        # The final transform deliberately carries no bias term.
        bias_flags = [True] * (len(dense_dims) - 2) + [False]

        self.dense_layer = nn.DenseLayers(self, dense_dims, self.activation,
                                          bias_flags)
Example #4
0
    def __init__(self, model, options):
        """Create the span scorer: dense layers mapping LSTM states to a scalar."""
        super(SpanEvaluation, self).__init__(model)
        self.options = options
        self.activation = nn.activations[options.activation]
        self.ldims = options.lstm_dims

        input_dim = options.lstm_dims * 2 * options.span_lstm_layers
        dense_dims = [input_dim] + options.mlp_dims + [1]
        # The final (scoring) transform deliberately carries no bias term.
        bias_flags = [True] * (len(dense_dims) - 2) + [False]

        self.dense_layer = nn.DenseLayers(self, dense_dims, self.activation,
                                          bias_flags)
Example #5
0
    def __init__(self, model, options):
        """Create the edge scorer: a bilinear transform then dense layers."""
        super(EdgeEvaluation, self).__init__(model)
        self.options = options
        self.activation = nn.activations[options.activation]
        self.ldims = options.lstm_dims

        self.bilinear_layer = nn.BiLinear(self, self.ldims * 2,
                                          options.bilinear_dim)

        dense_dims = [options.bilinear_dim] + options.mlp_dims + [1]
        # The final (scoring) transform deliberately carries no bias term.
        bias_flags = [True] * (len(dense_dims) - 2) + [False]

        self.dense_layer = nn.DenseLayers(self, dense_dims, self.activation,
                                          bias_flags)
Example #6
0
    def __init__(self, model, statistics, options):
        """Create the relation labeler from corpus label statistics."""
        super(LabelEvaluation, self).__init__(model)
        self.options = options
        self.activation = nn.activations[options.activation]
        self.irels = list(statistics.labels)
        self.rels = {label: idx for idx, label in enumerate(self.irels)}
        self.ldims = options.lstm_dims

        self.relation_bilinear_layer = nn.BiLinear(self, self.ldims * 2,
                                                   options.label_bilinear_dim)
        label_count = len(self.irels)
        relation_dense_dims = ([options.label_bilinear_dim]
                               + options.label_mlp_dims + [label_count])
        # Warn when any layer is narrower than the label set itself.
        layer_widths = [options.label_bilinear_dim] + options.label_mlp_dims
        if any(width < label_count for width in layer_widths):
            logger.warning("Too many labels!")

        self.relation_dense_layer = nn.DenseLayers(self, relation_dense_dims,
                                                   self.activation)
Example #7
0
    def __init__(
            self,
            model,
            hrg_statistics,  # type: HRGStatistics
            options):
        """Set up the structured-perceptron HRG scorer and its feature space."""
        super(StructuredPeceptronHRGScorer, self).__init__(model)
        self.options = options
        self.activation = nn.activations[options.activation]

        # Feature vocabulary: the 300 most common nonterminals, plus all
        # structural edges and categories seen in the statistics.
        top_nonterminals = [
            word for word, count in hrg_statistics.nonterminals.most_common(300)
        ]
        self.edge_labels = (top_nonterminals
                            + list(hrg_statistics.structural_edges)
                            + list(hrg_statistics.categories))

        self.possible_features = [("Edge", label) for label in self.edge_labels]
        logger.info("Consider {} features as graph embedding".format(
            len(self.possible_features)))
        self.possible_features.append("head_left")
        self.possible_features.append("head_right")
        # Map each feature to its position in the feature vector.
        self.feature_index = {feature: idx
                              for idx, feature in enumerate(self.possible_features)}

        input_dim = (options.lstm_dims * 2 * options.span_lstm_layers
                     + len(self.possible_features) + 1)
        dense_dims = [input_dim] + options.hrg_mlp_dims + [1]
        # The final (scoring) transform deliberately carries no bias term.
        bias_flags = [True] * (len(dense_dims) - 2) + [False]

        self.dense_layer = nn.DenseLayers(self, dense_dims, self.activation,
                                          bias_flags)
        self.count_scale = self.add_parameters((1, ))
        self.count_scale_2 = self.add_parameters((1, ))

        if self.options.conflict_output_dir:
            ensure_dir(self.options.conflict_output_dir)
    def __init__(self, model,
                 hrg_statistics,  # type: HRGStatistics
                 options):
        """Set up the embedding-based HRG scorer with additive attention."""
        super(EmbeddingHRGScorer, self).__init__(model)
        self.options = options
        self.activation = nn.activations[options.activation]

        # Embed only the most frequent edge labels.
        self.freq_edges = [
            edge for edge, count in
            hrg_statistics.edge_names.most_common(self.options.edge_count)
        ]
        self.edge_embedding = nn.Embedding(self, list(self.freq_edges),
                                           options.edge_embedding_dim,
                                           init=dn.IdentityInitializer())

        lstm_dim = options.lstm_dims * 2 * options.lstm_layers
        dense_dims = ([lstm_dim + options.edge_embedding_dim]
                      + options.hrg_mlp_dims + [1])
        # The final (scoring) transform deliberately carries no bias term.
        bias_flags = [True] * (len(dense_dims) - 2) + [False]

        self.dense_layer = nn.DenseLayers(self, dense_dims, self.activation,
                                          bias_flags)
        # Attention parameters: w1 projects edge embeddings, w2 projects LSTM
        # states, v produces the scalar attention score.
        self.attention_w1 = self.add_parameters((options.attention_dim,
                                                 options.edge_embedding_dim))
        self.attention_w2 = self.add_parameters((options.attention_dim,
                                                 lstm_dim))
        self.attention_v = self.add_parameters((1, options.attention_dim))
Example #9
0
    def __init__(self, model, statistics_or_dict, options):
        """Create the relation labeler.

        Accepts either a ``Dictionary`` (legacy checkpoints) or a statistics
        object exposing ``labels``.
        """
        super(LabelEvaluation, self).__init__(model)
        self.options = options
        self.activation = nn.activations[options.activation]
        if isinstance(statistics_or_dict, Dictionary):
            # Backward compatibility: reuse the dictionary's own mappings.
            tag_dict = statistics_or_dict
            self.irels = tag_dict.int_to_word
            self.rels = tag_dict.word_to_int
        else:
            tag_dict = statistics_or_dict.labels
            self.irels = list(tag_dict)
            self.rels = {label: idx for idx, label in enumerate(self.irels)}
        self.ldims = options.lstm_dims

        self.relation_bilinear_layer = nn.BiLinear(self, self.ldims * 2,
                                                   options.label_bilinear_dim)
        label_count = len(self.irels)
        relation_dense_dims = ([options.label_bilinear_dim]
                               + options.label_mlp_dims + [label_count])
        # Warn when any layer is narrower than the label set itself.
        layer_widths = [options.label_bilinear_dim] + options.label_mlp_dims
        if any(width < label_count for width in layer_widths):
            logger.warning("Too many labels!")

        self.relation_dense_layer = nn.DenseLayers(self, relation_dense_dims,
                                                   self.activation)
Example #10
0
    def __init__(self, words, pos, rels, w2i, options):
        """Build the transition-based (arc-hybrid) parser network.

        :param words: word -> count mapping used to size the word lookup table
        :param pos: iterable of POS tags
        :param rels: list of relation labels
        :param w2i: word -> index mapping (indices are shifted by 3 so that
            0-2 stay free for the special *PAD* / *INITIAL* / unknown slots)
        :param options: hyper-parameter namespace
        """
        self.model = dn.Model()
        self.trainer = dn.AdamTrainer(self.model)
        random.seed(1)  # fixed seed for reproducibility

        # noinspection PyUnresolvedReferences
        self.activations = {
            'tanh':
            dn.tanh,
            'sigmoid':
            dn.logistic,
            'relu':
            dn.rectify,
            'tanh3':
            (lambda x: dn.tanh(dn.cwise_multiply(dn.cwise_multiply(x, x), x)))
        }
        self.activation = self.activations[options.activation]

        self.options = options
        self.oracle = options.oracle
        self.ldims = options.lstm_dims * 2
        self.wdims = options.wembedding_dims
        self.pdims = options.pembedding_dims
        self.rdims = options.rembedding_dims
        self.wordsCount = words
        # FIX: .items() instead of the Python-2-only .iteritems(), so this
        # constructor runs on Python 3 like the rest of the codebase.
        self.vocab = {word: ind + 3 for word, ind in w2i.items()}
        self.pos = {word: ind + 3 for ind, word in enumerate(pos)}
        self.relation = options.relation
        if self.relation:
            self.rels = {word: ind for ind, word in enumerate(rels)}
            self.irels = rels
        else:
            # Labels disabled: collapse everything onto a single dummy label.
            self.rels = {"X": 0}
            self.irels = ["X"]

        self.headFlag = options.headFlag
        self.rlMostFlag = options.rlMostFlag
        self.rlFlag = options.rlFlag
        self.k = options.window

        # Number of LSTM vectors representing each stack/buffer item.
        self.nnvecs = self.headFlag + self.rlFlag * 2 + self.rlMostFlag * 2
        self.actions = transition_utils.ArcHybridActions(
            self.irels, options.action_file)

        if options.external_embedding is not None:
            self.extrnd, self.elookup, self.edim = nn.get_external_embedding(
                self.model, options.external_embedding)
            print('Load external embedding. Vector dimensions', self.edim)
        else:
            self.extrnd, self.elookup, self.edim = None, None, 0

        dims = self.wdims + self.pdims + self.edim
        self.rnn = nn.BiLSTM(self.model,
                             [dims] + [self.ldims] * options.lstm_layers)

        self.hidden_units = options.hidden_units
        self.hidden2_units = options.hidden2_units
        self.vocab['*PAD*'] = 1
        self.pos['*PAD*'] = 1

        self.vocab['*INITIAL*'] = 2
        self.pos['*INITIAL*'] = 2

        self.wlookup = self.model.add_lookup_parameters(
            (len(words) + 3, self.wdims))
        self.plookup = self.model.add_lookup_parameters(
            (len(pos) + 3, self.pdims))
        self.rlookup = self.model.add_lookup_parameters(
            (len(rels), self.rdims))

        self.word2lstm = self.model.add_parameters(
            (self.ldims, self.wdims + self.pdims + self.edim))
        # FIX: (self.ldims,) is a proper 1-tuple; the original "(self.ldims)"
        # was just a parenthesized int. Both are accepted by dynet, but the
        # tuple form matches every other add_parameters call here.
        self.word2lstmbias = self.model.add_parameters((self.ldims,))

        input_dims = self.ldims * self.nnvecs * (self.k + 1)
        # Drop zero-sized hidden layers so the MLPs shrink gracefully when a
        # hidden layer is disabled via the options.
        action_dims = [
            input_dims, self.hidden_units, self.hidden2_units,
            len(self.actions)
        ]
        action_dims = [i for i in action_dims if i != 0]
        self.action_classifier = nn.DenseLayers(self.model, action_dims,
                                                self.activation)

        relation_dims = [
            input_dims, self.hidden_units, self.hidden2_units,
            len(self.actions.decoded_with_relation)
        ]
        relation_dims = [i for i in relation_dims if i != 0]
        self.relation_classifier = nn.DenseLayers(self.model, relation_dims,
                                                  self.activation)

        # A beam size of 0 disables beam search (greedy decoding).
        if self.options.beam_size == 0:
            self.options.beam_search = False