Example #1
    def __init__(self, words, pos, rels, cpos, langs, w2i, ch, options):

        global dy
        import dynet as dy  # import here so we don't load Dynet if just running parser.py --help, for example

        self.model = dy.ParameterCollection()
        self.trainer = dy.AdamTrainer(self.model, alpha=options.learning_rate)

        self.activations = {'tanh': dy.tanh, 'sigmoid': dy.logistic, 'relu': dy.rectify,
                            'tanh3': lambda x: dy.tanh(dy.cwise_multiply(dy.cwise_multiply(x, x), x))}
        self.activation = self.activations[options.activation]

        self.oracle = options.oracle


        self.headFlag = options.headFlag
        self.rlMostFlag = options.rlMostFlag
        self.rlFlag = options.rlFlag
        self.k = options.k

        #dimensions depending on extended features
        self.nnvecs = (1 if self.headFlag else 0) + (2 if self.rlFlag or self.rlMostFlag else 0)
        self.feature_extractor = FeatureExtractor(self.model, options, words, rels, langs, w2i, ch, self.nnvecs)
        self.irels = self.feature_extractor.irels


        mlp_in_dims = options.lstm_output_size * 2 * self.nnvecs * (self.k + 1)
        self.unlabeled_MLP = MLP(self.model, 'unlabeled', mlp_in_dims, options.mlp_hidden_dims,
                                 options.mlp_hidden2_dims, 4, self.activation)
        self.labeled_MLP = MLP(self.model, 'labeled', mlp_in_dims, options.mlp_hidden_dims,
                               options.mlp_hidden2_dims, 2 * len(self.irels) + 2, self.activation)
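Note: the 'tanh3' entry computes an elementwise tanh(x³) via two cwise_multiply calls. A minimal sketch to check it, assuming a working DyNet install:

import dynet as dy
import math

dy.renew_cg()
x = dy.inputVector([0.5, -1.0])
tanh3 = lambda v: dy.tanh(dy.cwise_multiply(dy.cwise_multiply(v, v), v))
print(tanh3(x).value())                          # elementwise tanh(x^3)
print([math.tanh(v ** 3) for v in (0.5, -1.0)])  # same values computed directly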
Example #2
    def __init__(self, vocab, options):
        global dy
        import dynet as dy
        from feature_extractor import FeatureExtractor
        self.model = dy.ParameterCollection()
        self.trainer = dy.AdamTrainer(self.model, alpha=options.learning_rate)
        self.activations = {
            'tanh': dy.tanh,
            'sigmoid': dy.logistic,
            'relu': dy.rectify,
            'tanh3': lambda x: dy.tanh(dy.cwise_multiply(dy.cwise_multiply(x, x), x)),
        }
        self.activation = self.activations[options.activation]
        self.costaugFlag = options.costaugFlag
        self.feature_extractor = FeatureExtractor(self.model, options, vocab)
        self.labelsFlag = options.labelsFlag
        mlp_in_dims = options.lstm_output_size * 2

        self.unlabeled_MLP = biMLP(self.model, mlp_in_dims,
                                   options.mlp_hidden_dims,
                                   options.mlp_hidden2_dims, 1,
                                   self.activation)
        if self.labelsFlag:
            self.labeled_MLP = biMLP(self.model, mlp_in_dims,
                                     options.mlp_hidden_dims,
                                     options.mlp_hidden2_dims,
                                     len(self.feature_extractor.irels),
                                     self.activation)

        self.proj = options.proj
Example #3
    def __init__(self, vocab, options):

        # import here so we don't load Dynet if just running parser.py --help for example
        from multilayer_perceptron import MLP
        from feature_extractor import FeatureExtractor
        global dy
        import dynet as dy

        global LEFT_ARC, RIGHT_ARC, SHIFT, SWAP
        LEFT_ARC, RIGHT_ARC, SHIFT, SWAP = 0, 1, 2, 3

        self.model = dy.ParameterCollection()
        self.trainer = dy.AdamTrainer(self.model, alpha=options.learning_rate)

        self.activations = {
            'tanh': dy.tanh,
            'sigmoid': dy.logistic,
            'relu': dy.rectify,
            'tanh3': lambda x: dy.tanh(dy.cwise_multiply(dy.cwise_multiply(x, x), x)),
        }
        self.activation = self.activations[options.activation]

        self.oracle = options.oracle

        self.headFlag = options.headFlag
        self.rlMostFlag = options.rlMostFlag
        self.rlFlag = options.rlFlag
        self.k = options.k
        self.recursive_composition = options.use_recursive_composition
        #ugly hack

        # dimensions depending on extended features
        self.nnvecs = ((1 if self.headFlag else 0)
                       + (2 if self.rlFlag or self.rlMostFlag else 0)
                       + (1 if self.recursive_composition else 0))
        self.feature_extractor = FeatureExtractor(self.model, options, vocab,
                                                  self.nnvecs)
        self.irels = self.feature_extractor.irels

        if options.no_bilstms > 0:
            mlp_in_dims = options.lstm_output_size * 2 * self.nnvecs * (self.k + 1)
        else:
            mlp_in_dims = options.lstm_input_size * self.nnvecs * (self.k + 1)

        self.unlabeled_MLP = MLP(self.model, 'unlabeled', mlp_in_dims,
                                 options.mlp_hidden_dims,
                                 options.mlp_hidden2_dims, 4, self.activation)
        self.labeled_MLP = MLP(self.model, 'labeled', mlp_in_dims,
                               options.mlp_hidden_dims,
                               options.mlp_hidden2_dims,
                               2 * len(self.irels) + 2, self.activation)
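The MLP input width follows directly from the feature flags above. A worked example with hypothetical values (the concrete numbers below are illustrative, not from the original code):

# hypothetical settings: headFlag on, rlFlag on, recursive composition off
lstm_output_size, k = 125, 3
nnvecs = 1 + 2 + 0  # head vector plus left/right-most child vectors
mlp_in_dims = lstm_output_size * 2 * nnvecs * (k + 1)
print(mlp_in_dims)  # 125 * 2 * 3 * 4 = 3000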
Example #4
        def add_input(self, input_vec):
            # concatenate the new input with the previous hidden state
            x = dynet.concatenate([input_vec, self.h])

            # standard LSTM gates: input (i), forget (f), candidate (g), output (o)
            i = dynet.logistic(self.W_i * x + self.b_i)
            f = dynet.logistic(self.W_f * x + self.b_f)
            g = dynet.tanh(self.W_c * x + self.b_c)
            o = dynet.logistic(self.W_o * x + self.b_o)

            # new cell state: forget part of the old state, add the gated candidate
            c = dynet.cwise_multiply(f, self.c) + dynet.cwise_multiply(i, g)
            # hidden state: output gate applied to the squashed cell state
            h = dynet.cwise_multiply(o, dynet.tanh(c))

            self.c = c
            self.h = h
            self.outputs.append(h)

            return self
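add_input above assumes an enclosing object that owns the gate parameters and the running state. A minimal container sketch to pair with it; the class name, dimensions, and setup below are assumptions, only the W_*/b_*/h/c/outputs attribute names come from the snippet:

import dynet

class SimpleLSTMCell(object):
    # hypothetical owner of the add_input method shown above
    def __init__(self, model, x_dim, h_dim):
        cat_dim = x_dim + h_dim  # add_input concatenates the input with the hidden state
        for gate in ('i', 'f', 'c', 'o'):
            setattr(self, 'W_' + gate, model.add_parameters((h_dim, cat_dim)))
            setattr(self, 'b_' + gate, model.add_parameters(h_dim))
        # zero initial state; DyNet >= 2.0 accepts Parameters directly in expressions
        self.h = dynet.inputVector([0.0] * h_dim)
        self.c = dynet.inputVector([0.0] * h_dim)
        self.outputs = []

# usage: renew the graph before building the cell and its inputs
model = dynet.ParameterCollection()
dynet.renew_cg()
cell = SimpleLSTMCell(model, x_dim=10, h_dim=20)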
Example #5
    def __init__(self, vocab, options):

        # import here so we don't load Dynet if just running parser.py --help for example
        from multilayer_perceptron import MLP
        from feature_extractor import FeatureExtractor
        global dy
        import dynet as dy

        global LEFT_ARC, RIGHT_ARC, SHIFT, SWAP
        LEFT_ARC, RIGHT_ARC, SHIFT, SWAP = 0, 1, 2, 3

        global NO_COMP, SOFT_COMP, HARD_COMP, GEN_COMP
        NO_COMP, HARD_COMP, SOFT_COMP, GEN_COMP = 0, 1, 2, 3

        self.composition = options.nucleus_composition

        all_rels = vocab[5]
        functional_rels = ['det', 'case', 'clf', 'cop', 'mark', 'aux', 'cc']
        if self.composition in [HARD_COMP, SOFT_COMP]:
            self.compositional_relations = functional_rels
        elif self.composition in [GEN_COMP]:
            self.compositional_relations = all_rels
        else:
            self.compositional_relations = []

        self.compositional_relations_dict = {
            rel: idx
            for idx, rel in enumerate(self.compositional_relations)
        }

        self.model = dy.ParameterCollection()
        self.trainer = dy.AdamTrainer(self.model, alpha=options.learning_rate)

        self.activations = {
            'tanh': dy.tanh,
            'sigmoid': dy.logistic,
            'relu': dy.rectify,
            'tanh3': lambda x: dy.tanh(dy.cwise_multiply(dy.cwise_multiply(x, x), x)),
        }
        self.activation = self.activations[options.activation]

        self.oracle = options.oracle

        self.headFlag = options.headFlag
        self.rlMostFlag = options.rlMostFlag
        self.rlFlag = options.rlFlag
        self.k = options.k

        # dimensions depending on extended features
        self.nnvecs = ((1 if self.headFlag else 0)
                       + (2 if self.rlFlag or self.rlMostFlag else 0))
        self.feature_extractor = FeatureExtractor(self.model, options, vocab,
                                                  self.nnvecs)
        self.irels = self.feature_extractor.irels

        if options.no_bilstms > 0:
            mlp_in_dims = options.lstm_output_size * 2 * self.nnvecs * (self.k + 1)
        else:
            mlp_in_dims = self.feature_extractor.lstm_input_size * self.nnvecs * (self.k + 1)
        print("The size of the MLP input layer is {0}".format(mlp_in_dims))

        if self.composition in [SOFT_COMP, GEN_COMP]:
            rel_emb_sz = 10
            self.cmp_rel_lookup = self.model.add_lookup_parameters(
                (len(self.compositional_relations), rel_emb_sz))
            cmb_sz = 2 * 2 * options.lstm_output_size + rel_emb_sz
            out_sz = 2 * options.lstm_output_size
            self.combiner_W1 = self.model.add_parameters((out_sz, cmb_sz),
                                                         name='cmbW1')
            self.combiner_b1 = self.model.add_parameters(out_sz, name='cmbb1')

        self.unlabeled_MLP = MLP(self.model, 'unlabeled', mlp_in_dims,
                                 options.mlp_hidden_dims,
                                 options.mlp_hidden2_dims, 4, self.activation)
        self.labeled_MLP = MLP(self.model, 'labeled', mlp_in_dims,
                               options.mlp_hidden_dims,
                               options.mlp_hidden2_dims,
                               2 * len(self.irels) + 2, self.activation)
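Given the combiner parameters above, soft composition presumably concatenates the head and dependent vectors with a relation embedding and projects back to the LSTM output size. A plausible sketch; the method name and call pattern are assumptions:

    def compose(self, head_vec, dep_vec, rel):
        # hypothetical use of cmp_rel_lookup / combiner_W1 / combiner_b1 from __init__
        rel_emb = self.cmp_rel_lookup[self.compositional_relations_dict[rel]]
        inp = dy.concatenate([head_vec, dep_vec, rel_emb])  # cmb_sz = 2 * 2 * lstm_output_size + rel_emb_sz
        return dy.tanh(self.combiner_W1 * inp + self.combiner_b1)  # out_sz = 2 * lstm_output_size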
Example #6
    def __init__(self, words, pos, rels, cpos, langs, w2i, ch, options):
        """
        0 = LA, 1 = RA, 2 = SH, 3 = SW
        """

        global dy
        import dynet as dy  # import here so we don't load Dynet if just running parser.py --help for example

        self.model = dy.ParameterCollection()
        self.trainer = dy.AdamTrainer(self.model, alpha=options.learning_rate)

        self.activations = {
            'tanh': dy.tanh,
            'sigmoid': dy.logistic,
            'relu': dy.rectify,
            'tanh3': lambda x: dy.tanh(dy.cwise_multiply(dy.cwise_multiply(x, x), x)),
        }
        self.activation = self.activations[options.activation]

        self.oracle = options.oracle
        self.shareMLP = options.shareMLP
        self.config_lembed = options.lembed_config

        #vectors used
        self.headFlag = options.headFlag
        self.rlMostFlag = options.rlMostFlag
        self.rlFlag = options.rlFlag
        self.k = options.k

        # dimensions depending on extended features
        self.nnvecs = ((1 if self.headFlag else 0)
                       + (2 if self.rlFlag or self.rlMostFlag else 0))
        self.feature_extractor = FeatureExtractor(self.model, words, rels,
                                                  langs, w2i, ch, self.nnvecs,
                                                  options)
        self.irels = self.feature_extractor.irels

        #mlps
        mlp_in_dims = options.lstm_output_size * 2 * self.nnvecs * (self.k + 1)
        if self.config_lembed:
            mlp_in_dims += options.lang_emb_size

        h1 = options.mlp_hidden_dims
        h2 = options.mlp_hidden2_dims
        if not options.multiling or self.shareMLP:
            self.unlabeled_MLP = MLP(self.model, mlp_in_dims, h1, h2, 4,
                                     self.activation)
            self.labeled_MLP = MLP(self.model, mlp_in_dims, h1, h2,
                                   2 * len(rels) + 2, self.activation)
        else:
            self.labeled_mlpdict = {}
            for lang in self.feature_extractor.langs:
                self.labeled_mlpdict[lang] = MLP(self.model, mlp_in_dims, h1,
                                                 h2, 2 * len(rels) + 2,
                                                 self.activation)

            self.unlabeled_mlpdict = {}
            for lang in self.feature_extractor.langs:
                self.unlabeled_mlpdict[lang] = MLP(self.model, mlp_in_dims, h1,
                                                   h2, 4, self.activation)
Example #7
    def __init__(self, words, pos, rels, w2i, options):
        self.model = dn.Model()
        self.trainer = dn.AdamTrainer(self.model)
        random.seed(1)

        # noinspection PyUnresolvedReferences
        self.activations = {
            'tanh': dn.tanh,
            'sigmoid': dn.logistic,
            'relu': dn.rectify,
            'tanh3': lambda x: dn.tanh(dn.cwise_multiply(dn.cwise_multiply(x, x), x)),
        }
        self.activation = self.activations[options.activation]

        self.options = options
        self.oracle = options.oracle
        self.ldims = options.lstm_dims * 2
        self.wdims = options.wembedding_dims
        self.pdims = options.pembedding_dims
        self.rdims = options.rembedding_dims
        self.wordsCount = words
        self.vocab = {word: ind + 3 for word, ind in w2i.items()}
        self.pos = {word: ind + 3 for ind, word in enumerate(pos)}
        self.relation = options.relation
        if self.relation:
            self.rels = {word: ind for ind, word in enumerate(rels)}
            self.irels = rels
        else:
            self.rels = {"X": 0}
            self.irels = ["X"]

        self.headFlag = options.headFlag
        self.rlMostFlag = options.rlMostFlag
        self.rlFlag = options.rlFlag
        self.k = options.window

        self.nnvecs = self.headFlag + self.rlFlag * 2 + self.rlMostFlag * 2
        self.actions = transition_utils.ArcHybridActions(
            self.irels, options.action_file)

        if options.external_embedding is not None:
            self.extrnd, self.elookup, self.edim = nn.get_external_embedding(
                self.model, options.external_embedding)
            print('Loaded external embedding. Vector dimensions:', self.edim)
        else:
            self.extrnd, self.elookup, self.edim = None, None, 0

        dims = self.wdims + self.pdims + self.edim
        self.rnn = nn.BiLSTM(self.model,
                             [dims] + [self.ldims] * options.lstm_layers)

        self.hidden_units = options.hidden_units
        self.hidden2_units = options.hidden2_units
        self.vocab['*PAD*'] = 1
        self.pos['*PAD*'] = 1

        self.vocab['*INITIAL*'] = 2
        self.pos['*INITIAL*'] = 2

        self.wlookup = self.model.add_lookup_parameters(
            (len(words) + 3, self.wdims))
        self.plookup = self.model.add_lookup_parameters(
            (len(pos) + 3, self.pdims))
        self.rlookup = self.model.add_lookup_parameters(
            (len(rels), self.rdims))

        self.word2lstm = self.model.add_parameters(
            (self.ldims, self.wdims + self.pdims + self.edim))
        self.word2lstmbias = self.model.add_parameters((self.ldims))
        # self.lstm2lstm = self.model.add_parameters((self.ldims, self.ldims * self.nnvecs + self.rdims))
        # self.lstm2lstmbias = self.model.add_parameters((self.ldims))

        input_dims = self.ldims * self.nnvecs * (self.k + 1)
        action_dims = [
            input_dims, self.hidden_units, self.hidden2_units,
            len(self.actions)
        ]
        action_dims = [i for i in action_dims if i != 0]
        self.action_classifier = nn.DenseLayers(self.model, action_dims,
                                                self.activation)

        relation_dims = [
            input_dims, self.hidden_units, self.hidden2_units,
            len(self.actions.decoded_with_relation)
        ]
        relation_dims = [i for i in relation_dims if i != 0]
        self.relation_classifier = nn.DenseLayers(self.model, relation_dims,
                                                  self.activation)

        if self.options.beam_size == 0:
            self.options.beam_search = False
Example #8
    def __init__(self, words, pos, rels, cpos, langs, w2i, ch, options):
        self.model = dy.ParameterCollection()
        self.trainer = dy.AdamTrainer(self.model, alpha=options.learning_rate)
        random.seed(1)

        rels.append('runk')

        self.activations = {
            'tanh': dy.tanh,
            'sigmoid': dy.logistic,
            'relu': dy.rectify,
            'tanh3': lambda x: dy.tanh(dy.cwise_multiply(dy.cwise_multiply(x, x), x)),
        }
        self.activation = self.activations[options.activation]

        self.oracle = options.oracle
        self.multiling = options.multiling
        self.ldims = options.lstm_dims
        self.cldims = options.chlstm_dims
        self.wdims = options.wembedding_dims
        self.rdims = options.rembedding_dims
        self.cdims = options.cembedding_dims
        self.langdims = options.lembedding_dims
        self.wordsCount = words
        self.vocab = {word: ind + 3 for word, ind in w2i.items()}
        self.chars = {char: ind + 3 for ind, char in enumerate(ch)}
        self.pos = {word: ind + 3 for ind, word in enumerate(pos)}
        self.cpos = {word: ind + 3 for ind, word in enumerate(cpos)}
        self.rels = {word: ind for ind, word in enumerate(rels)}
        if langs:
            self.langs = {word: ind for ind, word in enumerate(langs)}
        else:
            self.langs = None
        self.irels = rels
        self.debug = options.debug

        self.headFlag = options.headFlag
        self.rlMostFlag = options.rlMostFlag
        self.rlFlag = options.rlFlag
        self.k = options.window

        # dimensions depending on extended features
        self.nnvecs = ((1 if self.headFlag else 0)
                       + (2 if self.rlFlag or self.rlMostFlag else 0))

        self.external_embedding = None
        if options.external_embedding is not None:
            external_embedding_fp = open(options.external_embedding, 'r')
            external_embedding_fp.readline()
            self.external_embedding = {}
            for line in external_embedding_fp:
                line = line.strip().split()
                self.external_embedding[line[0]] = [float(f) for f in line[1:]]

            external_embedding_fp.close()

            self.edim = len(next(iter(self.external_embedding.values())))
            self.noextrn = [0.0 for _ in range(self.edim)]  # all-zero fallback vector
            self.extrnd = {
                word: i + 3
                for i, word in enumerate(self.external_embedding)
            }
            self.elookup = self.model.add_lookup_parameters(
                (len(self.external_embedding) + 3, self.edim))
            for word, i in self.extrnd.items():
                self.elookup.init_row(i, self.external_embedding[word])
            self.extrnd['*PAD*'] = 1
            self.extrnd['*INITIAL*'] = 2

            print('Loaded external embedding. Vector dimensions:', self.edim)

        dims = (self.wdims
                + (self.edim if self.external_embedding is not None else 0)
                + (self.langdims if self.multiling else 0)
                + 2 * self.cldims)

        self.surfaceBuilders = [
            dy.VanillaLSTMBuilder(1, dims, self.ldims, self.model),
            dy.VanillaLSTMBuilder(1, dims, self.ldims, self.model)
        ]
        self.bsurfaceBuilders = [
            dy.VanillaLSTMBuilder(1, 2 * self.ldims, self.ldims, self.model),
            dy.VanillaLSTMBuilder(1, 2 * self.ldims, self.ldims, self.model)
        ]

        self.charBuilders = [
            dy.VanillaLSTMBuilder(1, self.cdims, self.cldims, self.model),
            dy.VanillaLSTMBuilder(1, self.cdims, self.cldims, self.model)
        ]

        self.hidden_units = options.hidden_units
        self.hidden2_units = options.hidden2_units
        self.vocab['*PAD*'] = 1
        if self.langs:
            self.langs['*PAD*'] = 1

        self.vocab['*INITIAL*'] = 2
        if self.langs:
            self.langs['*INITIAL*'] = 2

        self.clookup = self.model.add_lookup_parameters(
            (len(ch) + 3, self.cdims))
        self.wlookup = self.model.add_lookup_parameters(
            (len(words) + 3, self.wdims))
        self.rlookup = self.model.add_lookup_parameters(
            (len(rels), self.rdims))
        if self.multiling:
            self.langslookup = self.model.add_lookup_parameters(
                (len(langs) + 3, self.langdims))

        #used in the PaddingVec
        self.word2lstm = self.model.add_parameters((self.ldims * 2, dims))
        self.word2lstmbias = self.model.add_parameters((self.ldims * 2))
        self.chPadding = self.model.add_parameters((self.cldims * 2))

        self.hidLayer = self.model.add_parameters(
            (self.hidden_units, self.ldims * 2 * self.nnvecs * (self.k + 1)))
        self.hidBias = self.model.add_parameters((self.hidden_units))

        self.hid2Layer = self.model.add_parameters(
            (self.hidden2_units, self.hidden_units))
        self.hid2Bias = self.model.add_parameters((self.hidden2_units))

        self.outLayer = self.model.add_parameters(
            (4, self.hidden2_units
             if self.hidden2_units > 0 else self.hidden_units))
        self.outBias = self.model.add_parameters((4))

        # r stands for relation

        self.rhidLayer = self.model.add_parameters(
            (self.hidden_units, self.ldims * 2 * self.nnvecs * (self.k + 1)))
        self.rhidBias = self.model.add_parameters((self.hidden_units))

        self.rhid2Layer = self.model.add_parameters(
            (self.hidden2_units, self.hidden_units))
        self.rhid2Bias = self.model.add_parameters((self.hidden2_units))

        self.routLayer = self.model.add_parameters(
            (2 * len(self.irels) + 2, self.hidden2_units
             if self.hidden2_units > 0 else self.hidden_units))
        self.routBias = self.model.add_parameters((2 * len(self.irels) + 2))
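These parameter blocks imply two feed-forward scorers applied elsewhere in the parser (one over the 4 transitions, one over the relations). A plausible reconstruction of the transition scorer's forward pass; the method name is an assumption, the parameter names come from __init__ above:

    def __score_transitions(self, feature_vec):
        # hypothetical forward pass over hidLayer / hid2Layer / outLayer
        h = self.activation(self.hidLayer * feature_vec + self.hidBias)
        if self.hidden2_units > 0:
            h = self.activation(self.hid2Layer * h + self.hid2Bias)
        return self.outLayer * h + self.outBias  # 4 transition scores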
Example #9
    def __init__(self, vocab, options):

        # import here so we don't load Dynet if just running parser.py --help for example
        from multilayer_perceptron import MLP
        from feature_extractor import FeatureExtractor
        global dy
        import dynet as dy

        global LEFT_ARC, RIGHT_ARC, SHIFT, SWAP
        LEFT_ARC, RIGHT_ARC, SHIFT, SWAP = 0, 1, 2, 3

        self.model = dy.ParameterCollection()
        self.trainer = dy.AdamTrainer(self.model, alpha=options.learning_rate)

        self.activations = {
            'tanh': dy.tanh,
            'sigmoid': dy.logistic,
            'relu': dy.rectify,
            'tanh3': lambda x: dy.tanh(dy.cwise_multiply(dy.cwise_multiply(x, x), x)),
        }
        self.activation = self.activations[options.activation]

        self.oracle = options.oracle

        self.headFlag = options.headFlag
        self.rlMostFlag = options.rlMostFlag
        self.rlFlag = options.rlFlag
        self.k = options.k
        self.distances = 4  # the probe looks at distances between upcoming tokens,
        # normalized by the smallest, among:
        # s0 - b0
        # s0 - b1
        # b0 - closest bi: if < s0 - b0, do a Shift
        # closest si - b0: if ~= s0 - b0, do a Reduce

        # dimensions depending on extended features
        self.nnvecs = ((1 if self.headFlag else 0)
                       + (2 if self.rlFlag or self.rlMostFlag else 0))
        self.feature_extractor = FeatureExtractor(self.model, options, vocab,
                                                  self.nnvecs)
        self.irels = self.feature_extractor.irels

        if options.no_bilstms > 0:  # number of bilstms
            mlp_in_dims = options.lstm_output_size * 2 * self.nnvecs * (self.k + 1)
        else:
            mlp_in_dims = self.feature_extractor.lstm_input_size * self.nnvecs * (self.k + 1)

        # use attention
        if options.bert and options.attention:
            # add attention vectors from the stack to the top of the buffer and vice versa
            attention_size = self.k * 2
            # all layers
            #layers = self.feature_extractor.bert.model.config.num_hidden_layers
            #attention_size = layers * layers * self.k # * 2
            mlp_in_dims += attention_size

        # Sartiano
        if options.distance_probe_conf:
            print('Distance Probe enabled', file=sys.stderr)
            from distance_probe import DistanceProbe
            self.distance_probe = DistanceProbe(options.distance_probe_conf,
                                                options.dynet_seed)
            mlp_in_dims += self.distances
        else:
            self.distance_probe = None

        self.attention_indices = [
            int(x) for x in options.attention.split(',')
        ] if options.attention else []

        self.unlabeled_MLP = MLP(self.model, 'unlabeled', mlp_in_dims,
                                 options.mlp_hidden_dims,
                                 options.mlp_hidden2_dims, SWAP + 1,
                                 self.activation)
        self.labeled_MLP = MLP(self.model, 'labeled', mlp_in_dims,
                               options.mlp_hidden_dims,
                               options.mlp_hidden2_dims,
                               2 * len(self.irels) + 2, self.activation)
        print('MLP size: (%d, %d)' % (mlp_in_dims, options.mlp_hidden_dims),
              file=sys.stderr)
Example #10
def leaky_relu(x):
    """Leaky ReLU: identity for positive inputs, slope 0.01 for negative ones.

    :type x: dn.Expression
    :rtype: dn.Expression
    """
    positive = dn.rectify(x)
    negative = dn.rectify(-x) * -0.01
    return positive + negative


activations = {
    'tanh': dn.tanh,
    'sigmoid': dn.logistic,
    'relu': dn.rectify,
    'tanh3': lambda x: dn.tanh(dn.cwise_multiply(dn.cwise_multiply(x, x), x)),
    'selu': selu,
    'leaky-relu': leaky_relu,
}
trainers = {
    "adam": dn.AdamTrainer,
    "sgd": dn.SimpleSGDTrainer,
    "momentum": dn.MomentumSGDTrainer,
    "rmsprop": dn.RMSPropTrainer
}

recurrent_builders = {"lstm": dn.VanillaLSTMBuilder, "gru": dn.GRUBuilder}


def recurrent_factory_factory(builder):
    # use closure to hold "builder"
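For reference, the two-rectify trick in leaky_relu above equals the usual max(x, 0.01x) formulation. A quick check, assuming dynet is importable as dn:

dn.renew_cg()
x = dn.inputVector([2.0, -2.0])
print(leaky_relu(x).value())  # ~[2.0, -0.02]: identity for positives, slope 0.01 for negatives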
Example #11
class MaxSubGraphLSTM(object):

    activations = {
        'tanh': dn.tanh,
        'sigmoid': dn.logistic,
        'relu': dn.rectify,
        'tanh3': lambda x: dn.tanh(dn.cwise_multiply(dn.cwise_multiply(x, x), x)),
    }
    decoders = {
        "arcfactor": decoder.arcfactor,
        "1ec2p": decoder.oneec2p,
        "1ec2p-vine": decoder.oneec2p_vine
    }

    def __init__(self, vocab, pos, rels, w2i, options):
        self.model = dn.Model()
        random.seed(1)
        self.trainer = dn.AdamTrainer(self.model)

        self.activation = self.activations[options.activation]
        self.decoder = self.decoders[options.decoder](options)
        self.test_decoder = self.decoders[options.test_decoder](options) \
            if options.test_decoder is not None \
            else self.decoder
        self.cost_augment = cost_augments[options.cost_augment]

        self.labelsFlag = options.labelsFlag
        self.options = options

        self.ldims = options.lstm_dims
        self.wdims = options.wembedding_dims
        self.pdims = options.pembedding_dims
        self.rdims = options.rembedding_dims
        self.layers = options.lstm_layers
        self.wordsCount = vocab
        self.vocab = {word: ind + 3 for word, ind in w2i.items()}
        self.pos = {word: ind + 3 for ind, word in enumerate(pos)}
        self.rels = {word: ind for ind, word in enumerate(rels)}  # type: dict[str, int]
        self.irels = rels

        if options.external_embedding is not None:
            self.extrnd, self.elookup, self.edim = nn.get_external_embedding(
                self.model, options.external_embedding)
            logger.info('Loaded external embedding. Vector dimensions: %d',
                        self.edim)
        else:
            self.extrnd, self.elookup, self.edim = None, None, 0

        dims = self.wdims + self.pdims + self.edim
        self.rnn = nn.BiLSTM(self.model,
                             [dims] + [self.ldims * 2] * options.lstm_layers)

        self.hidden_units = options.hidden_units
        self.hidden2_units = options.hidden2_units

        self.vocab['*PAD*'] = 1
        self.pos['*PAD*'] = 1

        self.vocab['*INITIAL*'] = 2
        self.pos['*INITIAL*'] = 2

        self.wlookup = self.model.add_lookup_parameters(
            (len(vocab) + 3, self.wdims))
        self.plookup = self.model.add_lookup_parameters(
            (len(pos) + 3, self.pdims))
        self.rlookup = self.model.add_lookup_parameters(
            (len(rels), self.rdims))

        if self.hidden2_units > 0:
            dense_dims = [self.hidden_units, self.hidden2_units, 1]
            use_bias = [True, False]
        else:
            dense_dims = [self.hidden_units, 1]
            # use_bias = [dn.NormalInitializer(0, 0)]
            use_bias = [False]

        self.head_dense_layer = DenseLayers(
            self.model, [self.ldims * 2, self.hidden_units], self.activation)
        self.dep_dense_layer = DenseLayers(self.model,
                                           [self.ldims * 2, self.hidden_units],
                                           self.activation)

        self.fusion_layer = nn.Biaffine(self.model, self.hidden_units,
                                        self.activation)

        if self.labelsFlag:
            self.relation_binear_layer = BiLinear(self.model, self.ldims * 2,
                                                  self.hidden_units)
            relation_dense_dims = list(dense_dims)
            relation_dense_dims[-1] = len(self.irels)

            self.relation_dense_layer = DenseLayers(self.model,
                                                    relation_dense_dims,
                                                    self.activation)

    def get_vecs(self, node):
        wordvec = self.wlookup[int(self.vocab.get(node.norm, 0))] if self.wdims > 0 else None
        posvec = self.plookup[int(self.pos.get(node.postag, 0))] if self.pdims > 0 else None
        evec = (self.elookup[int(self.extrnd.get(node.form, self.extrnd.get(node.norm, 0)))]
                if self.edim > 0 else None)
        return dn.concatenate([v for v in (wordvec, posvec, evec) if v is not None])

    def __evaluate(self, lstm_output):
        length = len(lstm_output)

        # flatten (i, j) -> i * length + j, so i = k // length and j = k % length
        # head order over the flattened batch: 0 0 0 1 1 1 2 2 2 ...
        heads = [
            dn.transpose(self.activation(self.head_dense_layer(
                lstm_output[i]))) for i in range(length)
        ]
        mods = [
            self.activation(self.dep_dense_layer(lstm_output[i]))
            for i in range(length)
        ]
        head_part = dn.concatenate_to_batch(
            [heads[i // len(lstm_output)] for i in range(length * length)])
        # 1 2 3 4 .. 1 2 3 4 ...
        mod_part = dn.concatenate_to_batch([mods[i]
                                            for i in range(length)] * length)

        output = self.fusion_layer(head_part, mod_part)

        exprs = [[
            dn.pick_batch_elem(output, i * length + j) for j in range(length)
        ] for i in range(length)]
        scores = output.npvalue()
        scores = scores.reshape((len(lstm_output), len(lstm_output)))

        return scores, exprs

    def __evaluate_labels(self, lstm_output, edges):
        """
        :type lstm_output: list[dn.Expression]
        :type edges: Edge
        :return: 
        """
        rheadfov = [None] * len(lstm_output)
        rmodfov = [None] * len(lstm_output)

        for source, label, target in edges:
            if rheadfov[source] is None:
                rheadfov[source] = self.relation_binear_layer.w1.expr() * lstm_output[source]
            if rmodfov[target] is None:
                rmodfov[target] = self.relation_binear_layer.w2.expr() * lstm_output[target]

            hidden = self.activation(rheadfov[source] + rmodfov[target] +
                                     self.relation_binear_layer.bias.expr())
            output = self.relation_dense_layer(hidden)

            yield output

    def Save(self, filename):
        self.model.save(filename)

    def Load(self, filename):
        self.model.load(filename)

    def Predict(self, graphs):
        for iSentence, sentence in enumerate(graphs):
            vecs = [self.get_vecs(i) for i in sentence]
            lstm_output = self.rnn([vecs[i] for i in range(len(sentence))])
            scores, exprs = self.__evaluate(lstm_output)

            output_graph = self.test_decoder(scores)

            edges = []
            for source_id in range(len(sentence)):
                for target_id in range(len(sentence)):
                    if output_graph[source_id][target_id]:
                        edges.append(
                            graph_utils.Edge(source_id, "X", target_id))

            if self.labelsFlag:
                labeled_edges = []

                for edge, r_scores_expr in \
                        zip(edges, self.__evaluate_labels(lstm_output, edges)):
                    r_scores = r_scores_expr.value()
                    label_index = max(
                        ((l, scr) for l, scr in enumerate(r_scores)),
                        key=itemgetter(1))[0]
                    label = self.irels[label_index]
                    labeled_edges.append(
                        graph_utils.Edge(edge.source, label, edge.target))
                edges = labeled_edges

            dn.renew_cg()
            yield sentence.replaced_edges(edges)

    def Train(self, graphs):
        """
        :type graphs: [Graph]
        :return: 
        """
        eloss = 0.0
        mloss = 0.0
        total_gold_edge = 0
        total_predict_edge = 0
        recalled_gold_edge = 0.0
        correct_predict_edge = 0.0
        start = time.time()

        shuffled_index = list(range(len(graphs)))
        random.shuffle(shuffled_index)

        iSentence = -1
        for g_idx in shuffled_index:
            sentence = graphs[g_idx]  # type: graph_utils.Graph
            dn.renew_cg()
            iSentence += 1
            if iSentence % 100 == 0 and iSentence != 0:
                logger.info(
                    'Processing sentence number: %d, Loss: %.2f, '
                    'Accuracy: %.2f, Recall: %.2f, Time: %.2f', iSentence,
                    eloss, correct_predict_edge / total_predict_edge * 100,
                    recalled_gold_edge / total_gold_edge * 100,
                    time.time() - start)
                start = time.time()
                eloss = 0.0
                total_gold_edge = 0
                total_predict_edge = 0
                recalled_gold_edge = 0.0
                correct_predict_edge = 0.0

            vecs = [self.get_vecs(i) for i in sentence]
            lstm_output = self.rnn([vecs[i] for i in range(len(sentence))])
            scores, exprs = self.__evaluate(lstm_output)

            self.cost_augment(scores, sentence, self.options)

            output_graph = self.decoder(scores)
            gold_graph = sentence.to_matrix()

            lerrs = []
            if self.labelsFlag:
                edges = list(sentence.generate_edges())
                for edge, r_scores_expr \
                        in zip(edges, self.__evaluate_labels(lstm_output, edges)):
                    head, label, modifier = edge
                    r_scores = r_scores_expr.value()
                    gold_label_index = self.rels[label]
                    wrong_label_index = max(((l, scr)
                                             for l, scr in enumerate(r_scores)
                                             if l != gold_label_index),
                                            key=itemgetter(1))[0]
                    if r_scores[gold_label_index] < r_scores[
                            wrong_label_index] + 1:
                        lerrs.append(r_scores_expr[wrong_label_index] -
                                     r_scores_expr[gold_label_index])

            errs = []
            for source_id in range(len(sentence)):
                for target_id in range(len(sentence)):
                    gold_exist = gold_graph[source_id][target_id]
                    output_exist = output_graph[source_id][target_id]
                    if gold_exist and output_exist:
                        total_gold_edge += 1
                        total_predict_edge += 1
                        correct_predict_edge += 1
                        recalled_gold_edge += 1
                    elif not gold_exist and not output_exist:
                        pass
                    elif gold_exist and not output_exist:
                        total_gold_edge += 1
                        errs.append(-exprs[source_id][target_id] + 1)
                    elif not gold_exist and output_exist:
                        total_predict_edge += 1
                        errs.append(exprs[source_id][target_id])
                    else:
                        raise SystemError()

            if len(errs) > 0 or len(lerrs) > 0:
                loss = dn.scalarInput(0.0)
                if len(lerrs):
                    loss += dn.esum(lerrs)
                if len(errs):
                    loss += dn.esum(errs)
                loss_value = loss.scalar_value()
                eloss += loss_value
                mloss += loss_value  # accumulate the epoch total reported below
                loss.backward()
                self.trainer.update()

        self.trainer.update_epoch()
        logger.info("Loss: %.2f", mloss / iSentence)
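The batched scoring in __evaluate depends on a fixed (head, modifier) enumeration order; a pure-Python sketch of the index mapping it relies on:

length = 3
heads_order = [k // length for k in range(length * length)]  # 0 0 0 1 1 1 2 2 2
mods_order = [k % length for k in range(length * length)]    # 0 1 2 0 1 2 0 1 2
# dn.pick_batch_elem(output, i * length + j) thus reads the score of head i with modifier j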