Example #1
    def _create_backend(self, **kwargs):
        backend = Backend(self.config_params.get('backend', 'tf'))
        if 'preproc' not in self.config_params:
            self.config_params['preproc'] = {}
        if backend.name == 'pytorch':
            self.config_params['preproc']['trim'] = True
        elif backend.name == 'dy':
            import _dynet
            dy_params = _dynet.DynetParams()
            dy_params.from_args()
            dy_params.set_requested_gpus(1)
            if 'autobatchsz' in self.config_params['train']:
                dy_params.set_autobatch(True)
            else:
                raise Exception('Tagger currently only supports autobatching. '
                                'Change "batchsz" to 1 and under "train", set "autobatchsz" to your desired batchsz')
            dy_params.init()
            backend.params = {'pc': _dynet.ParameterCollection(), 'batched': False}
            self.config_params['preproc']['trim'] = True
        else:
            self.config_params['preproc']['trim'] = False
            # FIXME These should be registered instead
            exporter_type = kwargs.get('exporter_type', 'default')
            if exporter_type == 'default':
                from mead.tf.exporters import TaggerTensorFlowExporter
                backend.exporter = TaggerTensorFlowExporter
            elif exporter_type == 'preproc':
                from mead.tf.preproc_exporters import TaggerTensorFlowPreProcExporter
                import mead.tf.preprocessors
                backend.exporter = TaggerTensorFlowPreProcExporter

        backend.load(self.task_name())

        return backend
Example #2
    def _create_backend(self, **kwargs):
        backend = Backend(self.config_params.get('backend', 'tf'))
        if 'preproc' not in self.config_params:
            self.config_params['preproc'] = {}
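        # NOTE: show_examples below is assumed to be defined at module scope in the original source.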
        self.config_params['preproc']['show_ex'] = show_examples
        if backend.name == 'pytorch':
            self.config_params['preproc']['trim'] = True
        elif backend.name == 'dy':
            import _dynet
            dy_params = _dynet.DynetParams()
            dy_params.from_args()
            dy_params.set_requested_gpus(1)
            if 'autobatchsz' in self.config_params['train']:
                self.config_params['train']['trainer_type'] = 'autobatch'
                dy_params.set_autobatch(True)
                batched = False
            else:
                batched = True
            dy_params.init()
            backend.params = {'pc': _dynet.ParameterCollection(), 'batched': batched}
            self.config_params['preproc']['trim'] = True
        else:
            self.config_params['preproc']['trim'] = True
        backend.load(self.task_name())

        return backend
Example #3
 def __init__(self,
              h_layers,
              h_dim,
              vocab_size,
              noise_sigma=0.1,
              trainer="adam",
              clip_threshold=5.0,
              add_hidden=False,
              learning_rate=0.001,
              activation='rectify'):
     self.model = dynet.ParameterCollection()
     self.h_layers = h_layers
     self.h_dim = h_dim
     self.vocab_size = vocab_size
     self.noise_sigma = noise_sigma
     if self.noise_sigma > 0.05:
         print('Noise sigma > %.4f. Training might not work.' % noise_sigma)
     self.layers = []
     self.output_layers_dict = {}
     self.trainer = TRAINER_MAP[trainer](self.model, learning_rate)
     self.trainer.set_clip_threshold(clip_threshold)
     self.task_ids = ["F0", "F1", "Ft"]
     self.add_hidden = add_hidden
     self.activation_func = activation
     self.activation = activation2func(activation)
Example #4
    def _create_backend(self, **kwargs):
        backend = Backend(self.config_params.get('backend', 'tf'))
        if backend.name == 'dy':
            import _dynet
            dy_params = _dynet.DynetParams()
            dy_params.from_args()
            dy_params.set_requested_gpus(1)
            if 'autobatchsz' in self.config_params['train']:
                self.config_params['train']['trainer_type'] = 'autobatch'
                dy_params.set_autobatch(True)
                batched = False
            else:
                batched = True
            dy_params.init()
            backend.params = {'pc': _dynet.ParameterCollection(), 'batched': batched}
        elif backend.name == 'tf':
            # FIXME this should be registered as well!
            exporter_type = kwargs.get('exporter_type', 'default')
            if exporter_type == 'default':
                from mead.tf.exporters import ClassifyTensorFlowExporter
                backend.exporter = ClassifyTensorFlowExporter
            elif exporter_type == 'preproc':
                from mead.tf.preproc_exporters import ClassifyTensorFlowPreProcExporter
                import mead.tf.preprocessors
                backend.exporter = ClassifyTensorFlowPreProcExporter

        backend.load(self.task_name())

        return backend
Example #5
    def _create_backend(self, **kwargs):
        backend = Backend(self.config_params.get('backend', 'tf'))
        if 'preproc' not in self.config_params:
            self.config_params['preproc'] = {}
        if backend.name == 'pytorch':
            self.config_params['preproc']['trim'] = True
        elif backend.name == 'dy':
            import _dynet
            dy_params = _dynet.DynetParams()
            dy_params.from_args()
            dy_params.set_requested_gpus(1)
            if 'autobatchsz' in self.config_params['train']:
                dy_params.set_autobatch(True)
            else:
                raise Exception('Tagger currently only supports autobatching. '
                                'Change "batchsz" to 1 and under "train", set "autobatchsz" to your desired batchsz')
            dy_params.init()
            backend.params = {'pc': _dynet.ParameterCollection(), 'batched': False}
            self.config_params['preproc']['trim'] = True
        else:
            self.config_params['preproc']['trim'] = False

        backend.load(self.task_name())

        return backend
Example #6
 def load(cls,
          word_embeddings,
          base_file,
          embedding_dim=None,
          hidden_dim=None,
          classes_dim=None):
     pc = dy.ParameterCollection()
     matrices = dy.load(base_file, pc)
     # matrices = matrices[1:] # for now, skip "E"
     return cls(word_embeddings, embedding_dim, hidden_dim, classes_dim, pc,
                matrices)
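
A hedged sketch (not from the repo above) of the save side that pairs with dy.load in this example; dy.save writes saveable objects that dy.load later restores, in the same order, into a fresh ParameterCollection:

import dynet as dy

pc = dy.ParameterCollection()
emb = pc.add_lookup_parameters((1000, 300))   # e.g. a word-embedding table
lstm = dy.LSTMBuilder(1, 300, 200, pc)        # LSTMBuilder(layers, input_dim, hidden_dim, pc)
dy.save("base_file", [emb, lstm])             # writes base_file.data and base_file.meta

pc2 = dy.ParameterCollection()
emb2, lstm2 = dy.load("base_file", pc2)       # restores the saved objects, in the same order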
Example #7
 def __init__(self,
              h_layers,
              h_dim,
              vocab_size,
              noise_sigma=0.1,
              trainer="adam",
              clip_threshold=5.0,
              activation='rectify'):
     self.model = dynet.ParameterCollection()
     self.h_layers = h_layers
     self.h_dim = h_dim
     self.vocab_size = vocab_size
     self.noise_sigma = noise_sigma
     self.activation = activation2func(activation)
     self.layers = []
     self.trainer = TRAINER_MAP[trainer](self.model)
     self.trainer.set_clip_threshold(clip_threshold)
Example #8
 def __init__(self,
              word_size,
              context_fre,
              context_size,
              vocab,
              window=2,
              subsample_n=2000,
              mode='bow',
              embed_size=200,
              batch_size=128,
              num_sampled=5,
              epoch=6):
     self.embed_size = embed_size
     self.mode = mode
     self.window = window
     self.vocab = vocab
     self.word_size = word_size
     self.subsample_n = subsample_n
     self.context_size = context_size
     self.num_sampled = num_sampled
     self.epoch = epoch
     self.context_fre = context_fre
      self.batch_size = batch_size
     self.pc = dy.ParameterCollection()
     self.optimizer = dy.AdamTrainer(self.pc)
     self.word_embeddings = self.pc.add_lookup_parameters((self.word_size, self.embed_size), name="word-embeddings")
     self.context_embeddings = self.pc.add_lookup_parameters((self.context_size, self.embed_size), name="context-embeddings")
     dy.renew_cg()
      print([(param.name(), param.shape()) for param in self.pc.lookup_parameters_list() + self.pc.parameters_list()])
Example #9
def create_network_params(nwords, ntags, external_E=None):
    # create a parameter collection and add the parameters.
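    # EMB, HIDDEN and INPUT are assumed to be module-level dimension constants in the original source.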
    print("adding parameters")
    m = dy.ParameterCollection()

    print("nwords: {}".format(nwords))
    E = m.add_lookup_parameters((nwords, EMB), name='E')
    if external_E is not None and sum(external_E.shape) > 0:
        assert external_E.shape[1] == EMB
        external_rows = external_E.shape[0]
        for r in range(external_rows):
            E.init_row(r, external_E[r, :])

    b = m.add_parameters(HIDDEN, name='b')
    U = m.add_parameters((ntags, HIDDEN), name='U')
    W = m.add_parameters((HIDDEN, INPUT), name='W')
    bp = m.add_parameters(ntags, name='bp')
    dy.renew_cg()
    return m, E, b, U, W, bp
Example #10
    def __init__(self,
                 word_embeddings,
                 embedding_dim=None,
                 hidden_dim=None,
                 classes_dim=None,
                 pc=None,
                 trained_matrices=None):
        embedding_dim = embedding_dim or 300
        hidden_dim = hidden_dim or 200
        classes_dim = classes_dim or 3

        self.embeddings = word_embeddings
        self.embedding_dim, self.hidden_dim, self.classes_dim = embedding_dim, hidden_dim, classes_dim

        if pc and trained_matrices:
            print "loading pretrained inputs"
            self.pc = pc
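            # DIMR_DEPTH, ATTEND_DEPTH and COMPARE_DEPTH are assumed to be module-level constants.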
            first_attend_index = 2 + DIMR_DEPTH * 2  # the *2 accounts for the w and b in each layer
            first_compare_index = first_attend_index + ATTEND_DEPTH * 2
            first_agg_index = first_compare_index + COMPARE_DEPTH * 2

            print "dimension reducer", range(1, first_attend_index)
            print "attend", range(first_attend_index, first_compare_index)
            print "compare", range(first_compare_index, first_agg_index)
            print "aggregate", range(first_agg_index, len(trained_matrices))

            self.dimension_reducer = self._create_dimension_reducer(
                trained_matrices[1:first_attend_index])
            self.attend = self._create_attend(
                trained_matrices[first_attend_index:first_compare_index])
            self.compare = self._create_compare(
                trained_matrices[first_compare_index:first_agg_index])
            self.aggregate = self._create_aggregate(
                trained_matrices[first_agg_index:])
            self.params = {"E": trained_matrices[0]}
        else:
            self.pc = dy.ParameterCollection()
            self.dimension_reducer = self._create_dimension_reducer()
            self.attend = self._create_attend()
            self.compare = self._create_compare()
            self.aggregate = self._create_aggregate()

            self.params = {"E": word_embeddings.as_dynet_lookup(self.pc)}
Example #11
 def __init__(self,
              in_dim,
              h_dim,
              c_in_dim,
              h_layers,
              embeds_file=None,
              activation=dynet.tanh,
              noise_sigma=0.1,
              word2id=None,
              add_hidden=False,
              trainer="adam",
              clip_threshold=5.0,
              learning_rate=0.001,
              adversarial_domains=None):
     self.w2i = {} if word2id is None else word2id  # word to index mapping
     self.c2i = {}  # char to index mapping
     self.tag2idx = {}  # tag to tag_id mapping
     self.model = dynet.ParameterCollection()  # init model
     # init trainer
     train_algo = TRAINER_MAP[trainer]
     self.trainer = train_algo(self.model, learning_rate)
     if clip_threshold:
         self.trainer.set_clip_threshold(clip_threshold)
     self.in_dim = in_dim
     self.h_dim = h_dim
     self.c_in_dim = c_in_dim
     self.activation = activation
     self.noise_sigma = noise_sigma
     self.h_layers = h_layers
     self.predictors = {
         "inner": [],
         "output_layers_dict": {},
         "task_expected_at": {}
     }  # the inner layers and predictors
     self.wembeds = None  # lookup: embeddings for words
     self.cembeds = None  # lookup: embeddings for characters
     self.embeds_file = embeds_file
     self.char_rnn = None  # RNN for character input
     self.task_ids = ["F0", "F1", "Ft"]
     self.add_hidden = add_hidden
     self.adversarial_domains = adversarial_domains
Example #12
    def create_computation_graph(self,
                                 num_lemmas,
                                 num_pos,
                                 num_dep,
                                 num_directions,
                                 num_relations,
                                 wv=None,
                                 lemma_dimension=50):
        model = dy.ParameterCollection()
        if self.opt['use_path']:
            input_dim = self.opt['PATH_LSTM_HIDDEN_DIM']
        else:
            input_dim = 0

        # dy.LSTMBuilder(NUM_LAYERS, INPUT_DIM, HIDDEN_DIM, pc)
        builder = dy.LSTMBuilder(
            self.opt['NUM_LAYERS'], lemma_dimension + self.opt['POS_DIM'] +
            self.opt['DEP_DIM'] + self.opt['DIR_DIM'], input_dim, model)

        model_parameters = {}
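        # feat_dims is assumed to be a module-level dict mapping feature names to lookup-table shapes.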

        for k, v in feat_dims.items():
            model_parameters[k] = model.add_lookup_parameters(v)
        # Concatenate x and y
        if self.opt['use_xy_embeddings']:
            input_dim += 2 * lemma_dimension
        if self.opt['use_features']:
            for name, dim in feat_dims.items():
                if 'diff' in name and not self.opt['use_freq_features']:
                    continue
                input_dim += dim[1]
        if self.opt['use_height_ebd']:
            model_parameters['height_lookup'] = model.add_lookup_parameters(
                (10, self.opt['height_ebd_dim']))
            input_dim += self.opt['height_ebd_dim']

        model_parameters['lemma_lookup'] = model.add_lookup_parameters(
            (num_lemmas, lemma_dimension))
        builder_hist = dy.LSTMBuilder(2, input_dim,
                                      self.opt['HIST_LSTM_HIDDEN_DIM'], model)

        # Pre-trained word embeddings
        if wv is not None:
            model_parameters['lemma_lookup'].init_from_array(wv)

        model_parameters['pos_lookup'] = model.add_lookup_parameters(
            (num_pos, self.opt['POS_DIM']))
        model_parameters['dep_lookup'] = model.add_lookup_parameters(
            (num_dep, self.opt['DEP_DIM']))
        model_parameters['dir_lookup'] = model.add_lookup_parameters(
            (num_directions, self.opt['DIR_DIM']))

        if not self.opt['one_layer']:
            if self.opt['use_history']:
                model_parameters['W2_rl'] = model.add_parameters(
                    (input_dim, self.opt['MLP_HIDDEN_DIM']))
                model_parameters['b2_rl'] = model.add_parameters(
                    (input_dim, 1))
                model_parameters['W1_rl'] = model.add_parameters(
                    (self.opt['MLP_HIDDEN_DIM'],
                     self.opt['HIST_LSTM_HIDDEN_DIM']))
                model_parameters['b1_rl'] = model.add_parameters(
                    (self.opt['MLP_HIDDEN_DIM'], 1))
            else:
                model_parameters['W2_rl'] = model.add_parameters(
                    (input_dim, self.opt['MLP_HIDDEN_DIM']))
                model_parameters['b2_rl'] = model.add_parameters(
                    (1, self.opt['MLP_HIDDEN_DIM']))
                model_parameters['W1_rl'] = model.add_parameters(
                    (self.opt['MLP_HIDDEN_DIM'], 1))
                model_parameters['b1_rl'] = model.add_parameters((1, 1))
        else:
            if self.opt['use_history']:
                model_parameters['W1_rl'] = model.add_parameters(
                    (input_dim, self.opt['HIST_LSTM_HIDDEN_DIM']))
                model_parameters['b1_rl'] = model.add_parameters(
                    (input_dim, 1))
            else:
                model_parameters['W1_rl'] = model.add_parameters(
                    (input_dim, 1))
                model_parameters['b1_rl'] = model.add_parameters((1, 1))

        if self.opt['load_model_file'] is not None:
            print('model loaded from', self.opt['load_model_file'])
            model.populate('{}'.format(self.opt['load_model_file']))
            if self.opt['load_opt']:
                print('opt loaded from', '{}.json'.format(
                    self.opt['load_model_file']))
                self.opt = json.load(
                    open('{}.json'.format(self.opt['load_model_file'])))

        return builder, model, model_parameters, builder_hist, input_dim
Example #13
 def __init__(self):
     self.pc = dy.ParameterCollection()
     self.params = {}
     self.last_output = None
     self.with_bias = None
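
A minimal, hedged usage sketch (not from any repo above) of the typical ParameterCollection life cycle: parameters are registered in the collection, a trainer is bound to it, and each step builds a graph, backpropagates, and applies gradients. Names and dimensions are illustrative:

import dynet as dy

pc = dy.ParameterCollection()
W = pc.add_parameters((2, 4))              # weight matrix
b = pc.add_parameters(2)                   # bias vector
trainer = dy.SimpleSGDTrainer(pc)          # the trainer updates everything registered in pc

dy.renew_cg()                              # start a fresh computation graph
x = dy.inputVector([1.0, 2.0, 3.0, 4.0])
loss = dy.squared_norm(W * x + b)          # Parameters behave as expressions in DyNet >= 2.0.4
loss.forward()
loss.backward()
trainer.update()                           # apply gradients to pc's parameters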
Example #14
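# NOTE: excerpt from a longer script; load(), vocab, vocab_out, int2char and pd (pandas) are assumed to be defined or imported earlier.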
test_ins, test_act, test_init, test_id = load('test_final.json')
dev_gt_int = pd.read_csv('./dev_interaction_y.csv',
                         index_col="id")['final_world_state'].values

# Dynet PART
## Initial SEQ2SEQ NETWORK
LAYERS = 1
INPUT_DIM = 50
char_DIM = 20
HIDDEN_DIM = 100
ATTENTION_DIM = HIDDEN_DIM
VOCAB_SIZE_input = len(vocab)
VOCAB_SIZE_out = len(vocab_out)
VOCAB_char = len(int2char)

pc = dy.ParameterCollection()
encoder = dy.CompactVanillaLSTMBuilder(LAYERS, INPUT_DIM, HIDDEN_DIM, pc)
decoder = dy.CompactVanillaLSTMBuilder(LAYERS, INPUT_DIM + HIDDEN_DIM * 2,
                                       HIDDEN_DIM, pc)
params_encoder = {}
params_encoder["lookup"] = pc.add_lookup_parameters(
    (VOCAB_SIZE_input, INPUT_DIM))

params_decoder = {}
params_decoder["lookup"] = pc.add_lookup_parameters(
    (VOCAB_SIZE_out, INPUT_DIM))
params_decoder["R"] = pc.add_parameters((VOCAB_SIZE_out, HIDDEN_DIM))
params_decoder["bias"] = pc.add_parameters((VOCAB_SIZE_out))
params_decoder["attention_w"] = pc.add_parameters((ATTENTION_DIM, HIDDEN_DIM))
params_decoder["attention_b"] = pc.add_parameters((ATTENTION_DIM))
params_decoder["attention_wc"] = pc.add_parameters((ATTENTION_DIM, HIDDEN_DIM))
Example #15
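    # NOTE: excerpt from a main() function; argparser and Configurable are assumed to be defined earlier.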
    argparser.add_argument('--dev_fscore', required=True)
    argparser.add_argument('--unsupervised', action="store_true")
    argparser.add_argument('--use_bert', action="store_true")
    args, extra_args = argparser.parse_known_args()
    args.config_file = "configs/{}.cfg".format(
        args.model[:args.model.find('Parser') + 6])
    config = Configurable(args.config_file, extra_args)

    dyparams = dy.DynetParams()
    # dyparams.from_args()
    # dyparams.set_autobatch(True)
    dyparams.set_random_seed(666)
    dyparams.set_mem(5120)
    dyparams.init()

    model = dy.ParameterCollection()
    model_path = config.load_model_path + \
        args.model + "_dev={}".format(args.dev_fscore)
    # model_path = config.load_model_path + "GNNParser2_50epoch"

    [parser] = dy.load(model_path, model)
    print("Loaded model from {}".format(model_path))

    if args.use_bert:
        test_bert_embeddings = parser.vocab.load_bert_embeddings(
            config.test_bert_file)
        print('Loaded bert embeddings!')

    testing_data = parser.vocab.gold_data_from_file(config.test_file)
    print("Loaded testing data from {}".format(config.test_file))
Example #16
def new_model():
    return dy.ParameterCollection()
def create_computation_graph(num_lemmas,
                             num_pos,
                             num_dep,
                             num_directions,
                             num_relations,
                             wv=None,
                             use_xy_embeddings=False,
                             num_hidden_layers=0,
                             lemma_dimension=50):
    """
    Initialize the model
    :param num_lemmas: number of distinct lemmas
    :param num_pos: number of distinct part-of-speech tags
    :param num_dep: number of distinct dependency labels
    :param num_directions: number of distinct path directions (e.g. >, <)
    :param num_relations: number of classes (e.g. binary = 2)
    :param wv: pre-trained word embeddings file
    :param use_xy_embeddings: whether to concatenate the x and y word embeddings to the network input
    :param num_hidden_layers: number of hidden layers for the term-pair classification network
    :param lemma_dimension: dimension of the lemma embeddings
    :return:
    """
    # dy.Model() is the pre-2.0 name and may error on recent DyNet; ParameterCollection is the current API -- GB
    dy.renew_cg()
    # Renew the computation graph.
    # Call this before building any new computation graph

    model = dy.ParameterCollection()
    # A ParameterCollection is a container for Parameters and LookupParameters;
    # dynet.Trainer objects take a ParameterCollection that defines which parameters are being trained.

    network_input = LSTM_HIDDEN_DIM

    builder = dy.LSTMBuilder(NUM_LAYERS,
                             lemma_dimension + POS_DIM + DEP_DIM + DIR_DIM,
                             network_input, model)

    # Concatenate x and y
    if use_xy_embeddings:
        network_input += 2 * lemma_dimension

    #  'the optimal size of the hidden layer is usually between the size of the input and size of the output layers'
    hidden_dim = int((network_input + num_relations) / 2)

    model_parameters = {}

    if num_hidden_layers == 0:
        # model_parameters['W_cnn'] = model.add_parameters((1, WIN_SIZE, EMB_SIZE, FILTER_SIZE))  # cnn weights
        # model_parameters['b_cnn'] = model.add_parameters((FILTER_SIZE))  # cnn bias

        model_parameters['W1'] = model.add_parameters(
            (num_relations, network_input))
        model_parameters['b1'] = model.add_parameters((num_relations, 1))

    elif num_hidden_layers == 1:

        model_parameters['W1'] = model.add_parameters(
            (hidden_dim, network_input))
        model_parameters['b1'] = model.add_parameters((hidden_dim, 1))
        model_parameters['W2'] = model.add_parameters(
            (num_relations, hidden_dim))
        model_parameters['b2'] = model.add_parameters((num_relations, 1))

    else:
        raise ValueError('Only 0 or 1 hidden layers are supported')

    model_parameters['lemma_lookup'] = model.add_lookup_parameters(
        (num_lemmas, lemma_dimension))
    #LookupParameters represents a table of parameters.
    # They are used to embed a set of discrete objects (e.g. word embeddings). These are sparsely updated.

    # Pre-trained word embeddings
    if wv is not None:
        model_parameters['lemma_lookup'].init_from_array(wv)

    model_parameters['pos_lookup'] = model.add_lookup_parameters(
        (num_pos, POS_DIM))
    model_parameters['dep_lookup'] = model.add_lookup_parameters(
        (num_dep, DEP_DIM))
    model_parameters['dir_lookup'] = model.add_lookup_parameters(
        (num_directions, DIR_DIM))

    return builder, model, model_parameters
Example #18
def create_computation_graph(num_lemmas,
                             num_pos,
                             num_dep,
                             num_directions,
                             num_relations,
                             wv=None,
                             use_xy_embeddings=False,
                             num_hidden_layers=0,
                             lemma_dimension=50):
    """
    Initialize the model
    :param num_lemmas: number of distinct lemmas
    :param num_pos: number of distinct part-of-speech tags
    :param num_dep: number of distinct dependency labels
    :param num_directions: number of distinct path directions (e.g. >, <)
    :param num_relations: number of classes (e.g. binary = 2)
    :param wv: pre-trained word embeddings file
    :param use_xy_embeddings: whether to concatenate the x and y word embeddings to the network input
    :param num_hidden_layers: number of hidden layers for the term-pair classification network
    :param lemma_dimension: dimension of the lemma embeddings
    :return:
    """
    # dy.Model() is the pre-2.0 name and may error on recent DyNet; ParameterCollection is the current API -- GB
    dy.renew_cg()
    model = dy.ParameterCollection()
    network_input = LSTM_HIDDEN_DIM

    builder = dy.LSTMBuilder(NUM_LAYERS,
                             lemma_dimension + POS_DIM + DEP_DIM + DIR_DIM,
                             network_input, model)

    # Concatenate x and y
    if use_xy_embeddings:
        network_input += 2 * lemma_dimension

    #  'the optimal size of the hidden layer is usually between the size of the input and size of the output layers'
    hidden_dim = int((network_input + num_relations) / 2)

    model_parameters = {}

    if num_hidden_layers == 0:
        model_parameters['W1'] = model.add_parameters(
            (num_relations, network_input))
        model_parameters['b1'] = model.add_parameters((num_relations, 1))

    elif num_hidden_layers == 1:

        model_parameters['W1'] = model.add_parameters(
            (hidden_dim, network_input))
        model_parameters['b1'] = model.add_parameters((hidden_dim, 1))
        model_parameters['W2'] = model.add_parameters(
            (num_relations, hidden_dim))
        model_parameters['b2'] = model.add_parameters((num_relations, 1))

    else:
        raise ValueError('Only 0 or 1 hidden layers are supported')

    model_parameters['lemma_lookup'] = model.add_lookup_parameters(
        (num_lemmas, lemma_dimension))

    # Pre-trained word embeddings
    if wv is not None:
        model_parameters['lemma_lookup'].init_from_array(wv)

    model_parameters['pos_lookup'] = model.add_lookup_parameters(
        (num_pos, POS_DIM))
    model_parameters['dep_lookup'] = model.add_lookup_parameters(
        (num_dep, DEP_DIM))
    model_parameters['dir_lookup'] = model.add_lookup_parameters(
        (num_directions, DIR_DIM))

    return builder, model, model_parameters
Example #19
    def __init__(
        self,
        embed_size,
        word_hidden_size,
        training_file,
        dev_file,
        test_file,
        batch_size,
        model_file,
        lstm_feats,
        crf_feats,
        autoencoder,
        train_features,
        dev_features,
        test_features,
        testing,
        restart,
        feat_func,
    ):
        self.crf_feats = crf_feats
        self.lstm_feats = lstm_feats
        self.autoencoder = autoencoder
        self.embed_size = embed_size
        self.word_hidden_size = word_hidden_size
        self.model_file = model_file

        self.featsize = 0

        self.word_vocab = defaultdict(lambda: len(self.word_vocab))
        self.char_vocab = defaultdict(lambda: len(self.char_vocab))
        self.tag_vocab = defaultdict(lambda: len(self.tag_vocab))
        self.word_lookup = []

        self.training_data = self.read_train(training_file, train_features)
        self.dev_data = self.read_test(dev_file, dev_features)
        self.test_data = self.read_test(test_file, test_features)
        self.batch_size = batch_size
        self.reverse_tag_lookup = dict((v, k) for k, v in self.tag_vocab.items())
        self.reverse_word_lookup = dict((v, k) for k, v in self.word_vocab.items())

        self.model = dy.ParameterCollection()

        self.cnn = CNNModule(self.model, self.char_vocab)
        self.word_embeds = self.model.add_lookup_parameters(
            (len(self.word_vocab), embed_size)
        )
        arr = np.array(self.word_lookup)
        self.word_embeds.init_from_array(arr)
        self.word_lstm = dy.BiRNNBuilder(
            1,
            CNN_OUT_SIZE + embed_size + FEAT_OUT_SIZE,
            word_hidden_size,
            self.model,
            dy.LSTMBuilder,
        )

        self.feat_w = self.model.add_parameters((FEAT_OUT_SIZE, self.featsize))
        self.feat_b = self.model.add_parameters((FEAT_OUT_SIZE))
        self.feat_func = feat_func

        num_tags = len(self.tag_vocab) + 2
        self.num_tags = num_tags

        # Last linear layer to map the output of the LSTM to the tag space
        self.context_to_emit_w = self.model.add_parameters(
            (len(self.tag_vocab), word_hidden_size + FEAT_OUT_SIZE)
        )
        self.context_to_emit_b = self.model.add_parameters((len(self.tag_vocab)))
        self.crf_module = CRFModule(self.model, self.tag_vocab)

        self.o_tag = self.tag_vocab["O"]

        self.context_to_trans_w = self.model.add_parameters(
            (num_tags * num_tags, word_hidden_size + FEAT_OUT_SIZE)
        )
        self.context_to_trans_b = self.model.add_parameters((num_tags * num_tags))

        self.feat_reconstruct_w = self.model.add_parameters(
            (self.featsize, word_hidden_size)
        )
        self.feat_reconstruct_b = self.model.add_parameters((self.featsize))

        if DROPOUT > 0.0:
            self.word_lstm.set_dropout(DROPOUT)

        if os.path.exists(self.model_file) and (testing or restart):
            self.model.populate(self.model_file)
            print("Populated!")
            v_acc = self.get_accuracy(self.dev_data, print_out="dev.")
            print("Validation F1: %f\n" % v_acc)
Example #20
    def __init__(
        self,
        src1_vocab,
        src2_vocab,
        tgt_vocab,
        single,
        pointer_gen,
        coverage,
        diag_loss,
        load_model,
        model_file,
        beam_size,
        best_val_cer,
    ):
        self.model = dy.ParameterCollection()

        self.src1_vocab = src1_vocab
        self.src2_vocab = src2_vocab
        self.tgt_vocab = tgt_vocab

        self.src1_lookup = self.model.add_lookup_parameters(
            (src1_vocab.length(), EMBEDDING_DIM)
        )
        self.src2_lookup = self.model.add_lookup_parameters(
            (src2_vocab.length(), EMBEDDING_DIM)
        )
        self.tgt_lookup = self.model.add_lookup_parameters(
            (tgt_vocab.length(), EMBEDDING_DIM)
        )

        self.enc1_fwd_lstm = dy.CoupledLSTMBuilder(
            LSTM_NUM_OF_LAYERS, EMBEDDING_DIM, HIDDEN_DIM, self.model
        )
        self.enc1_bwd_lstm = dy.CoupledLSTMBuilder(
            LSTM_NUM_OF_LAYERS, EMBEDDING_DIM, HIDDEN_DIM, self.model
        )
        self.pret1_w = self.model.add_parameters((src1_vocab.length(), HIDDEN_DIM))
        self.pret1_b = self.model.add_parameters((src1_vocab.length()))

        self.enc2_fwd_lstm = dy.CoupledLSTMBuilder(
            LSTM_NUM_OF_LAYERS, EMBEDDING_DIM, HIDDEN_DIM, self.model
        )
        self.enc2_bwd_lstm = dy.CoupledLSTMBuilder(
            LSTM_NUM_OF_LAYERS, EMBEDDING_DIM, HIDDEN_DIM, self.model
        )
        self.pret2_w = self.model.add_parameters((src2_vocab.length(), HIDDEN_DIM))
        self.pret2_b = self.model.add_parameters((src2_vocab.length()))

        self.att1_w1 = self.model.add_parameters((ATTENTION_SIZE, HIDDEN_DIM * 2))
        self.att1_w2 = self.model.add_parameters(
            (ATTENTION_SIZE, HIDDEN_DIM * LSTM_NUM_OF_LAYERS * 2)
        )
        self.att1_v = self.model.add_parameters((1, ATTENTION_SIZE))

        self.att2_w1 = self.model.add_parameters((ATTENTION_SIZE, HIDDEN_DIM * 2))
        self.att2_w2 = self.model.add_parameters(
            (ATTENTION_SIZE, HIDDEN_DIM * LSTM_NUM_OF_LAYERS * 2)
        )
        self.att2_v = self.model.add_parameters((1, ATTENTION_SIZE))

        self.dec_lstm = dy.CoupledLSTMBuilder(
            LSTM_NUM_OF_LAYERS, HIDDEN_DIM * 4 + EMBEDDING_DIM, HIDDEN_DIM, self.model
        )
        self.W_s = self.model.add_parameters((HIDDEN_DIM, HIDDEN_DIM * 4))
        self.b_s = self.model.add_parameters((HIDDEN_DIM))
        self.dec_w = self.model.add_parameters((tgt_vocab.length(), HIDDEN_DIM))
        self.dec_b = self.model.add_parameters((tgt_vocab.length()))

        # Pointer-generator parameters
        self.ptr_w_c = self.model.add_parameters((1, 2 * HIDDEN_DIM))
        self.ptr_w_s = self.model.add_parameters((1, 2 * HIDDEN_DIM))
        self.ptr_w_x = self.model.add_parameters((1, EMBEDDING_DIM + 4 * HIDDEN_DIM))

        # Coverage parameters
        self.w_cov = self.model.add_parameters((ATTENTION_SIZE, 1))

        self.single_source = single
        self.pointer_gen = pointer_gen
        self.coverage = coverage
        self.diag_loss = diag_loss
        self.model_file = model_file

        if load_model:
            self.model.populate(load_model)
            logging.info("Loaded model: {}".format(load_model))

        self.beam_size = beam_size
        self.best_val_cer = best_val_cer
Example #21
 def __init__(self,
              in_dim,
              h_dim,
              c_in_dim,
              c_h_dim,
              h_layers,
              pred_layer,
              learning_algo="sgd",
              learning_rate=0,
              embeds_file=None,
              activation=ACTIVATION_MAP["tanh"],
              mlp=0,
              activation_mlp=ACTIVATION_MAP["rectify"],
              backprob_embeds=True,
              noise_sigma=0.1,
              w_dropout_rate=0.25,
              c_dropout_rate=0.25,
              initializer=INITIALIZER_MAP["glorot"],
              builder=BUILDERS["lstmc"],
              crf=False,
              viterbi_loss=False,
              mimickx_model_path=None,
              dictionary=None,
              type_constraint=False,
              lex_dim=0,
              embed_lex=False):
     self.w2i = {}  # word to index mapping
     self.c2i = {}  # char to index mapping
     self.w2c_cache = {}  # word to char index cache for frequent words
     self.wcount = None  # word count
     self.ccount = None  # char count
     self.task2tag2idx = {}  # need one dictionary per task
      self.pred_layer = [int(layer) for layer in pred_layer]  # at which layer to predict each task
      self.model = dynet.ParameterCollection()  # init model
     self.in_dim = in_dim
     self.h_dim = h_dim
     self.c_in_dim = c_in_dim
     self.c_h_dim = c_h_dim
     self.w_dropout_rate = w_dropout_rate
     self.c_dropout_rate = c_dropout_rate
     self.activation = activation
     self.mlp = mlp
     self.activation_mlp = activation_mlp
     self.noise_sigma = noise_sigma
     self.h_layers = h_layers
     self.predictors = {
         "inner": [],
         "output_layers_dict": {},
         "task_expected_at": {}
     }  # the inner layers and predictors
     self.wembeds = None  # lookup: embeddings for words
     self.cembeds = None  # lookup: embeddings for characters
     self.lembeds = None  # lookup: embeddings for lexical features (optional)
     self.embeds_file = embeds_file
     trainer_algo = TRAINER_MAP[learning_algo]
     if learning_rate > 0:
         ### TODO: better handling of additional learning-specific parameters
         self.trainer = trainer_algo(self.model,
                                     learning_rate=learning_rate)
     else:
         # using default learning rate
         self.trainer = trainer_algo(self.model)
     self.backprob_embeds = backprob_embeds
     self.initializer = initializer
     self.char_rnn = None  # biRNN for character input
     self.builder = builder  # default biRNN is an LSTM
     self.crf = crf
     self.viterbi_loss = viterbi_loss
     self.mimickx_model_path = mimickx_model_path
     if mimickx_model_path:  # load
         self.mimickx_model = load_model(mimickx_model_path)
     self.dictionary = None
     self.type_constraint = type_constraint
     self.embed_lex = False
     self.l2i = {UNK: 0}  # lex feature to index mapping
     if dictionary:
         self.dictionary, self.dictionary_values = load_dict(dictionary)
         self.path_to_dictionary = dictionary
         if type_constraint:
             self.lex_dim = 0
         else:
             if embed_lex:
                 self.lex_dim = lex_dim
                 self.embed_lex = True
                 print("Embed lexical features")
                 # register property indices
                 for prop in self.dictionary_values:
                     self.l2i[prop] = len(self.l2i)
             else:
                 self.lex_dim = len(self.dictionary_values)  #n-hot encoding
             print("Lex_dim: {}".format(self.lex_dim), file=sys.stderr)
     else:
         self.dictionary = None
         self.path_to_dictionary = None
         self.lex_dim = 0
Example #22
def build_model(input_vocabulary, output_vocabulary, input_dim, hidden_dim,
                layers):
    # define all model parameters
    # TODO: add logic for "smart" parameter allocation according to the user's chosen architecture
    print('creating model...')
    sys.stdout.flush()

    model = dn.ParameterCollection()

    params = {}

    # input embeddings
    params['input_lookup'] = model.add_lookup_parameters(
        (len(input_vocabulary), input_dim))

    # init vector for input feeding
    params['init_lookup'] = model.add_lookup_parameters((1, 3 * hidden_dim))

    # output embeddings
    params['output_lookup'] = model.add_lookup_parameters(
        (len(output_vocabulary), input_dim))

    # used in softmax output
    params['readout'] = model.add_parameters(
        (len(output_vocabulary), 3 * hidden_dim))
    params['bias'] = model.add_parameters(len(output_vocabulary))

    # rnn's
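    # 'arguments' is assumed to be a module-level CLI-options dict (e.g. from docopt) in the original source.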
    if bool(arguments['--compact']):
        params['encoder_frnn'] = dn.CompactVanillaLSTMBuilder(
            layers, input_dim, hidden_dim, model)
        params['encoder_rrnn'] = dn.CompactVanillaLSTMBuilder(
            layers, input_dim, hidden_dim, model)
    else:
        params['encoder_frnn'] = dn.LSTMBuilder(layers, input_dim, hidden_dim,
                                                model)
        params['encoder_rrnn'] = dn.LSTMBuilder(layers, input_dim, hidden_dim,
                                                model)

    # attention MLPs - Luong-style with extra v_a from Bahdanau

    # concatenation layer for h (hidden dim), c (2 * hidden_dim)
    params['w_c'] = model.add_parameters((3 * hidden_dim, 3 * hidden_dim))

    # concatenation layer for h_input (hidden_dim), h_output (hidden_dim)
    params['w_a'] = model.add_parameters((hidden_dim, hidden_dim))

    # concatenation layer for h (hidden dim), c (2 * hidden_dim)
    params['u_a'] = model.add_parameters((hidden_dim, 2 * hidden_dim))

    # concatenation layer for h_input (2 * hidden_dim), h_output (hidden_dim)
    params['v_a'] = model.add_parameters((1, hidden_dim))

    # 3 * HIDDEN_DIM + input_dim - gets the feedback output embedding, "input feeding" approach for attn
    params['decoder_rnn'] = dn.LSTMBuilder(layers, 3 * hidden_dim + input_dim,
                                           hidden_dim, model)

    print('finished creating model')
    sys.stdout.flush()

    return model, params