def __init__(self, character_embeddings_size, encodings, rnn_size=100, rnn_layers=1, embeddings_size=100, model=None, runtime=False):
    """Build the parameters of a character-level BiLSTM network.

    character_embeddings_size -- size of the per-character embedding vectors
    encodings -- object exposing char2int (character vocabulary)
    rnn_size / rnn_layers -- size and depth of the forward/backward LSTMs
    embeddings_size -- size of the output (word-level) embedding
    model -- optional shared dy.Model; a new one is created when None
    runtime -- True uses plain LSTMs, False uses orthonormal init (training)
    """
    # Reuse the caller's parameter collection when provided so this network
    # can be serialized together with its parent model.
    if model is None:
        self.model = dy.Model()
    else:
        self.model = model
    self.encodings = encodings
    self.character_embeddings_size = character_embeddings_size
    self.embeddings_size = embeddings_size
    self.num_characters = len(encodings.char2int)
    self.character_lookup = self.model.add_lookup_parameters(
        (self.num_characters, character_embeddings_size))
    self.rnn_fw = []
    self.rnn_bw = []
    self.rnn_layers = rnn_layers
    self.rnn_size = rnn_size
    # +3: three extra input features are appended to each char embedding
    # (their exact meaning is defined where the network is run — confirm there)
    input_size = character_embeddings_size + 3
    for _ in xrange(rnn_layers):
        if runtime:
            self.rnn_fw.append(
                dy.VanillaLSTMBuilder(1, input_size, rnn_size, self.model))
            self.rnn_bw.append(
                dy.VanillaLSTMBuilder(1, input_size, rnn_size, self.model))
        else:
            # Orthonormal initialization is only needed for training.
            from utils import orthonormal_VanillaLSTMBuilder
            self.rnn_fw.append(
                orthonormal_VanillaLSTMBuilder(1, input_size, rnn_size, self.model))
            self.rnn_bw.append(
                orthonormal_VanillaLSTMBuilder(1, input_size, rnn_size, self.model))
        # Deeper layers consume the concatenated fw+bw outputs.
        input_size = rnn_size * 2
    # rnn_size * 4: concatenation of the last fw/bw states and an attention
    # summary over the other states (see comment kept from the original).
    self.linearW = self.model.add_parameters(
        (embeddings_size, rnn_size * 4))  # last state and attention over the other states
    self.linearB = self.model.add_parameters((embeddings_size))
    # Additive-attention parameters over the BiLSTM states.
    self.att_w1 = self.model.add_parameters((rnn_size, rnn_size * 2))
    self.att_w2 = self.model.add_parameters((rnn_size, rnn_size * 2))
    self.att_v = self.model.add_parameters((1, rnn_size))
def __init__(self, config, encodings, embeddings, runtime=False):
    """Build the parameters of a tag-conditioned character decoder.

    The output softmax covers the character vocabulary plus three extra
    symbolic actions (<EOS>, <COPY>, <INC>), mapped in self.label2int.
    """
    self.config = config
    self.encodings = encodings
    # Bug in encodings - will be removed after UD
    # Workaround: the space character is expected at index 1; flag when not.
    self.has_bug = False
    if self.encodings.char2int[' '] != 1:
        self.has_bug = True
        import sys
        sys.stdout.write("Detected encodings BUG!")
    self.embeddings = embeddings
    self.losses = []
    self.model = dy.Model()
    self.trainer = dy.AdamTrainer(self.model, alpha=2e-3, beta_1=0.9, beta_2=0.9)
    # Character-level encoder shared with the rest of the pipeline;
    # registered on self.model so it serializes with this network.
    self.character_network = CharacterNetwork(self.config.tag_embeddings_size, encodings,
                                              rnn_size=self.config.char_rnn_size,
                                              rnn_layers=self.config.char_rnn_layers,
                                              embeddings_size=self.config.char_embeddings,
                                              model=self.model, runtime=runtime)
    # Cache of already-computed lemmas (word -> lemma).
    self.word2lemma = {}
    # Morphology tag embeddings.
    self.upos_lookup = self.model.add_lookup_parameters(
        (len(self.encodings.upos2int), self.config.tag_embeddings_size))
    self.xpos_lookup = self.model.add_lookup_parameters(
        (len(self.encodings.xpos2int), self.config.tag_embeddings_size))
    self.attrs_lookup = self.model.add_lookup_parameters(
        (len(self.encodings.attrs2int), self.config.tag_embeddings_size))
    self.char_lookup = self.model.add_lookup_parameters((len(self.encodings.char2int), self.config.char_embeddings))
    # Decoder RNN input: char BiLSTM state + char embedding + tag embedding.
    if runtime:
        self.rnn = dy.LSTMBuilder(self.config.rnn_layers,
                                  self.config.char_rnn_size * 2 + self.config.char_embeddings + self.config.tag_embeddings_size,
                                  self.config.rnn_size, self.model)
    else:
        from utils import orthonormal_VanillaLSTMBuilder
        self.rnn = orthonormal_VanillaLSTMBuilder(self.config.rnn_layers,
                                                  self.config.char_rnn_size * 2 + self.config.char_embeddings + self.config.tag_embeddings_size,
                                                  self.config.rnn_size, self.model)
    # self.att_w1 = self.model.add_parameters((200, self.config.char_rnn_size * 2))
    # self.att_w2 = self.model.add_parameters((200, self.config.rnn_size + self.config.tag_embeddings_size))
    # self.att_v = self.model.add_parameters((1, 200))
    # Learned initial input for the decoder RNN.
    self.start_lookup = self.model.add_lookup_parameters(
        (1, self.config.char_rnn_size * 2 + self.config.char_embeddings + self.config.tag_embeddings_size))
    # +3 output classes: <EOS>, <COPY>, <INC> appended after the characters.
    self.softmax_w = self.model.add_parameters((len(self.encodings.char2int) + 3, self.config.rnn_size))
    self.softmax_b = self.model.add_parameters((len(self.encodings.char2int) + 3))
    ofs = len(self.encodings.char2int)
    self.label2int = {}
    self.label2int['<EOS>'] = ofs
    self.label2int['<COPY>'] = ofs + 1
    self.label2int['<INC>'] = ofs + 2
def __init__(self, lemmatizer_config, encodings, embeddings, runtime=False):
    """Build the parameters of an attention-based lemmatizer.

    Produces a character softmax (+1 class, presumably <EOS> — confirm
    against the decoding code) and a separate 2-way casing softmax.
    """
    self.config = lemmatizer_config
    self.encodings = encodings
    # Bug in encodings - this will be removed after UD Shared Task
    # Workaround flag: space character expected at index 1.
    self.has_bug = False
    if self.encodings.char2int[' '] != 1:
        self.has_bug = True
    self.embeddings = embeddings
    self.losses = []
    self.model = dy.Model()
    self.trainer = dy.AdamTrainer(self.model, alpha=2e-3, beta_1=0.9, beta_2=0.9)
    # Character-level encoder registered on this model's collection.
    self.character_network = CharacterNetwork(self.config.tag_embeddings_size, encodings,
                                              rnn_size=self.config.char_rnn_size,
                                              rnn_layers=self.config.char_rnn_layers,
                                              embeddings_size=self.config.char_embeddings,
                                              model=self.model, runtime=runtime)
    # Morphology tag embeddings.
    self.upos_lookup = self.model.add_lookup_parameters(
        (len(self.encodings.upos2int), self.config.tag_embeddings_size))
    self.xpos_lookup = self.model.add_lookup_parameters(
        (len(self.encodings.xpos2int), self.config.tag_embeddings_size))
    self.attrs_lookup = self.model.add_lookup_parameters(
        (len(self.encodings.attrs2int), self.config.tag_embeddings_size))
    self.char_lookup = self.model.add_lookup_parameters((len(self.encodings.char2int), self.config.char_embeddings))
    # Decoder RNN input: char BiLSTM state + char embedding (no tag here,
    # unlike the sibling network that also concatenates tag embeddings).
    if runtime:
        self.rnn = dy.LSTMBuilder(self.config.rnn_layers,
                                  self.config.char_rnn_size * 2 + self.config.char_embeddings,
                                  self.config.rnn_size, self.model)
    else:
        from utils import orthonormal_VanillaLSTMBuilder
        self.rnn = orthonormal_VanillaLSTMBuilder(self.config.rnn_layers,
                                                  self.config.char_rnn_size * 2 + self.config.char_embeddings,
                                                  self.config.rnn_size, self.model)
    # Additive attention: w1 scores encoder states, w2 scores the decoder
    # state concatenated with the tag embedding.
    self.att_w1 = self.model.add_parameters((200, self.config.char_rnn_size * 2))
    self.att_w2 = self.model.add_parameters((200, self.config.rnn_size + self.config.tag_embeddings_size))
    self.att_v = self.model.add_parameters((1, 200))
    # Learned initial decoder input.
    self.start_lookup = self.model.add_lookup_parameters(
        (1, self.config.char_rnn_size * 2 + self.config.char_embeddings))
    # Character softmax (+1 extra class) and binary casing softmax.
    self.softmax_w = self.model.add_parameters((len(self.encodings.char2int) + 1, self.config.rnn_size))
    self.softmax_b = self.model.add_parameters((len(self.encodings.char2int) + 1))
    self.softmax_casing_w = self.model.add_parameters((2, self.config.rnn_size))
    self.softmax_casing_b = self.model.add_parameters((2))
def __init__(self, src_we, dst_we, input_encodings, output_encodings, config):
    """Build the parameters of a word-level encoder-decoder with attention.

    src_we / dst_we -- source- and target-side word-embedding providers
    input_encodings / output_encodings -- word vocabularies for both sides
    config -- hyper-parameters (encoder_layers, decoder_size, input_size, ...)
    """
    self.config = config
    self.losses = []
    self.model = dy.Model()
    self.trainer = dy.AdamTrainer(self.model)
    self.src_we = src_we
    self.dst_we = dst_we
    self.input_encodings = input_encodings
    self.output_encodings = output_encodings
    # encoder: stacked BiLSTM; each layer consumes the previous layer's
    # concatenated fw+bw output.
    self.encoder_fw = []
    self.encoder_bw = []
    input_size = config.input_size
    for layer_size in self.config.encoder_layers:
        self.encoder_fw.append(
            orthonormal_VanillaLSTMBuilder(1, input_size, layer_size, self.model))
        self.encoder_bw.append(
            orthonormal_VanillaLSTMBuilder(1, input_size, layer_size, self.model))
        input_size = layer_size * 2
    # decoder: single stacked builder; input is the attention context
    # (encoder output) concatenated with a word-sized input vector.
    # self.decoder = []
    # for layer_size in self.config.decoder_layers:
    self.decoder = orthonormal_VanillaLSTMBuilder(
        config.decoder_layers, input_size + self.config.input_size,
        config.decoder_size, self.model)
    input_size = config.decoder_size
    # output softmax (+1 class for the EOS symbol defined below)
    self.output_softmax_w = self.model.add_parameters(
        (len(self.output_encodings.word2int) + 1, input_size))
    self.output_softmax_b = self.model.add_parameters(
        (len(self.output_encodings.word2int) + 1))
    self.EOS = len(self.output_encodings.word2int)
    # aux WE layer: hidden layer predicting target word embeddings
    self.aux_layer_w = self.model.add_parameters(
        (self.config.aux_we_layer_size, self.config.decoder_size))
    self.aux_layer_b = self.model.add_parameters(
        (self.config.aux_we_layer_size))
    # aux WE projection into the target embedding space
    self.aux_layer_proj_w = self.model.add_parameters(
        (self.dst_we.word_embeddings_size, self.config.aux_we_layer_size))
    self.aux_layer_proj_b = self.model.add_parameters(
        (self.dst_we.word_embeddings_size))
    # input projection: source embeddings -> model input size
    self.word_proj_w = self.model.add_parameters(
        (self.config.input_size, self.src_we.word_embeddings_size))
    self.word_proj_b = self.model.add_parameters((self.config.input_size))
    # Trainable (holistic) word embeddings for both vocabularies,
    # plus 2 special entries (meaning defined at usage site — confirm).
    self.hol_we_src = self.model.add_lookup_parameters(
        (len(self.input_encodings.word2int), self.config.input_size))
    self.hol_we_dst = self.model.add_lookup_parameters(
        (len(self.output_encodings.word2int), self.config.input_size))
    self.special_we = self.model.add_lookup_parameters(
        (2, self.config.input_size))
    # attention (additive) over the top encoder layer
    self.att_w1 = self.model.add_parameters(
        (self.config.encoder_layers[-1] * 2, self.config.encoder_layers[-1] * 2))
    self.att_w2 = self.model.add_parameters(
        (self.config.encoder_layers[-1] * 2, self.config.decoder_size))
    self.att_v = self.model.add_parameters(
        (1, self.config.encoder_layers[-1] * 2))
def __init__(self, tagger_config, encodings, embeddings, aux_softmax_weight=0.2, runtime=False):
    """Build the parameters of a BiLSTM tagger with auxiliary softmaxes.

    Predicts UPOS, XPOS and morphological attributes from the top layer,
    plus auxiliary predictions from an intermediate layer
    (config.aux_softmax_layer), mixed in with aux_softmax_weight.
    """
    self.config = tagger_config
    self.encodings = encodings
    self.embeddings = embeddings
    self.model = dy.Model()
    self.trainer = dy.AdamTrainer(
        self.model, alpha=2e-3, beta_1=0.9, beta_2=0.9)  # dy.MomentumSGDTrainer(self.model)
    # Dense updates: lookup parameters are updated as a whole each step.
    self.trainer.set_sparse_updates(False)
    self.character_network = CharacterNetwork(
        100, encodings, rnn_size=200, rnn_layers=1,
        embeddings_size=self.embeddings.word_embeddings_size,
        model=self.model, runtime=runtime)
    # Trainable fallback for out-of-vocabulary words.
    self.unknown_word_embedding = self.model.add_lookup_parameters(
        (1, self.embeddings.word_embeddings_size))
    self.holistic_word_embedding = self.model.add_lookup_parameters(
        (len(encodings.word2int), self.embeddings.word_embeddings_size))
    # Projections of the three word representations into the RNN input space.
    self.char_proj_w = self.model.add_parameters(
        (self.config.input_size, self.embeddings.word_embeddings_size))
    self.emb_proj_w = self.model.add_parameters(
        (self.config.input_size, self.embeddings.word_embeddings_size))
    self.hol_proj_w = self.model.add_parameters(
        (self.config.input_size, self.embeddings.word_embeddings_size))
    self.bdrnn_fw = []
    self.bdrnn_bw = []
    rnn_input_size = self.config.input_size  # self.embeddings.word_embeddings_size
    aux_softmax_input_size = 0
    index = 0
    for layer_size in self.config.layers:
        if runtime:
            self.bdrnn_fw.append(
                dy.VanillaLSTMBuilder(1, rnn_input_size, layer_size, self.model))
            self.bdrnn_bw.append(
                dy.VanillaLSTMBuilder(1, rnn_input_size, layer_size, self.model))
        else:
            self.bdrnn_fw.append(
                orthonormal_VanillaLSTMBuilder(1, rnn_input_size, layer_size, self.model))
            self.bdrnn_bw.append(
                orthonormal_VanillaLSTMBuilder(1, rnn_input_size, layer_size, self.model))
        rnn_input_size = layer_size * 2
        index += 1
        # Remember the output size of the layer feeding the aux softmaxes.
        if index == self.config.aux_softmax_layer:
            aux_softmax_input_size = rnn_input_size
    # One pre-softmax MLP per task.
    self.mlps = []
    for _ in xrange(3):  # upos, xpos and attrs
        mlp_w = []
        mlp_b = []
        input_sz = self.config.layers[-1] * 2
        for l_size in self.config.presoftmax_mlp_layers:
            mlp_w.append(self.model.add_parameters((l_size, input_sz)))
            mlp_b.append(self.model.add_parameters((l_size)))
            input_sz = l_size
        self.mlps.append([mlp_w, mlp_b])
    # Main softmaxes (fed by the MLPs).
    softmax_input_size = self.config.presoftmax_mlp_layers[-1]
    self.softmax_upos_w = self.model.add_parameters(
        (len(self.encodings.upos2int), softmax_input_size))
    self.softmax_upos_b = self.model.add_parameters(
        (len(self.encodings.upos2int)))
    self.softmax_xpos_w = self.model.add_parameters(
        (len(self.encodings.xpos2int), softmax_input_size))
    self.softmax_xpos_b = self.model.add_parameters(
        (len(self.encodings.xpos2int)))
    self.softmax_attrs_w = self.model.add_parameters(
        (len(self.encodings.attrs2int), softmax_input_size))
    self.softmax_attrs_b = self.model.add_parameters(
        (len(self.encodings.attrs2int)))
    # Auxiliary softmaxes fed directly by the intermediate BiLSTM layer.
    self.aux_softmax_upos_w = self.model.add_parameters(
        (len(self.encodings.upos2int), aux_softmax_input_size))
    self.aux_softmax_upos_b = self.model.add_parameters(
        (len(self.encodings.upos2int)))
    self.aux_softmax_xpos_w = self.model.add_parameters(
        (len(self.encodings.xpos2int), aux_softmax_input_size))
    self.aux_softmax_xpos_b = self.model.add_parameters(
        (len(self.encodings.xpos2int)))
    self.aux_softmax_attrs_w = self.model.add_parameters(
        (len(self.encodings.attrs2int), aux_softmax_input_size))
    self.aux_softmax_attrs_b = self.model.add_parameters(
        (len(self.encodings.attrs2int)))
    self.aux_softmax_weight = aux_softmax_weight
    self.losses = []
def __init__(self, params, model=None):
    """Build the parameters of a coarse/fine sample-level vocoder RNN.

    Two LSTMs predict the coarse and fine 8-bit halves of each sample
    (256-way softmaxes), conditioned on upsampled MGC frames.
    """
    self.UPSAMPLE_PROJ = 200
    self.RNN_SIZE = 448
    self.RNN_LAYERS = 1
    self.OUTPUT_EMB_SIZE = 1
    self.params = params
    # Reuse the caller's parameter collection when provided.
    if model is None:
        self.model = dy.Model()
    else:
        self.model = model
    self.trainer = dy.AdamTrainer(self.model, alpha=1e-4)
    self.trainer.set_sparse_updates(True)
    self.trainer.set_clip_threshold(5.0)
    # self.trainer = dy.AdamTrainer(self.model)
    # MGCs are extracted at 12.5 ms
    # Number of audio samples covered by one MGC frame.
    upsample_count = int(12.5 * self.params.target_sample_rate / 1000)
    # One projection per sample position within a frame.
    # self.upsample_w_s = []
    self.upsample_w_t = []
    # self.upsample_b_s = []
    self.upsample_b_t = []
    for _ in xrange(upsample_count):
        # self.upsample_w_s.append(self.model.add_parameters((self.UPSAMPLE_PROJ, self.params.mgc_order)))
        self.upsample_w_t.append(
            self.model.add_parameters(
                (self.UPSAMPLE_PROJ, self.params.mgc_order * 2)))
        # self.upsample_b_s.append(self.model.add_parameters((self.UPSAMPLE_PROJ)))
        self.upsample_b_t.append(
            self.model.add_parameters((self.UPSAMPLE_PROJ)))
    # Embeddings for the previous coarse/fine 8-bit values (256 levels).
    self.output_coarse_lookup = self.model.add_lookup_parameters(
        (256, self.OUTPUT_EMB_SIZE))
    self.output_fine_lookup = self.model.add_lookup_parameters(
        (256, self.OUTPUT_EMB_SIZE))
    from utils import orthonormal_VanillaLSTMBuilder
    # self.rnn = orthonormal_VanillaLSTMBuilder(self.RNN_LAYERS, self.OUTPUT_EMB_SIZE + self.UPSAMPLE_PROJ, self.RNN_SIZE, self.model)
    # Coarse RNN sees 2 previous embeddings; fine RNN additionally sees the
    # current coarse prediction (3 embeddings).
    self.rnnCoarse = orthonormal_VanillaLSTMBuilder(
        self.RNN_LAYERS, self.OUTPUT_EMB_SIZE * 2 + self.UPSAMPLE_PROJ,
        self.RNN_SIZE, self.model)
    self.rnnFine = orthonormal_VanillaLSTMBuilder(
        self.RNN_LAYERS, self.OUTPUT_EMB_SIZE * 3 + self.UPSAMPLE_PROJ,
        self.RNN_SIZE, self.model)
    # self.rnnCoarse = dy.GRUBuilder(self.RNN_LAYERS, self.OUTPUT_EMB_SIZE * 2 + self.UPSAMPLE_PROJ,
    #                                self.RNN_SIZE, self.model)
    # self.rnnFine = dy.GRUBuilder(self.RNN_LAYERS, self.OUTPUT_EMB_SIZE * 3 + self.UPSAMPLE_PROJ,
    #                              self.RNN_SIZE, self.model)
    # Single-layer MLPs before each output softmax.
    self.mlp_coarse_w = []
    self.mlp_coarse_b = []
    self.mlp_coarse_w.append(
        self.model.add_parameters((self.RNN_SIZE, self.RNN_SIZE)))
    self.mlp_coarse_b.append(self.model.add_parameters((self.RNN_SIZE)))
    self.mlp_fine_w = []
    self.mlp_fine_b = []
    self.mlp_fine_w.append(
        self.model.add_parameters((self.RNN_SIZE, self.RNN_SIZE)))
    self.mlp_fine_b.append(self.model.add_parameters((self.RNN_SIZE)))
    # 256-way softmaxes over the coarse and fine byte values.
    self.softmax_coarse_w = self.model.add_parameters((256, self.RNN_SIZE))
    self.softmax_coarse_b = self.model.add_parameters((256))
    self.softmax_fine_w = self.model.add_parameters((256, self.RNN_SIZE))
    self.softmax_fine_b = self.model.add_parameters((256))
def __init__(self, params, num_phones, phone2int, model=None):
    """Build the parameters of a phone-encoder / spectral-decoder network.

    A stacked BiLSTM encodes the phone sequence; an attention-based LSTM
    decoder predicts MGC frames (three projection heads) plus a stop flag.
    """
    self.model = model
    self.params = params
    self.PHONE_EMBEDDINGS_SIZE = 100
    self.ENCODER_SIZE = 200
    self.ENCODER_LAYERS = 2
    self.DECODER_SIZE = 200
    self.DECODER_LAYERS = 2
    self.phone2int = phone2int
    # Create a fresh collection only when the caller did not supply one.
    if self.model is None:
        self.model = dy.Model()
    self.trainer = dy.AdamTrainer(self.model)
    self.trainer.set_sparse_updates(True)
    self.trainer.set_clip_threshold(5.0)
    # +2 extra phone entries (special symbols — confirm meaning at usage site).
    self.phone_lookup = self.model.add_lookup_parameters(
        (num_phones + 2, self.PHONE_EMBEDDINGS_SIZE))
    from utils import orthonormal_VanillaLSTMBuilder
    # Stacked BiLSTM encoder; first layer reads phone embeddings, deeper
    # layers read concatenated fw+bw outputs.
    self.encoder_fw = []
    self.encoder_bw = []
    self.encoder_fw.append(
        orthonormal_VanillaLSTMBuilder(1, self.PHONE_EMBEDDINGS_SIZE,
                                       self.ENCODER_SIZE, self.model))
    self.encoder_bw.append(
        orthonormal_VanillaLSTMBuilder(1, self.PHONE_EMBEDDINGS_SIZE,
                                       self.ENCODER_SIZE, self.model))
    for zz in xrange(1, self.ENCODER_LAYERS):
        self.encoder_fw.append(
            orthonormal_VanillaLSTMBuilder(1, self.ENCODER_SIZE * 2,
                                           self.ENCODER_SIZE, self.model))
        self.encoder_bw.append(
            orthonormal_VanillaLSTMBuilder(1, self.ENCODER_SIZE * 2,
                                           self.ENCODER_SIZE, self.model))
    # Decoder input: attention context (2*ENCODER_SIZE) + 100-dim projection
    # of the last MGC frame (see last_mgc_proj_* below).
    self.decoder = dy.VanillaLSTMBuilder(self.DECODER_LAYERS,
                                         self.ENCODER_SIZE * 2 + 100,
                                         self.DECODER_SIZE, self.model)
    # self.aux_hid_w = self.model.add_parameters((500, self.ENCODER_SIZE * 2))
    # self.aux_hid_b = self.model.add_parameters((500))
    # self.aux_proj_w = self.model.add_parameters((params.mgc_order, 500))
    # self.aux_proj_b = self.model.add_parameters((params.mgc_order))
    # Hidden layer and three MGC projection heads.
    self.hid_w = self.model.add_parameters((500, self.DECODER_SIZE))
    self.hid_b = self.model.add_parameters((500))
    self.proj_w_1 = self.model.add_parameters((params.mgc_order, 500))
    self.proj_b_1 = self.model.add_parameters((params.mgc_order))
    self.proj_w_2 = self.model.add_parameters((params.mgc_order, 500))
    self.proj_b_2 = self.model.add_parameters((params.mgc_order))
    self.proj_w_3 = self.model.add_parameters((params.mgc_order, 500))
    self.proj_b_3 = self.model.add_parameters((params.mgc_order))
    # Highway connection from the attention context to the MGC output.
    self.highway_w = self.model.add_parameters(
        (params.mgc_order, self.ENCODER_SIZE * 2))
    # Projection of the previously generated MGC frame into the decoder input.
    self.last_mgc_proj_w = self.model.add_parameters(
        (100, self.params.mgc_order))
    self.last_mgc_proj_b = self.model.add_parameters((100))
    # self.last_att_proj_w = self.model.add_parameters((200, self.ENCODER_SIZE * 2))
    # self.last_att_proj_b = self.model.add_parameters((200))
    # Binary stop-token predictor.
    self.stop_w = self.model.add_parameters((1, self.DECODER_SIZE))
    self.stop_b = self.model.add_parameters((1))
    # Additive attention over encoder states.
    self.att_w1 = self.model.add_parameters((100, self.ENCODER_SIZE * 2))
    self.att_w2 = self.model.add_parameters((100, self.DECODER_SIZE))
    self.att_v = self.model.add_parameters((1, 100))
    # Learned start frames for the output sequence and the decoder input.
    self.start_lookup = self.model.add_lookup_parameters(
        (1, params.mgc_order))
    self.decoder_start_lookup = self.model.add_lookup_parameters(
        (1, self.ENCODER_SIZE * 2 + 100))
def __init__(self, parser_config, encodings, embeddings, aux_softmax_weight=0.2, runtime=False):
    """Build the parameters of a BiLSTM graph-based dependency parser.

    Arc and label scorers read projections of the top BiLSTM layer; an
    auxiliary head (either extra arc scorers or morphology softmaxes,
    depending on config.predict_morphology) reads an intermediate layer.
    """
    self.config = parser_config
    self.encodings = encodings
    self.embeddings = embeddings
    self.decoder = GreedyDecoder()
    self.model = dy.Model()
    # self.trainer = dy.SimpleSGDTrainer(self.model)
    self.trainer = dy.AdamTrainer(self.model, alpha=2e-3, beta_1=0.9, beta_2=0.9)
    self.trainer.set_sparse_updates(False)
    self.character_network = CharacterNetwork(
        100, encodings, rnn_size=200, rnn_layers=1,
        embeddings_size=self.config.input_embeddings_size,
        model=self.model, runtime=runtime)
    self.holistic_embeddings = self.model.add_lookup_parameters(
        (len(self.encodings.word2int), self.config.input_embeddings_size))
    # Projection of external word embeddings into the input space.
    self.input_proj_w_word = self.model.add_parameters(
        (self.config.input_embeddings_size, self.embeddings.word_embeddings_size))
    self.input_proj_b_word = self.model.add_parameters(
        (self.config.input_embeddings_size))
    self.unknown_word_embedding = self.model.add_lookup_parameters(
        (3, self.config.input_embeddings_size))  # for padding lexical
    self.pad_tag_embedding = self.model.add_lookup_parameters(
        (3, self.config.input_embeddings_size))  # for padding morphology
    self.bdrnn_fw = []
    self.bdrnn_bw = []
    # Input width depends on which feature groups are enabled.
    rnn_input_size = 0
    if self.config.use_lexical:
        rnn_input_size += self.config.input_embeddings_size
    if self.config.use_morphology:
        rnn_input_size += self.config.input_embeddings_size
        # NOTE(review): tag lookups reconstructed as created only when
        # morphology features are enabled — confirm against saved models.
        self.upos_lookup = self.model.add_lookup_parameters(
            (len(self.encodings.upos2int), self.config.input_embeddings_size))
        self.xpos_lookup = self.model.add_lookup_parameters(
            (len(self.encodings.xpos2int), self.config.input_embeddings_size))
        self.attrs_lookup = self.model.add_lookup_parameters(
            (len(self.encodings.attrs2int), self.config.input_embeddings_size))
    index = 0
    aux_proj_input_size = 0
    for layer_size in self.config.layers:
        if runtime:
            self.bdrnn_fw.append(
                dy.VanillaLSTMBuilder(1, rnn_input_size, layer_size, self.model))
            self.bdrnn_bw.append(
                dy.VanillaLSTMBuilder(1, rnn_input_size, layer_size, self.model))
        else:
            self.bdrnn_fw.append(
                orthonormal_VanillaLSTMBuilder(1, rnn_input_size, layer_size, self.model))
            self.bdrnn_bw.append(
                orthonormal_VanillaLSTMBuilder(1, rnn_input_size, layer_size, self.model))
        rnn_input_size = layer_size * 2
        index += 1
        # Remember the output width of the layer feeding the auxiliary head.
        if index == self.config.aux_softmax_layer:
            aux_proj_input_size = rnn_input_size
    # Head/dependent projections for arc and label scoring (biaffine-style).
    proj_input_size = self.config.layers[-1] * 2
    self.proj_arc_w_head = self.model.add_parameters(
        (self.config.arc_proj_size, proj_input_size))
    self.proj_arc_b_head = self.model.add_parameters(
        (self.config.arc_proj_size))
    self.proj_arc_w_dep = self.model.add_parameters(
        (self.config.arc_proj_size, proj_input_size))
    self.proj_arc_b_dep = self.model.add_parameters(
        (self.config.arc_proj_size))
    self.proj_label_w_head = self.model.add_parameters(
        (self.config.label_proj_size, proj_input_size))
    self.proj_label_b_head = self.model.add_parameters(
        (self.config.label_proj_size))
    self.proj_label_w_dep = self.model.add_parameters(
        (self.config.label_proj_size, proj_input_size))
    self.proj_label_b_dep = self.model.add_parameters(
        (self.config.label_proj_size))
    # Auxiliary head: either extra arc projections, or morphology
    # projections when the parser also predicts morphology.
    if not self.config.predict_morphology:
        self.aux_proj_arc_w_head = self.model.add_parameters(
            (self.config.arc_proj_size, aux_proj_input_size))
        self.aux_proj_arc_b_head = self.model.add_parameters(
            (self.config.arc_proj_size))
        self.aux_proj_arc_w_dep = self.model.add_parameters(
            (self.config.arc_proj_size, aux_proj_input_size))
        self.aux_proj_arc_b_dep = self.model.add_parameters(
            (self.config.arc_proj_size))
    else:
        self.upos_proj_w = self.model.add_parameters(
            (self.config.label_proj_size, aux_proj_input_size))
        self.xpos_proj_w = self.model.add_parameters(
            (self.config.label_proj_size, aux_proj_input_size))
        self.attrs_proj_w = self.model.add_parameters(
            (self.config.label_proj_size, aux_proj_input_size))
        self.upos_proj_b = self.model.add_parameters(
            (self.config.label_proj_size))
        self.xpos_proj_b = self.model.add_parameters(
            (self.config.label_proj_size))
        self.attrs_proj_b = self.model.add_parameters(
            (self.config.label_proj_size))
    # Arc scorer (bilinear term + bias row).
    self.link_b = self.model.add_parameters((1, self.config.arc_proj_size))
    self.link_w = self.model.add_parameters(
        (self.config.arc_proj_size, self.config.arc_proj_size))
    # Label scorer over concatenated head+dep label projections.
    self.label_ww = self.model.add_parameters(
        (1, len(self.encodings.label2int)))
    self.label_w = self.model.add_parameters(
        (len(self.encodings.label2int), self.config.label_proj_size * 2))
    self.label_bb = self.model.add_parameters(
        (len(self.encodings.label2int)))
    if not self.config.predict_morphology:
        self.aux_link_w = self.model.add_parameters(
            (self.config.arc_proj_size, self.config.arc_proj_size))
        self.aux_link_b = self.model.add_parameters(
            (1, self.config.arc_proj_size))
    else:
        self.upos_softmax_w = self.model.add_parameters(
            (len(self.encodings.upos2int), self.config.label_proj_size))
        self.xpos_softmax_w = self.model.add_parameters(
            (len(self.encodings.xpos2int), self.config.label_proj_size))
        self.attrs_softmax_w = self.model.add_parameters(
            (len(self.encodings.attrs2int), self.config.label_proj_size))
        self.upos_softmax_b = self.model.add_parameters(
            (len(self.encodings.upos2int)))
        self.xpos_softmax_b = self.model.add_parameters(
            (len(self.encodings.xpos2int)))
        self.attrs_softmax_b = self.model.add_parameters(
            (len(self.encodings.attrs2int)))
    # Lemma-related biases (weights presumably live elsewhere — confirm).
    self.lemma_softmax_b = self.model.add_parameters(
        (len(self.encodings.char2int) + 1))
    self.lemma_softmax_casing_b = self.model.add_parameters((2))
    self.aux_softmax_weight = aux_softmax_weight
    self.batch_loss = []
def __init__(self, word_size, tag_size, rel_size, input_dim, hidden_dim, pdrop_embs, pdrop_lstm, pdrop_mlp, layers, mlp_dim, arc_dim, biaffine_bias_x_arc, biaffine_bias_y_arc, biaffine_bias_x_rel, biaffine_bias_y_rel, embs_word=None):
    """Build the parameters of a biaffine dependency parser.

    word_size / tag_size / rel_size -- vocabulary sizes for words, tags
        and dependency relations
    input_dim / hidden_dim -- embedding width and per-direction LSTM width
    pdrop_* -- dropout probabilities for embeddings, LSTM and MLP
    layers -- number of stacked BiLSTM layers
    mlp_dim / arc_dim -- total MLP output width and the slice used for
        arc scoring (the remainder, mlp_dim - arc_dim, scores relations)
    biaffine_bias_* -- whether to append bias terms in the biaffine products
    embs_word -- optional pretrained word-embedding matrix (numpy)

    NOTE(review): `config` is read as a module-level global here, not a
    parameter — confirm it is defined in this module.
    """
    # Training bookkeeping.
    self._global_step = 0
    self._early_stop_count = 0
    self._update = False
    self._best_score = 0.
    self._best_score_las = 0.
    self._punct_id = 0
    self._masks_w = []
    self._masks_t = []
    self._vocab_size_w = word_size
    self._vocab_size_t = tag_size
    self._vocab_size_r = rel_size
    self._mlp_dim = mlp_dim
    self._arc_dim = arc_dim
    self._rel_dim = mlp_dim - arc_dim
    self.biaffine_bias_x_arc = biaffine_bias_x_arc
    self.biaffine_bias_y_arc = biaffine_bias_y_arc
    self.biaffine_bias_x_rel = biaffine_bias_x_rel
    self.biaffine_bias_y_rel = biaffine_bias_y_rel
    self._pc = dy.ParameterCollection()
    if config.adam:
        self._trainer = dy.AdamTrainer(self._pc, config.learning_rate,
                                       config.beta_1, config.beta_2,
                                       config.epsilon)
    else:
        # self._trainer = dy.AdadeltaTrainer(self._pc)
        # BUG FIX: the SGD trainer was previously assigned to a local
        # variable `trainer` and discarded, leaving self._trainer unset
        # (AttributeError on the first update) whenever config.adam is False.
        self._trainer = dy.SimpleSGDTrainer(self._pc, config.learning_rate)
        self._trainer.set_clip_threshold(config.clip_threshold)
    # self._trainer.set_clip_threshold(1.0)
    self.params = dict()
    # Word embeddings: zero-initialized lookup, or loaded from a pretrained
    # numpy matrix when one is supplied.
    if embs_word is None:
        self.lp_w = self._pc.add_lookup_parameters(
            (word_size, input_dim), init=dy.ConstInitializer(0.))
    else:
        self.lp_w = self._pc.lookup_parameters_from_numpy(embs_word)
    self.lp_t = self._pc.add_lookup_parameters(
        (tag_size, input_dim), init=dy.ConstInitializer(0.))
    # Learned embeddings for the artificial ROOT token (word + tag slots).
    self.emb_root = self._pc.add_lookup_parameters(
        (2, input_dim), init=dy.ConstInitializer(0.))
    # if config.isTest:
    #     self.l2r_lstm = dy.VanillaLSTMBuilder(layers, input_dim * 2, hidden_dim, self._pc)
    #     self.r2l_lstm = dy.VanillaLSTMBuilder(layers, input_dim * 2, hidden_dim, self._pc)
    # else:
    #     self.l2r_lstm = utils.orthonormal_VanillaLSTMBuilder(layers, input_dim * 2, hidden_dim, self._pc)
    #     self.r2l_lstm = utils.orthonormal_VanillaLSTMBuilder(layers, input_dim * 2, hidden_dim, self._pc)
    self._pdrop_embs = pdrop_embs
    self._pdrop_lstm = pdrop_lstm
    self._pdrop_mlp = pdrop_mlp
    # Stacked BiLSTM: one (forward, backward) builder pair per layer.
    self.LSTM_builders = []
    # f = utils.orthonormal_VanillaLSTMBuilder(layers, input_dim * 2, hidden_dim, self._pc)
    # b = utils.orthonormal_VanillaLSTMBuilder(layers, input_dim * 2, hidden_dim, self._pc)
    # self.LSTM_builders = [f, b]
    f = utils.orthonormal_VanillaLSTMBuilder(1, input_dim * 2, hidden_dim, self._pc)
    b = utils.orthonormal_VanillaLSTMBuilder(1, input_dim * 2, hidden_dim, self._pc)
    self.LSTM_builders.append((f, b))
    for i in range(layers - 1):
        f = utils.orthonormal_VanillaLSTMBuilder(1, 2 * hidden_dim, hidden_dim, self._pc)
        b = utils.orthonormal_VanillaLSTMBuilder(1, 2 * hidden_dim, hidden_dim, self._pc)
        self.LSTM_builders.append((f, b))
    # MLP(s) on top of the BiLSTM. In biaffine mode, dep and head MLPs are
    # initialized from the same orthonormal matrix.
    if config.biaffine:
        W = utils.orthonormal_initializer(mlp_dim, 2 * hidden_dim)
        self.mlp_dep = self._pc.parameters_from_numpy(W)
        self.mlp_head = self._pc.parameters_from_numpy(W)
        self.mlp_dep_bias = self._pc.add_parameters(
            (mlp_dim, ), init=dy.ConstInitializer(0.))
        self.mlp_head_bias = self._pc.add_parameters(
            (mlp_dim, ), init=dy.ConstInitializer(0.))
    else:
        W = utils.orthonormal_initializer(mlp_dim * 2, 2 * hidden_dim)
        self.mlp = self._pc.parameters_from_numpy(W)
        self.mlp_bias = self._pc.add_parameters(
            (mlp_dim * 2, ), init=dy.ConstInitializer(0.))
    # self.mlp_arc_size = mlp_arc_size
    # self.mlp_rel_size = mlp_rel_size
    # self.dropout_mlp = dropout_mlp
    # Scoring parameters: biaffine bilinear forms, or a factored
    # real/imaginary variant when biaffine is disabled.
    if config.biaffine:
        self.W_arc = self._pc.add_parameters(
            (self._arc_dim, self._arc_dim + 1),
            init=dy.ConstInitializer(0.))
        self.W_rel = self._pc.add_parameters(
            (self._vocab_size_r * (self._rel_dim + 1), self._rel_dim + 1),
            init=dy.ConstInitializer(0.))
    else:
        self.V_r_arc = self._pc.add_parameters((self._arc_dim))
        self.V_i_arc = self._pc.add_parameters((self._arc_dim))
        self.bias_arc = self._pc.add_parameters((self._arc_dim * 2))
        self.V_r_rel = self._pc.add_parameters(
            (self._rel_dim * self._vocab_size_r))
        self.V_i_rel = self._pc.add_parameters(
            (self._rel_dim * self._vocab_size_r))
        self.bias_rel = self._pc.add_parameters(
            (self._rel_dim * self._vocab_size_r * 2))
def __init__(self, config, encodings, embeddings, runtime=False):
    """Build the parameters of a character-level sentence splitter/tokenizer.

    Each character is classified into one of three labels: O (inside token),
    S (token break), SX (token break + sentence end).
    """
    # INTERNAL PARAMS ###################################################
    self.config = config
    self.encodings = encodings
    self.word_embeddings = embeddings
    self.config.char_vocabulary_size = len(encodings.characters)
    self.decoder_output_class_count = 3  # O S SX
    # Label index <-> name maps.
    self.decoder_output_i2c = {}
    self.decoder_output_i2c[0] = "O"
    self.decoder_output_i2c[1] = "S"
    self.decoder_output_i2c[2] = "SX"
    self.decoder_output_c2i = {}
    self.decoder_output_c2i["O"] = 0
    self.decoder_output_c2i["S"] = 1
    self.decoder_output_c2i["SX"] = 2
    # NETWORK ###########################################################
    self.model = dy.Model()
    self.trainer = dy.AdamTrainer(self.model)
    self.trainer.set_sparse_updates(False)
    # EMBEDDING SPECIAL TOKENS
    self.word_embeddings_special = self.model.add_lookup_parameters(
        (2, self.word_embeddings.word_embeddings_size
         ))  # [0] = UNK, [1] = SENTENCE START
    # ENCODER-CHAR
    self.char_embeddings = self.model.add_lookup_parameters(
        (self.config.char_vocabulary_size,
         self.config.char_embedding_size))
    # self.next_chars_embedding = self.model.add_lookup_parameters(
    #     (self.config.char_vocabulary_size, self.config.next_chars_embedding_size))
    # Binary/ternary per-character feature embeddings (punctuation,
    # whitespace, uppercase).
    self.char_embeddings_punctuation = self.model.add_lookup_parameters(
        (self.config.char_generic_feature_vocabulary_size,
         self.config.char_generic_feature_embedding_size))
    self.char_embeddings_whitespace = self.model.add_lookup_parameters(
        (self.config.char_generic_feature_vocabulary_size,
         self.config.char_generic_feature_embedding_size))
    self.char_embeddings_uppercase = self.model.add_lookup_parameters(
        (self.config.char_generic_feature_vocabulary_size,
         self.config.char_generic_feature_embedding_size))
    # Char embedding + the three generic feature embeddings.
    self.encoder_char_input_size = self.config.char_embedding_size + 3 * self.config.char_generic_feature_embedding_size
    # Forward LSTM over past characters, backward LSTM over upcoming
    # characters, and a word-level LSTM over completed tokens.
    if runtime:
        self.encoder_char_lstm1_fw_builder = dy.VanillaLSTMBuilder(
            1, self.encoder_char_input_size,
            self.config.encoder_char_lstm_size, self.model)
        self.encoder_char_lstm2_bw_builder = dy.VanillaLSTMBuilder(
            1, self.config.next_chars_embedding_size +
            3 * self.config.char_generic_feature_embedding_size,
            self.config.encoder_char_lstm_size, self.model)
        self.encoder_word_lstm_builder = dy.VanillaLSTMBuilder(
            1, self.word_embeddings.word_embeddings_size,
            self.config.encoder_word_lstm_size, self.model)
    else:
        from utils import orthonormal_VanillaLSTMBuilder
        self.encoder_char_lstm1_fw_builder = orthonormal_VanillaLSTMBuilder(
            1, self.encoder_char_input_size,
            self.config.encoder_char_lstm_size, self.model)
        self.encoder_char_lstm2_bw_builder = orthonormal_VanillaLSTMBuilder(
            1, self.config.next_chars_embedding_size +
            3 * self.config.char_generic_feature_embedding_size,
            self.config.encoder_char_lstm_size, self.model)
        self.encoder_word_lstm_builder = orthonormal_VanillaLSTMBuilder(
            1, self.word_embeddings.word_embeddings_size,
            self.config.encoder_word_lstm_size, self.model)
    # ENCODER-WORD
    # self.att_w1 = self.model.add_parameters((
    #     self.config.next_chars_embedding_size + self.config.char_generic_feature_embedding_size * 3,
    #     self.config.encoder_char_lstm_size))
    # self.att_w2 = self.model.add_parameters((
    #     self.config.next_chars_embedding_size + self.config.char_generic_feature_embedding_size * 3,
    #     self.config.encoder_char_lstm_size))
    # self.att_v = self.model.add_parameters(
    #     (1, self.config.next_chars_embedding_size + self.config.char_generic_feature_embedding_size * 3))
    # DECODER
    self.holisticWE = self.model.add_lookup_parameters(
        (len(encodings.word2int),
         self.word_embeddings.word_embeddings_size))
    # Decoder MLP input: both char LSTM states + word LSTM state + word embedding.
    self.decoder_input_size = 2 * self.config.encoder_char_lstm_size + self.config.encoder_word_lstm_size + self.word_embeddings.word_embeddings_size
    self.decoder_hiddenW = self.model.add_parameters(
        (self.config.decoder_hidden_size, self.decoder_input_size))
    self.decoder_hiddenB = self.model.add_parameters(
        (self.config.decoder_hidden_size))
    self.decoder_outputW = self.model.add_parameters(
        (self.decoder_output_class_count, self.config.decoder_hidden_size))
    self.decoder_outputB = self.model.add_parameters(
        (self.decoder_output_class_count))
    # Auxiliary softmaxes fed directly by each char LSTM (peek = backward
    # over upcoming chars, hist = forward over past chars).
    self.aux_softmax_char_peek_w = self.model.add_parameters(
        (self.decoder_output_class_count,
         self.config.encoder_char_lstm_size))
    self.aux_softmax_char_peek_b = self.model.add_parameters(
        (self.decoder_output_class_count))
    self.aux_softmax_char_hist_w = self.model.add_parameters(
        (self.decoder_output_class_count,
         self.config.encoder_char_lstm_size))
    self.aux_softmax_char_hist_b = self.model.add_parameters(
        (self.decoder_output_class_count))
    print("done")
def __init__(self, config, encodings, embeddings, runtime=False):
    """Build the sentence-splitting (SS) and tokenization (TOK) models.

    Two independent DyNet models with their own Adam trainers are created:
    ``modelSS`` for sentence splitting and ``modelTok`` for tokenization.
    Each model combines character-level lookups (plus a 5-dim casing
    feature), forward/peek LSTMs with auxiliary binary softmax heads, and
    a final MLP + binary softmax.

    Args:
        config: hyper-parameter holder (embedding/LSTM/MLP layer sizes).
        encodings: vocabulary lookups (``char2int``, ``word2int``).
        embeddings: external word-embedding provider; exposes
            ``word_embeddings_size``.
        runtime (bool): when True, use the plain ``VanillaLSTMBuilder``;
            when False (training), use the orthonormally-initialized
            variant from ``utils``.
    """
    self.config = config
    self.word_embeddings = embeddings
    self.encodings = encodings

    self.modelSS = dy.Model()
    self.modelTok = dy.Model()
    self.trainerSS = dy.AdamTrainer(self.modelSS, alpha=2e-3, beta_1=0.9, beta_2=0.9)
    self.trainerTok = dy.AdamTrainer(self.modelTok, alpha=2e-3, beta_1=0.9, beta_2=0.9)

    # Select the LSTM builder once instead of branching at every
    # construction site. Orthonormal initialization is only needed for
    # training; importing it lazily keeps runtime consistent with the
    # sibling constructors in this file.
    if runtime:
        lstm_builder = dy.VanillaLSTMBuilder
    else:
        from utils import orthonormal_VanillaLSTMBuilder
        lstm_builder = orthonormal_VanillaLSTMBuilder

    # ---------------- sentence-splitting model ----------------
    # character-level embeddings; casing lookup is 3 rows (lower, upper, N/A)
    # of size 5, concatenated with the char embedding downstream (hence +5).
    self.SS_char_lookup = self.modelSS.add_lookup_parameters(
        (len(self.encodings.char2int), self.config.ss_char_embeddings_size))
    self.SS_char_lookup_casing = self.modelSS.add_lookup_parameters((3, 5))  # lower, upper, N/A
    self.SS_char_lookup_special = self.modelSS.add_lookup_parameters(
        (2, self.config.ss_char_embeddings_size + 5))

    # peek (look-ahead) LSTM with auxiliary binary softmax head
    self.SS_peek_lstm = lstm_builder(
        self.config.ss_peek_lstm_layers,
        self.config.ss_char_embeddings_size + 5,
        self.config.ss_peek_lstm_size, self.modelSS)
    self.SS_aux_softmax_peek_w = self.modelSS.add_parameters((2, self.config.ss_peek_lstm_size))
    self.SS_aux_softmax_peek_b = self.modelSS.add_parameters((2))

    # history (forward) LSTM with auxiliary binary softmax head
    self.SS_lstm = lstm_builder(
        self.config.ss_lstm_layers,
        self.config.ss_char_embeddings_size + 5,
        self.config.ss_lstm_size, self.modelSS)
    self.SS_aux_softmax_prev_w = self.modelSS.add_parameters((2, self.config.ss_lstm_size))
    self.SS_aux_softmax_prev_b = self.modelSS.add_parameters((2))

    # post MLP and binary softmax over [history-state ; peek-state]
    self.SS_mlp_w = []
    self.SS_mlp_b = []
    layer_input = self.config.ss_lstm_size + self.config.ss_peek_lstm_size
    for layer_size in self.config.ss_mlp_layers:
        self.SS_mlp_w.append(self.modelSS.add_parameters((layer_size, layer_input)))
        self.SS_mlp_b.append(self.modelSS.add_parameters((layer_size)))
        layer_input = layer_size
    self.SS_mlp_softmax_w = self.modelSS.add_parameters((2, layer_input))
    self.SS_mlp_softmax_b = self.modelSS.add_parameters((2))

    # ---------------- tokenization model ----------------
    self.TOK_char_lookup = self.modelTok.add_lookup_parameters(
        (len(self.encodings.char2int), self.config.tok_char_embeddings_size))
    self.TOK_char_lookup_casing = self.modelTok.add_lookup_parameters((3, 5))  # lower, upper, N/A
    self.TOK_char_lookup_special = self.modelTok.add_lookup_parameters(
        (2, self.config.tok_char_embeddings_size + 5))
    self.TOK_word_lookup = self.modelTok.add_lookup_parameters(
        (len(self.encodings.word2int), self.config.tok_word_embeddings_size))
    self.TOK_word_embeddings_special = self.modelTok.add_lookup_parameters(
        (2, self.word_embeddings.word_embeddings_size))
    # projects external word embeddings into the TOK word-embedding space
    self.TOK_word_proj_w = self.modelTok.add_parameters(
        (self.config.tok_word_embeddings_size, self.word_embeddings.word_embeddings_size))

    # character peek/history LSTMs and word-level LSTM
    self.TOK_backward_lstm = lstm_builder(
        self.config.tok_char_peek_lstm_layers,
        self.config.tok_char_embeddings_size + 5,
        self.config.tok_char_peek_lstm_size, self.modelTok)
    self.TOK_forward_lstm = lstm_builder(
        self.config.tok_char_lstm_layers,
        self.config.tok_char_embeddings_size + 5,
        self.config.tok_char_lstm_size, self.modelTok)
    self.TOK_word_lstm = lstm_builder(
        self.config.tok_word_lstm_layers,
        self.config.tok_word_embeddings_size,
        self.config.tok_word_lstm_size, self.modelTok)

    # MLP over [word-lstm ; fwd-char-lstm ; peek-char-lstm ; 2 ; word-emb]
    # (the "+ 2" extra input dims presumably carry auxiliary-softmax
    # outputs — TODO confirm against the forward pass)
    self.TOK_mlp_w = []
    self.TOK_mlp_b = []
    layer_input = (self.config.tok_word_lstm_size + self.config.tok_char_lstm_size +
                   self.config.tok_char_peek_lstm_size + 2 + self.config.tok_word_embeddings_size)
    for layer_size in self.config.tok_mlp_layers:
        self.TOK_mlp_w.append(self.modelTok.add_parameters((layer_size, layer_input)))
        self.TOK_mlp_b.append(self.modelTok.add_parameters((layer_size)))
        layer_input = layer_size
    self.TOK_softmax_w = self.modelTok.add_parameters((2, layer_input))
    self.TOK_softmax_b = self.modelTok.add_parameters((2))

    # auxiliary binary softmax heads on the peek and history char LSTMs
    self.TOK_softmax_peek_w = self.modelTok.add_parameters((2, self.config.tok_char_peek_lstm_size))
    self.TOK_softmax_peek_b = self.modelTok.add_parameters((2))
    self.TOK_softmax_prev_w = self.modelTok.add_parameters((2, self.config.tok_char_lstm_size))
    self.TOK_softmax_prev_b = self.modelTok.add_parameters((2))

    self.losses = []
    self.losses_tok = []