def init_model(vocab_size, char_type_size):
    model = FunctionSet(
        embed=F.EmbedID(vocab_size, embed_units),
        char_type_embed=F.EmbedID(char_type_size, char_type_embed_units),
        # dict_embed=F.Linear(12, dict_embed_units),
        hidden1=F.Linear(
            window * (embed_units + char_type_embed_units) * 3 + hidden_units,
            hidden_units),
        i_gate=F.Linear(
            window * (embed_units + char_type_embed_units) * 3 + hidden_units,
            hidden_units),
        f_gate=F.Linear(
            window * (embed_units + char_type_embed_units) * 3 + hidden_units,
            hidden_units),
        o_gate=F.Linear(
            window * (embed_units + char_type_embed_units) * 3 + hidden_units,
            hidden_units),
        output=F.Linear(hidden_units + 12, label_num),
    )
    if opt_selection == 'Adagrad':
        opt = optimizers.AdaGrad(lr=learning_rate)
    elif opt_selection == 'SGD':
        opt = optimizers.SGD()
    elif opt_selection == 'Adam':
        opt = optimizers.Adam()
    else:
        opt = optimizers.AdaGrad(lr=learning_rate)
        print('AdaGrad is chosen as default')
    opt.setup(model)
    return model, opt
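A minimal sketch, assuming chainer v1-style Variables, of how the four gate layers above would typically combine into one LSTM step; x is the concatenated window of character and character-type embeddings, c and h the previous cell and hidden state (the function and variable names here are illustrative, not from the source):

def lstm_step(model, x, c, h):
    xh = F.concat((x, h))  # matches the "... + hidden_units" input sizes above
    i = F.sigmoid(model.i_gate(xh))  # input gate
    f = F.sigmoid(model.f_gate(xh))  # forget gate
    o = F.sigmoid(model.o_gate(xh))  # output gate
    c = f * c + i * F.tanh(model.hidden1(xh))  # hidden1 as cell candidate (assumption)
    h = o * F.tanh(c)
    return c, h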
def init_model():
    # Make models
    if use_pre2 == 'pre':
        pre_unit = 4
    else:
        pre_unit = 0
    if use_null == 'null':
        null_unit = 6
    else:
        null_unit = 0
    if args.phrase == 'phrase':
        phrase_unit = 4
        model = chainer.FunctionSet(
            trainable=chainer.FunctionSet(
                w0=F.Linear(n_units * 2 + null_unit * 2, n_label),
                ww0=F.Linear(
                    n_units * 2 + pre_unit + null_unit * 2 + phrase_unit,
                    n_units + null_unit),
                ww1=F.Linear(
                    n_units * 2 + pre_unit + null_unit * 2 + phrase_unit,
                    n_units + null_unit),
            ),
            w1_f=F.Linear(n_units * 2 + null_unit * 2, n_units + null_unit),  # source input
            w2_f=F.Linear(n_units + null_unit, n_units * 2 + null_unit * 2),  # source output
            w1_e=F.Linear(n_units * 2 + null_unit * 2, n_units + null_unit),  # target input
            w2_e=F.Linear(n_units + null_unit, n_units * 2 + null_unit * 2),  # target output
            embed_f=F.EmbedID(vocab_f['len_vocab'], n_units),  # source word embedding
            embed_e=F.EmbedID(vocab_e['len_vocab'], n_units),  # target word embedding
        )
    else:
        model = chainer.FunctionSet(
            trainable=chainer.FunctionSet(
                w0=F.Linear(n_units * 4 + null_unit * 4, n_label),
            ),
            w1_f=F.Linear(n_units * 2 + null_unit * 2, n_units + null_unit),  # source input
            w2_f=F.Linear(n_units + null_unit, n_units * 2 + null_unit * 2),  # source output
            w1_e=F.Linear(n_units * 2 + null_unit * 2, n_units + null_unit),  # target input
            w2_e=F.Linear(n_units + null_unit, n_units * 2 + null_unit * 2),  # target output
            embed_f=F.EmbedID(vocab_f['len_vocab'], n_units),  # source word embedding
            embed_e=F.EmbedID(vocab_e['len_vocab'], n_units),  # target word embedding
        )
    if opt_name == 'SGD':
        optimizer = optimizers.SGD(lr=0.02)  # (lr=opt_score) # lr=0.01
    elif opt_name == 'AdaGrad':
        optimizer = optimizers.AdaGrad(lr=0.001)  # (lr=opt_score) # lr=0.001
    elif opt_name == 'AdaDelta':
        optimizer = optimizers.AdaDelta(rho=0.9)  # (rho=opt_score) # rho=0.9
    elif opt_name == 'Adam':
        optimizer = optimizers.Adam(alpha=0.0001)  # (alpha=opt_score) # alpha=0.0001
    else:
        # fail fast instead of hitting a NameError on an unknown optimizer name
        raise ValueError('unknown optimizer: ' + opt_name)
    optimizer.setup(model)  # .collect_parameters()
    return model, optimizer
def __make_model(self):
    self.__model = wrapper.make_model(
        w_xh=functions.EmbedID(
            2 * self.__n_context * len(self.__vocab), self.__n_hidden),
        w_hy=functions.Linear(self.__n_hidden, self.__n_labels),
        # weight of each transition between labels (0, 1);
        # interpreted as probabilities by passing it through a softmax
        trans=functions.EmbedID(self.__n_labels * self.__n_labels, 1),
    )
def init_model(vocab_size):
    model = chainer.FunctionSet(
        embed=F.EmbedID(vocab_size, embed_units),
        hidden1=F.Linear(window * embed_units, hidden_units),
        output=F.Linear(hidden_units, label_num),
        trans=F.EmbedID(label_num, label_num),
    )
    # opt = optimizers.AdaGrad(lr=learning_rate)
    opt = optimizers.Adam()
    opt.setup(model)
    return model, opt
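The trans link above reads like a table of label-transition scores; a hypothetical lookup, assuming int32 label ids and numpy imported as np:

# Hypothetical: scores over all next labels given previous label 0.
prev = chainer.Variable(np.array([0], dtype=np.int32))
transition_scores = model.trans(prev)  # Variable of shape (1, label_num)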
def __make_model(self):
    self.__model = wrapper.make_model(
        # encoder
        w_xi=functions.EmbedID(len(self.__src_vocab), self.__n_embed),
        w_ip=functions.Linear(self.__n_embed, 4 * self.__n_hidden),
        w_pp=functions.Linear(self.__n_hidden, 4 * self.__n_hidden),
        # decoder
        w_pq=functions.Linear(self.__n_hidden, 4 * self.__n_hidden),
        w_qj=functions.Linear(self.__n_hidden, self.__n_embed),
        w_jy=functions.Linear(self.__n_embed, len(self.__trg_vocab)),
        w_yq=functions.EmbedID(len(self.__trg_vocab), 4 * self.__n_hidden),
        w_qq=functions.Linear(self.__n_hidden, 4 * self.__n_hidden),
    )
def make_model(self):
    self.model = wrapper.make_model(
        # encoder
        weight_xi=functions.EmbedID(len(self.src_vocab), self.n_embed),
        weight_ip=functions.Linear(self.n_embed, 4 * self.n_hidden),
        weight_pp=functions.Linear(self.n_hidden, 4 * self.n_hidden),
        # decoder
        weight_pq=functions.Linear(self.n_hidden, 4 * self.n_hidden),
        weight_qj=functions.Linear(self.n_hidden, self.n_embed),
        weight_jy=functions.Linear(self.n_embed, len(self.trg_vocab)),
        weight_yq=functions.EmbedID(len(self.trg_vocab), 4 * self.n_hidden),
        weight_qq=functions.Linear(self.n_hidden, 4 * self.n_hidden),
    )
def init_model(vocab_size, char_type_size):
    model = chainer.FunctionSet(
        embed=F.EmbedID(vocab_size, embed_units),
        char_type_embed=F.EmbedID(char_type_size, char_type_embed_units),
        hidden1=F.Linear(
            window * (embed_units + char_type_embed_units) + hidden_units,
            hidden_units),
        i_gate=F.Linear(
            window * (embed_units + char_type_embed_units) + hidden_units,
            hidden_units),
        f_gate=F.Linear(
            window * (embed_units + char_type_embed_units) + hidden_units,
            hidden_units),
        o_gate=F.Linear(
            window * (embed_units + char_type_embed_units) + hidden_units,
            hidden_units),
        output=F.Linear(hidden_units, label_num),
    )
    # opt = optimizers.AdaGrad(lr=learning_rate)
    # opt = optimizers.SGD()
    opt = optimizers.Adam()
    opt.setup(model)
    return model, opt
def __init__(self, n_units, vocab_in, vocab_out, loadpath=None, gpu=-1):
    self.xp = np
    self.tagger = igo.tagger.Tagger(self.DIC_DIR)
    self.vocab_in = vocab_in
    self.vocab_out = vocab_out
    self.n_units = n_units
    if loadpath:
        with open(loadpath, 'rb') as f:
            self.model = pickle.load(f)
    else:
        self.model = chainer.FunctionSet(
            embed=F.EmbedID(len(self.vocab_in), n_units),
            l1_x=F.Linear(self.n_units, 4 * self.n_units),
            l1_h=F.Linear(self.n_units, 4 * self.n_units),
            l2_x=F.Linear(self.n_units, 4 * self.n_units),
            l2_h=F.Linear(self.n_units, 4 * self.n_units),
            l3=F.Linear(self.n_units, len(self.vocab_out)),
        )
    self.optimizer = optimizers.Adam()
    self.optimizer.setup(self.model)
def __init__(self, n_vocab, doc_length, wv_size, filter_sizes=[3, 4, 5],
             hidden_units=[100, 2], output_channel=100, initialW=None,
             non_static=False):
    super(NNModel, self).__init__()
    self.filter_sizes = filter_sizes
    self.hidden_units = hidden_units
    self.doc_length = doc_length
    self.non_static = non_static
    self.add_link(
        'embed',
        F.EmbedID(n_vocab, wv_size, initialW=initialW, ignore_label=0))
    for filter_h in self.filter_sizes:
        filter_w = wv_size
        filter_shape = (filter_h, filter_w)
        self.add_link('conv' + str(filter_h),
                      L.Convolution2D(1, output_channel, filter_shape))
    for i in range(len(hidden_units)):
        self.add_link('l' + str(i), L.Linear(None, hidden_units[i]))
def setUp(self):
    self.func = functions.EmbedID(3, 2)
    self.func.gW.fill(0)
    self.W = self.func.W.copy()  # fixed on CPU
    self.x = numpy.array([0, 1, 0], dtype=numpy.int32)
    self.gy = numpy.random.uniform(-1, 1, (3, 2)).astype(numpy.float32)
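A forward check that could accompany this setUp, assuming the chainer v1 Function API on CPU and chainer imported in the test module: each output row must equal the embedding-matrix row selected by the corresponding index.

def test_forward_cpu(self):
    x = chainer.Variable(self.x)
    y = self.func(x)
    # fancy indexing picks rows 0, 1, 0 of W, matching self.x
    numpy.testing.assert_array_equal(y.data, self.W[self.x])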
def __init__(self, n_vocab, n_docs, n_units, loss_func):
    super(DistributedBoW, self).__init__(
        embed=F.EmbedID(n_vocab + n_docs, n_units,
                        initialW=I.Uniform(1. / n_units)),
        loss_func=loss_func,
    )
def make_rnnlm_model(n_vocab, n_embed, n_hidden):
    return make_model(
        w_xe=functions.EmbedID(n_vocab, n_embed),
        w_eh=functions.Linear(n_embed, n_hidden),
        w_hh=functions.Linear(n_hidden, n_hidden),
        w_hy=functions.Linear(n_hidden, n_vocab),
    )
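A sketch, under the same chainer v1 conventions, of one step of the Elman RNNLM these four weights describe (rnnlm_step is an illustrative name, not from the source):

def rnnlm_step(model, h, x):
    e = model.w_xe(x)                                  # word id -> embedding
    h = functions.tanh(model.w_eh(e) + model.w_hh(h))  # recurrent update
    y = model.w_hy(h)                                  # logits over the vocabulary
    return h, y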
def __init__(self, deep, gpu, word2index, in_units, hidden_units, out_units,
             loss_func, train, drop_ratio=0.0):
    n_vocab = len(word2index)
    l2r_embedding = F.EmbedID(n_vocab, in_units)
    r2l_embedding = F.EmbedID(n_vocab, in_units)

    if deep:
        super(BiLstmContext, self).__init__(
            l2r_embed=l2r_embedding,
            r2l_embed=r2l_embedding,
            loss_func=loss_func,
            l2r_1=L.LSTM(in_units, hidden_units),
            r2l_1=L.LSTM(in_units, hidden_units),
            l3=L.Linear(2 * hidden_units, 2 * hidden_units),
            l4=L.Linear(2 * hidden_units, out_units),
        )
    else:
        super(BiLstmContext, self).__init__(
            l2r_embed=l2r_embedding,
            r2l_embed=r2l_embedding,
            loss_func=loss_func,
            l2r_1=L.LSTM(in_units, hidden_units),
            r2l_1=L.LSTM(in_units, hidden_units),
            # integer division: layer sizes must be ints under Python 3 as well
            lp_l2r=L.Linear(hidden_units, out_units // 2),
            lp_r2l=L.Linear(hidden_units, out_units // 2))

    if gpu >= 0:
        self.to_gpu()

    l2r_embedding.W.data = self.xp.random.normal(
        0, math.sqrt(1. / l2r_embedding.W.data.shape[0]),
        l2r_embedding.W.data.shape).astype(np.float32)
    r2l_embedding.W.data = self.xp.random.normal(
        0, math.sqrt(1. / r2l_embedding.W.data.shape[0]),
        r2l_embedding.W.data.shape).astype(np.float32)

    self.word2index = word2index
    self.train = train
    self.deep = deep
    self.drop_ratio = drop_ratio
def __init__(self, embed_size, hidden_size, source_vocab):
    super(Encoder, self).__init__(
        word_id_2_embed=F.EmbedID(source_vocab, embed_size, ignore_label=-1),
        embed_2_lstm_input=F.Linear(embed_size, hidden_size * 4),
        pre_hidden_2_lstm_input=F.Linear(hidden_size, hidden_size * 4),
    )
def __init__(self, embed_size, hidden_size, target_vocab):
    super(Decoder, self).__init__(
        word_id_2_embed=F.EmbedID(target_vocab, embed_size, ignore_label=-1),
        embed_2_lstm_input=F.Linear(embed_size, hidden_size * 4),
        pre_hidden_2_lstm_input=F.Linear(hidden_size, hidden_size * 4),
        hidden_2_word_id=F.Linear(hidden_size, target_vocab),
    )
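The Encoder/Decoder links above presumably feed chainer's F.lstm, given their 4x pre-activation sizes; a hedged sketch of the step both share (lstm_step is an illustrative name; c and h are the cell and hidden state Variables):

def lstm_step(link, word_ids, c, h):
    e = link.word_id_2_embed(word_ids)
    # F.lstm splits the 4 * hidden_size pre-activation into the LSTM gates
    return F.lstm(c, link.embed_2_lstm_input(e) + link.pre_hidden_2_lstm_input(h))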
def __init__(self, emb_dim, vocab_size, layers, suppress_output=False,
             lstm=False, irnn=False, active=F.relu, eos_id=0):
    """
    Recurrent Neural Network with multiple layers.
    in_dim -> layers[0] -> ... -> layers[-1] -> out_dim (optional)

    :param int emb_dim: dimension of embeddings
    :param int vocab_size: size of vocabulary
    :param layers: dimensions of hidden layers
    :type layers: list of int
    :param bool suppress_output: whether to suppress output
    :param bool lstm: whether to use LSTM
    :param bool irnn: whether to use IRNN
    :param chainer.Function active: activation function between layers of vanilla RNN
    :param int eos_id: ID of <BOS> and <EOS>
    """
    assert not (lstm and irnn)
    self.emb_dim = emb_dim
    self.vocab_size = vocab_size
    self.layers = layers
    self.suppress_output = suppress_output
    self.lstm = lstm
    self.irnn = irnn
    self.active = active
    self.eos_id = eos_id

    # set up NN architecture
    model = chainer.FunctionSet(emb=F.EmbedID(vocab_size, emb_dim))

    # add hidden layers
    layer_dims = [emb_dim] + layers
    for i in range(len(layers)):
        in_dim = layer_dims[i]
        out_dim = layer_dims[i + 1]
        if lstm:
            linear = F.Linear(in_dim, out_dim * 4)
            hidden = F.Linear(out_dim, out_dim * 4)
        else:
            linear = F.Linear(in_dim, out_dim)
            hidden = F.Linear(out_dim, out_dim)
        if irnn:
            # initialize hidden connection with identity matrix
            # (float32 to match the other parameters)
            hidden.W = np.eye(out_dim, dtype=np.float32)
        setattr(model, 'l{}_x'.format(i + 1), linear)
        setattr(model, 'l{}_h'.format(i + 1), hidden)

    if not suppress_output:
        # add output layer
        setattr(model, 'l_y', F.Linear(layer_dims[-1], vocab_size))

    self.model = model
def __init__(self, n_vocab, n_units, n_labels, train=True):
    super(BLSTM, self).__init__(
        embed=F.EmbedID(n_vocab, n_units, ignore_label=-1),
        # fl=L.LSTM(n_units, n_units),
        # bl=L.LSTM(n_units, n_units),
        ll=L.Linear(n_units, 2),
    )
    for param in self.params():
        param.data[...] = np.random.uniform(-0.1, 0.1, param.data.shape)
    self.train = train
def __init__(self, n_vocab, n_units):
    super(CharRNN, self).__init__(
        embed=F.EmbedID(n_vocab, n_units),
        l1_x=F.Linear(n_units, 4 * n_units),
        l1_h=F.Linear(n_units, 4 * n_units),
        l2_h=F.Linear(n_units, 4 * n_units),
        l2_x=F.Linear(n_units, 4 * n_units),
        l3=F.Linear(n_units, n_vocab),
    )
    for param in self.parameters:
        param[:] = np.random.uniform(-0.08, 0.08, param.shape)
def __init__(self, src_vocab, trg_vocab, n_embed=256, n_hidden=512,
             algorithm='Adam'):
    self.src_vocab = src_vocab
    self.trg_vocab = trg_vocab
    self.n_embed = n_embed
    self.n_hidden = n_hidden
    self.algorithm = algorithm
    self.model = FunctionSet(
        embed_x=F.EmbedID(len(src_vocab), n_embed),
        en_x_to_h=F.Linear(n_embed, 4 * n_hidden),
        en_h_to_h=F.Linear(n_hidden, 4 * n_hidden),
        en_h_to_de_h=F.Linear(n_hidden, 4 * n_hidden),
        de_h_to_embed_y=F.Linear(n_hidden, n_embed),
        embed_y_to_y=F.Linear(n_embed, len(trg_vocab)),
        y_to_h=F.EmbedID(len(trg_vocab), 4 * n_hidden),
        de_h_to_h=F.Linear(n_hidden, 4 * n_hidden))
def __make_model(self):
    self.__model = wrapper.make_model(
        w_xe=functions.EmbedID(len(self.__vocab), self.__n_embed),
        w_ea=functions.Linear(self.__n_embed, 4 * self.__n_hidden),
        w_aa=functions.Linear(self.__n_hidden, 4 * self.__n_hidden),
        w_eb=functions.Linear(self.__n_embed, 4 * self.__n_hidden),
        w_bb=functions.Linear(self.__n_hidden, 4 * self.__n_hidden),
        w_ay1=functions.Linear(self.__n_hidden, 1),
        w_by1=functions.Linear(self.__n_hidden, 1),
        w_ay2=functions.Linear(self.__n_hidden, 1),
        w_by2=functions.Linear(self.__n_hidden, 1),
    )
def make_model(self):
    initialW = np.random.uniform
    self.model = self.wrapper.make_model(
        # encoder
        w_xi=functions.EmbedID(len(self.src_vocab), self.n_embed),
        w_ip=functions.Linear(
            self.n_embed, 4 * self.n_hidden,
            initialW=initialW(-0.1, 0.1, (4 * self.n_hidden, self.n_embed))),
        w_pp=functions.Linear(
            self.n_hidden, 4 * self.n_hidden,
            initialW=initialW(-0.1, 0.1, (4 * self.n_hidden, self.n_hidden))),
        # decoder
        w_pq=functions.Linear(
            self.n_hidden, 4 * self.n_hidden,
            initialW=initialW(-0.1, 0.1, (4 * self.n_hidden, self.n_hidden))),
        w_qj=functions.Linear(
            self.n_hidden, self.n_embed,
            initialW=initialW(-0.1, 0.1, (self.n_embed, self.n_hidden))),
        w_jy=functions.Linear(
            self.n_embed, len(self.trg_vocab),
            initialW=initialW(-0.1, 0.1, (len(self.trg_vocab), self.n_embed))),
        w_yq=functions.EmbedID(len(self.trg_vocab), 4 * self.n_hidden),
        w_qq=functions.Linear(
            self.n_hidden, 4 * self.n_hidden,
            initialW=initialW(-0.1, 0.1, (4 * self.n_hidden, self.n_hidden))),
    )
def __make_model(self):
    self.__model = wrapper.make_model(
        # input embedding
        w_xi=functions.EmbedID(len(self.__src_vocab), self.__n_embed),
        # forward encoder
        w_ia=functions.Linear(self.__n_embed, 4 * self.__n_hidden),
        w_aa=functions.Linear(self.__n_hidden, 4 * self.__n_hidden),
        # backward encoder
        w_ib=functions.Linear(self.__n_embed, 4 * self.__n_hidden),
        w_bb=functions.Linear(self.__n_hidden, 4 * self.__n_hidden),
        # attentional weight estimator
        w_aw=functions.Linear(self.__n_hidden, self.__n_hidden),
        w_bw=functions.Linear(self.__n_hidden, self.__n_hidden),
        w_pw=functions.Linear(self.__n_hidden, self.__n_hidden),
        w_we=functions.Linear(self.__n_hidden, 1),
        # decoder
        w_ap=functions.Linear(self.__n_hidden, self.__n_hidden),
        w_bp=functions.Linear(self.__n_hidden, self.__n_hidden),
        w_yp=functions.EmbedID(len(self.__trg_vocab), 4 * self.__n_hidden),
        w_pp=functions.Linear(self.__n_hidden, 4 * self.__n_hidden),
        w_cp=functions.Linear(self.__n_hidden, 4 * self.__n_hidden),
        w_dp=functions.Linear(self.__n_hidden, 4 * self.__n_hidden),
        w_py=functions.Linear(self.__n_hidden, len(self.__trg_vocab)),
    )
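A sketch of the per-position attention energy the w_aw/w_bw/w_pw/w_we links suggest (an assumption, not taken from the source): forward encoder state a, backward encoder state b, and the previous decoder state p are projected, combined through tanh, and reduced to one scalar score per source position.

def attention_energy(model, a, b, p):
    # one unnormalized attention score; a softmax over positions would follow
    return model.w_we(functions.tanh(model.w_aw(a) + model.w_bw(b) + model.w_pw(p)))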
def __init__(self, n_vocab, n_units, batch_size):
    super(CharIRNN, self).__init__(
        embed=F.EmbedID(n_vocab, n_units),
        l1_x=F.Linear(n_units, n_units),
        l1_h=F.Linear(n_units, n_units),
        l2_h=F.Linear(n_units, n_units),
        l2_x=F.Linear(n_units, n_units),
        l3=F.Linear(n_units, n_vocab),
    )
    self.sorted_funcs = sorted(six.iteritems(self.__dict__))
    for param in self.parameters:
        param[:] = np.random.uniform(-0.08, 0.08, param.shape)
    # IRNN trick: recurrent weights start as a scaled identity matrix
    self.l1_h.W = np.eye(self.l1_h.W.shape[0], dtype=np.float32) * 0.5
    self.l2_h.W = np.eye(self.l2_h.W.shape[0], dtype=np.float32) * 0.5
    self.reset_state(batch_size)
def _create_and_initialize_model(self, tags, vocab):
    # The model is a single-layer LSTM tagger:
    # (word_id -> word_emb)_t -> LSTM -> (distribution over tag_id)_t
    self.model = chainer.FunctionSet()
    self.model.embed = F.EmbedID(len(vocab), self.n_lstm_cells)
    self.model.lstm_x_to_h = F.Linear(self.n_lstm_cells, 4 * self.n_lstm_cells)
    self.model.lstm_h_to_h = F.Linear(self.n_lstm_cells, 4 * self.n_lstm_cells)
    self.model.yclf = F.Linear(self.n_lstm_cells, len(tags))

    # Randomly initialize the parameters.
    for param in self.model.parameters:
        param[:] = np.random.uniform(-0.1, 0.1, param.shape)
def __init__(self, caption_model_place, cnn_model_place, index2word_place,
             gpu_id=-1, beamsize=3):
    # basic parameters you may need to modify
    self.gpu_id = gpu_id  # GPU ID; use -1 for CPU
    self.beamsize = beamsize

    # GPU setting
    global xp
    if self.gpu_id >= 0:
        xp = cuda.cupy
        cuda.get_device(gpu_id).use()
    else:
        xp = np

    # Prepare dataset
    with open(index2word_place, 'rb') as f:
        self.index2word = pickle.load(f)
    vocab = self.index2word

    # Load Caffe model
    with open(cnn_model_place, 'rb') as f:
        self.func = pickle.load(f)

    # Model preparation
    image_feature_dim = 1024  # dimension of the image feature
    self.n_units = 512  # number of units per layer
    n_units = 512
    self.model = FunctionSet()
    # corresponds to the last layer of CNN(I); parameters W, b
    self.model.img_feature2vec = F.Linear(image_feature_dim, n_units)
    # corresponds to W_e * S_t; parameter W
    self.model.embed = F.EmbedID(len(vocab), n_units)
    self.model.l1_x = F.Linear(n_units, 4 * n_units)  # parameters W, b
    self.model.l1_h = F.Linear(n_units, 4 * n_units)  # parameters W, b
    self.model.out = F.Linear(n_units, len(vocab))  # parameters W, b
    serializers.load_hdf5(caption_model_place, self.model)  # read pre-trained model

    # To GPU
    if gpu_id >= 0:
        self.model.to_gpu()
        self.func.to_gpu()

    # To avoid overflow: for reasons unknown, this model overflows on the very
    # first run, but only on CPU. So we trigger the overflow once intentionally
    # so that it never happens after that.
    if gpu_id < 0:
        numpy_image = np.ones((3, 224, 224), dtype=np.float32)
        self.generate(numpy_image)
def __init__(self, emb_dim, vocab_size, layer_dims, feature_dim,
             suppress_output, eos_id=0):
    """
    Recurrent Neural Network with multiple layers.
    in_dim -> layers[0] -> ... -> layers[-1] -> out_dim (optional)

    :param int emb_dim: dimension of embeddings
    :param int vocab_size: size of vocabulary
    :param layer_dims: dimensions of hidden layers
    :type layer_dims: list of int
    :param int feature_dim: dimension of the external feature
    :param bool suppress_output: whether to suppress output
    :param int eos_id: ID of <BOS> and <EOS>
    """
    super(Rnn, self).__init__(emb=F.EmbedID(vocab_size, emb_dim))
    self.emb_dim = emb_dim
    self.vocab_size = vocab_size
    self.layer_dims = layer_dims
    self.feature_dim = feature_dim
    self.suppress_output = suppress_output
    self.eos_id = eos_id

    # add hidden layers
    ls_xh = ChainList()
    ls_hh = ChainList()
    ls_fh = ChainList()
    layer_dims = [emb_dim] + layer_dims
    for in_dim, out_dim in zip(layer_dims, layer_dims[1:]):
        ls_xh.add_link(F.Linear(in_dim, out_dim * 4))
        ls_hh.add_link(F.Linear(out_dim, out_dim * 4))
        ls_fh.add_link(F.Linear(feature_dim, out_dim * 4))
    self.add_link('ls_xh', ls_xh)
    self.add_link('ls_hh', ls_hh)
    self.add_link('ls_fh', ls_fh)

    if not suppress_output:
        # add output layer
        self.add_link('l_y', F.Linear(layer_dims[-1], self.vocab_size))
def __init__(self, n_vocab, n_units, loss_func):
    super(ContinuousBoW, self).__init__(
        embed=F.EmbedID(n_vocab, n_units, initialW=I.Uniform(1. / n_units)),
        loss_func=loss_func,
    )
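A sketch of how ContinuousBoW is presumably driven (assumptions: F.sum supports an axis argument, and loss_func is a sampled objective such as negative sampling): the mean of the context embeddings forms the hidden vector that loss_func scores against the center word.

def cbow_loss(model, context, center):
    e = model.embed(context)                       # (batch, window, n_units)
    h = F.sum(e, axis=1) * (1. / e.data.shape[1])  # mean over the context window
    return model.loss_func(h, center)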
vocab = {}


def load_data(filename):
    words = open(filename).read().replace('\n', '<eos>').strip().split()
    dataset = np.ndarray((len(words),), dtype=np.int32)
    for i, word in enumerate(words):
        if word not in vocab:
            vocab[word] = len(vocab)
        dataset[i] = vocab[word]
    return dataset


train_data = load_data('ptb.train.txt')
valid_data = load_data('ptb.valid.txt')
test_data = load_data('ptb.test.txt')
print('#vocab =', len(vocab))

# Prepare RNNLM model
model = chainer.FunctionSet(embed=F.EmbedID(len(vocab), n_units),
                            l1_x=F.Linear(n_units, 4 * n_units),
                            l1_h=F.Linear(n_units, 4 * n_units),
                            l2_x=F.Linear(n_units, 4 * n_units),
                            l2_h=F.Linear(n_units, 4 * n_units),
                            l3=F.Linear(n_units, len(vocab)))
for param in model.parameters:
    param[:] = np.random.uniform(-0.1, 0.1, param.shape)
if args.gpu >= 0:
    cuda.check_cuda_available()
    cuda.get_device(args.gpu).use()
    model.to_gpu()


def forward_one_step(x_data, y_data, state, train=True):
    # Neural net architecture
def make_model(self):
    initialW = np.random.uniform
    self.model = self.wrapper.make_model(
        # input embedding
        w_xi=functions.EmbedID(len(self.src_vocab), self.n_embed),
        # forward encoder
        w_ia=functions.Linear(
            self.n_embed, 4 * self.n_hidden,
            initialW=initialW(-0.1, 0.1, (4 * self.n_hidden, self.n_embed))),
        w_aa=functions.Linear(
            self.n_hidden, 4 * self.n_hidden,
            initialW=initialW(-0.1, 0.1, (4 * self.n_hidden, self.n_hidden))),
        # backward encoder
        w_ib=functions.Linear(
            self.n_embed, 4 * self.n_hidden,
            initialW=initialW(-0.1, 0.1, (4 * self.n_hidden, self.n_embed))),
        w_bb=functions.Linear(
            self.n_hidden, 4 * self.n_hidden,
            initialW=initialW(-0.1, 0.1, (4 * self.n_hidden, self.n_hidden))),
        # attentional weight estimator
        w_aw=functions.Linear(
            self.n_hidden, self.n_hidden,
            initialW=initialW(-0.1, 0.1, (self.n_hidden, self.n_hidden))),
        w_bw=functions.Linear(
            self.n_hidden, self.n_hidden,
            initialW=initialW(-0.1, 0.1, (self.n_hidden, self.n_hidden))),
        w_pw=functions.Linear(
            self.n_hidden, self.n_hidden,
            initialW=initialW(-0.1, 0.1, (self.n_hidden, self.n_hidden))),
        w_we=functions.Linear(
            self.n_hidden, 1,
            initialW=initialW(-0.1, 0.1, (1, self.n_hidden))),
        # decoder
        w_ap=functions.Linear(
            self.n_hidden, self.n_hidden,
            initialW=initialW(-0.1, 0.1, (self.n_hidden, self.n_hidden))),
        w_bp=functions.Linear(
            self.n_hidden, self.n_hidden,
            initialW=initialW(-0.1, 0.1, (self.n_hidden, self.n_hidden))),
        w_yp=functions.EmbedID(len(self.trg_vocab), 4 * self.n_hidden),
        w_pp=functions.Linear(
            self.n_hidden, 4 * self.n_hidden,
            initialW=initialW(-0.1, 0.1, (4 * self.n_hidden, self.n_hidden))),
        w_cp=functions.Linear(
            self.n_hidden, 4 * self.n_hidden,
            initialW=initialW(-0.1, 0.1, (4 * self.n_hidden, self.n_hidden))),
        w_dp=functions.Linear(
            self.n_hidden, 4 * self.n_hidden,
            initialW=initialW(-0.1, 0.1, (4 * self.n_hidden, self.n_hidden))),
        w_py=functions.Linear(
            self.n_hidden, len(self.trg_vocab),
            initialW=initialW(-0.1, 0.1, (len(self.trg_vocab), self.n_hidden))),
    )
traverse(tree, train=False, evaluate=result)

acc_node = 100.0 * result['correct_node'] / result['total_node']
acc_root = 100.0 * result['correct_root'] / result['total_root']
print(' Node accuracy: {0:.2f} % ({1:,d}/{2:,d})'.format(
    acc_node, result['correct_node'], result['total_node']))
print(' Root accuracy: {0:.2f} % ({1:,d}/{2:,d})'.format(
    acc_root, result['correct_root'], result['total_root']))


vocab = {}
train_trees = read_corpus('trees/train.txt', vocab)
test_trees = read_corpus('trees/test.txt', vocab)
develop_trees = read_corpus('trees/dev.txt', vocab)

model = chainer.FunctionSet(
    embed=F.EmbedID(len(vocab), n_units),
    l=F.Linear(n_units * 2, n_units),
    w=F.Linear(n_units, n_label),
)

if args.gpu >= 0:
    model.to_gpu()

# Setup optimizer
optimizer = optimizers.AdaGrad(lr=0.1)
optimizer.setup(model)

accum_loss = 0
count = 0
start_at = time.time()
cur_at = start_at
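The embed/l/w links above imply the usual recursive composition that traverse() presumably evaluates; a hedged sketch (compose is an illustrative name, not from the source): child vectors are concatenated, squashed through l, and w yields the label scores.

def compose(model, left, right):
    # combine two child node vectors into a parent vector plus label logits
    v = F.tanh(model.l(F.concat((left, right))))
    return v, model.w(v)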