s_data_list = list() s_data_list.append(s_word) s_data_list.append(s_pos) s_data_list.append([sen_idx]) s_feats = utils.construct_feats(feats, s) s_data_list.append(s_feats) data_list.append(s_data_list) if options.prior_weight > 0: s_prior = utils.construct_prior(prior_set, s, pos, options.tag_num, options.prior_weight) prior_dict[sen_idx] = s_prior if options.use_gold: s_gold = list(map(lambda e: e.parent_id, s.entries)) gold_dict[sen_idx] = s_gold sen_idx += 1 # batch_data = utils.construct_batch_data(data_list, options.batchsize) batch_data = utils.construct_update_batch_data(data_list, options.batchsize) print 'Batch data constructed' dependency_tagging_model = dt_sparse_model.sparse_model(w2i, pos, feats, options) if options.prior_weight > 0: dependency_tagging_model.prior_dict = prior_dict if options.use_gold: dependency_tagging_model.gold_dict = gold_dict print 'Model constructed' if options.gold_init: dependency_tagging_model.golden_init_decoder(sentences) else: dependency_tagging_model.init_decoder_param(sentences) print 'Decoder parameters initialized' if options.gpu >= 0 and torch.cuda.is_available(): torch.cuda.set_device(options.gpu)
def predict(self, trans_param, decision_param, batch_size, decision_counter,
            from_decision, to_decision, child_only, trans_counter):
    """Recompute DMV transition and decision multinomials from the network.

    Batches every (pos, direction, valency) index tuple through the network
    (``self.forward_`` / ``self.forward_decision``) and writes the predicted
    distributions back into ``trans_param`` / ``decision_param`` in place.
    Finally re-normalizes the smoothed counters and reports how far the
    count-based decision estimate drifted from the network's estimate.

    Parameters
    ----------
    trans_param : np.ndarray
        Transition table; assumed 6-D per the unpacking below — TODO confirm
        axis meaning against the caller.
    decision_param : np.ndarray
        Decision table; assumed 5-D per the unpacking below.
    batch_size : int
        Mini-batch size for the parameter-update passes.
    decision_counter, trans_counter : np.ndarray
        Soft counts accumulated during E-step; smoothed and normalized here.
    from_decision, to_decision : mapping
        POS-id translation tables used only when ``self.unified_network``.
    child_only : bool
        If true, skip the network decision update and instead re-estimate
        ``decision_param`` directly from the smoothed counters.

    Returns
    -------
    (trans_param, decision_param) : tuple of np.ndarray
    """
    input_pos_num, target_pos_num, _, _, dir_num, cvalency = trans_param.shape
    input_decision_pos_num, _, decision_dir_num, dvalency, target_decision_num = \
        decision_param.shape
    # Enumerate every index combination the tables must be refreshed for.
    input_trans_list = [[p, d, cv] for p in range(input_pos_num)
                        for d in range(dir_num) for cv in range(cvalency)]
    input_decision_list = [[p, d, dv] for p in range(input_decision_pos_num)
                           for d in range(dir_num) for dv in range(dvalency)]
    batched_input_trans = utils.construct_update_batch_data(
        input_trans_list, batch_size)
    batched_input_decision = utils.construct_update_batch_data(
        input_decision_list, batch_size)
    trans_batch_num = len(batched_input_trans)
    decision_batch_num = len(batched_input_decision)
    for i in range(trans_batch_num):
        # Update transition parameters.
        one_batch_size = len(batched_input_trans[i])
        one_batch_input_pos = torch.LongTensor(batched_input_trans[i])[:, 0]
        one_batch_dir = torch.LongTensor(batched_input_trans[i])[:, 1]
        one_batch_cvalency = torch.LongTensor(batched_input_trans[i])[:, 2]
        # Same columns again as NumPy arrays, used for fancy indexing below.
        one_batch_input_pos_index = np.array(batched_input_trans[i])[:, 0]
        one_batch_dir_index = np.array(batched_input_trans[i])[:, 1]
        one_batch_cvalency_index = np.array(batched_input_trans[i])[:, 2]
        predicted_trans_param = self.forward_(
            one_batch_input_pos, one_batch_dir, one_batch_cvalency,
            None, None, True, 'child', self.em_type)
        trans_param[one_batch_input_pos_index, :, :, :,
                    one_batch_dir_index,
                    one_batch_cvalency_index] = predicted_trans_param.detach(
            ).numpy().reshape(one_batch_size, target_pos_num, 1, 1)
    if not child_only:
        for i in range(decision_batch_num):
            # Update decision parameters.
            one_batch_size = len(batched_input_decision[i])
            if self.unified_network:
                # List comprehension (not bare map()) so torch.LongTensor
                # receives a sequence — a map object breaks under Python 3.
                one_batch_input_decision_pos = torch.LongTensor(
                    [from_decision[p]
                     for p in np.array(batched_input_decision[i])[:, 0]])
            else:
                one_batch_input_decision_pos = torch.LongTensor(
                    batched_input_decision[i])[:, 0]
            one_batch_decision_dir = torch.LongTensor(
                batched_input_decision[i])[:, 1]
            one_batch_dvalency = torch.LongTensor(
                batched_input_decision[i])[:, 2]
            if self.unified_network:
                one_batch_input_decision_pos_index = np.array(
                    one_batch_input_decision_pos).tolist()
                # Same Py3 fix: np.array(map(...)) would give a 0-d object
                # array instead of an index vector.
                one_batch_input_decision_pos_index = np.array(
                    [to_decision[p]
                     for p in one_batch_input_decision_pos_index])
            else:
                one_batch_input_decision_pos_index = np.array(
                    batched_input_decision[i])[:, 0]
            one_batch_decision_dir_index = np.array(
                batched_input_decision[i])[:, 1]
            one_batch_dvalency_index = np.array(
                batched_input_decision[i])[:, 2]
            if self.unified_network:
                predicted_decision_param = self.forward_(
                    one_batch_input_decision_pos, one_batch_decision_dir,
                    one_batch_dvalency, None, None, True, 'decision',
                    self.em_type)
            else:
                predicted_decision_param = self.forward_decision(
                    one_batch_input_decision_pos, one_batch_decision_dir,
                    one_batch_dvalency, None, None, True, self.em_type)
            decision_param[one_batch_input_decision_pos_index, :,
                           one_batch_decision_dir_index,
                           one_batch_dvalency_index, :] = \
                predicted_decision_param.detach().numpy().reshape(
                    one_batch_size, 1, target_decision_num)
    if child_only:
        # Count-based M-step for decisions when the network only models
        # child (transition) distributions.
        decision_counter = decision_counter + self.param_smoothing
        decision_sum = np.sum(decision_counter, axis=4, keepdims=True)
        decision_param = decision_counter / decision_sum
    # Diagnostic: distance between the count-based estimate and the
    # (possibly network-predicted) decision parameters.
    decision_counter = decision_counter + self.param_smoothing
    decision_sum = np.sum(decision_counter, axis=4, keepdims=True)
    decision_param_compare = decision_counter / decision_sum
    decision_difference = decision_param_compare - decision_param
    if not self.child_only:
        print('distance for decision in this iteration ' +
              str(LA.norm(decision_difference)))
    trans_counter = trans_counter + self.param_smoothing
    child_sum = np.sum(trans_counter, axis=(1, 3), keepdims=True)
    trans_param_compare = trans_counter / child_sum
    # trans_difference = trans_param_compare - trans_param
    # print('distance for trans in this iteration ' + str(LA.norm(trans_difference)))
    return trans_param, decision_param
def predict(self, trans_param, decision_param, decision_counter, child_only):
    """Refresh transition/decision parameter tables from the network.

    Two layouts are supported, selected by ``self.sentence_predict``:
    per-sentence transition tables (``trans_param[s][...]``) conditioned on
    the sentence and its language, or a single language-indexed table.
    Decision parameters are either predicted by the network or, when
    ``child_only`` is set, re-estimated from the smoothed counters.

    Parameters
    ----------
    trans_param : np.ndarray
        5-D; axis meaning differs between the two modes (see the shape
        unpacking below) — TODO confirm against the caller.
    decision_param : np.ndarray
        5-D decision table, last axis indexed by language.
    decision_counter : np.ndarray
        Soft decision counts; used only in the ``child_only`` branch.
    child_only : bool
        If true, skip the network decision update.

    Returns
    -------
    (trans_param, decision_param) : tuple of np.ndarray
    """
    self.eval()
    if self.sentence_predict:
        _, input_pos_num, target_pos_num, dir_num, cvalency = trans_param.shape
    else:
        input_pos_num, target_pos_num, dir_num, cvalency, lan_num = \
            trans_param.shape
    input_decision_pos_num, decision_dir_num, dvalency, target_decision_num, \
        lan_num = decision_param.shape
    # Enumerate every (pos, direction, valency, language) combination.
    input_trans_list = [[p, d, cv, l] for p in range(input_pos_num)
                        for d in range(dir_num) for cv in range(cvalency)
                        for l in range(lan_num)]
    input_decision_list = [[p, d, dv, l]
                           for p in range(input_decision_pos_num)
                           for d in range(dir_num) for dv in range(dvalency)
                           for l in range(lan_num)]
    batched_input_trans = utils.construct_update_batch_data(
        input_trans_list, self.sample_batch_size)
    batched_input_decision = utils.construct_update_batch_data(
        input_decision_list, self.sample_batch_size)
    trans_batch_num = len(batched_input_trans)
    decision_batch_num = len(batched_input_decision)
    if self.sentence_predict:
        for s in range(len(self.sentence_map)):
            for i in range(trans_batch_num):
                # Update transition parameters for sentence s.
                one_batch_size = len(batched_input_trans[i])
                # Sentence-level conditioning: language id, POS sequence
                # and length, broadcast across the batch.
                batch_target_lan_v = torch.LongTensor([
                    self.languages[self.language_map[s]]
                ]).expand(one_batch_size)
                batch_input_len = torch.LongTensor(
                    [len(self.sentence_map[s])]).expand(one_batch_size)
                batch_input_sen_v = torch.LongTensor([
                    self.sentence_map[s]
                ]).expand(one_batch_size, len(self.sentence_map[s]))
                one_batch_input_pos = torch.LongTensor(
                    batched_input_trans[i])[:, 0]
                one_batch_dir = torch.LongTensor(batched_input_trans[i])[:, 1]
                one_batch_cvalency = torch.LongTensor(
                    batched_input_trans[i])[:, 2]
                # Parameter index for update.
                one_batch_input_pos_index = np.array(
                    batched_input_trans[i])[:, 0]
                one_batch_dir_index = np.array(batched_input_trans[i])[:, 1]
                one_batch_cvalency_index = np.array(
                    batched_input_trans[i])[:, 2]
                predicted_trans_param, _ = self.forward_(
                    one_batch_input_pos, one_batch_dir, one_batch_cvalency,
                    None, None, True, 'child',
                    batch_target_lan_v, batch_input_sen_v, batch_input_len)
                trans_param[s][one_batch_input_pos_index, :,
                               one_batch_dir_index,
                               one_batch_cvalency_index] = \
                    predicted_trans_param.detach().numpy()
    else:
        for i in range(trans_batch_num):
            one_batch_size = len(batched_input_trans[i])
            one_batch_input_pos = torch.LongTensor(
                batched_input_trans[i])[:, 0]
            one_batch_dir = torch.LongTensor(batched_input_trans[i])[:, 1]
            one_batch_cvalency = torch.LongTensor(
                batched_input_trans[i])[:, 2]
            one_batch_lan = torch.LongTensor(batched_input_trans[i])[:, 3]
            # Parameter index for update.
            one_batch_input_pos_index = np.array(batched_input_trans[i])[:, 0]
            one_batch_dir_index = np.array(batched_input_trans[i])[:, 1]
            one_batch_cvalency_index = np.array(batched_input_trans[i])[:, 2]
            one_batch_lan_index = np.array(batched_input_trans[i])[:, 3]
            predicted_trans_param, _ = self.forward_(
                one_batch_input_pos, one_batch_dir, one_batch_cvalency,
                None, None, True, 'child', one_batch_lan, None, None)
            trans_param[one_batch_input_pos_index, :,
                        one_batch_dir_index, one_batch_cvalency_index,
                        one_batch_lan_index] = \
                predicted_trans_param.detach().numpy()
    if not child_only:
        for i in range(decision_batch_num):
            # Update decision parameters.
            one_batch_input_decision_pos = torch.LongTensor(
                np.array(batched_input_decision[i])[:, 0])
            one_batch_decision_dir = torch.LongTensor(
                batched_input_decision[i])[:, 1]
            one_batch_dvalency = torch.LongTensor(
                batched_input_decision[i])[:, 2]
            one_batch_decision_lan = torch.LongTensor(
                batched_input_decision[i])[:, 3]
            # Decision parameter index for update.
            one_batch_input_decision_pos_index = np.array(
                batched_input_decision[i])[:, 0]
            one_batch_decision_dir_index = np.array(
                batched_input_decision[i])[:, 1]
            one_batch_dvalency_index = np.array(
                batched_input_decision[i])[:, 2]
            one_batch_decision_lan_index = np.array(
                batched_input_decision[i])[:, 3]
            predicted_decision_param, _ = self.forward_(
                one_batch_input_decision_pos, one_batch_decision_dir,
                one_batch_dvalency, None, None,
                True, 'decision', one_batch_decision_lan, None, None)
            decision_param[one_batch_input_decision_pos_index,
                           one_batch_decision_dir_index,
                           one_batch_dvalency_index, :,
                           one_batch_decision_lan_index] = \
                predicted_decision_param.detach().numpy()
    else:
        # Count-based M-step; plain `/` equals the former old_div here
        # because both operands are float NumPy arrays (true division).
        decision_counter = decision_counter + self.param_smoothing
        decision_sum = np.sum(decision_counter, axis=3, keepdims=True)
        decision_param = decision_counter / decision_sum
    return trans_param, decision_param
def predict(self, sentence_trans_param, root_param, decision_param, batch_size,
            trans_counter, root_counnter, decision_counter, sentence_map,
            language_map, languages, epoch):
    """Refresh per-sentence transition tables from the network; re-estimate
    root/decision/transition multinomials from smoothed counters.

    For every sentence, each (pos, direction, valency) combination is pushed
    through ``self.forward_`` conditioned on the sentence's language, POS
    sequence and length, and the prediction is written into
    ``sentence_trans_param[s]`` in place. Root, decision and the global
    transition tables are then re-normalized from their counters with
    additive smoothing.

    Parameters
    ----------
    sentence_trans_param : np.ndarray
        5-D, indexed by sentence first (see shape unpacking) — TODO confirm
        axis meaning against the caller.
    root_param, decision_param : np.ndarray
        Output tables, recomputed from the counters below.
    batch_size : int
        Mini-batch size for the network passes.
    trans_counter, root_counnter, decision_counter : np.ndarray
        Soft counts (``root_counnter`` spelling kept for caller
        compatibility).
    sentence_map : mapping
        Sentence id -> POS-id sequence.
    language_map, languages : mapping
        Sentence id -> language name -> language id.
    epoch : int
        Forwarded to the network.

    Returns
    -------
    (sentence_trans_param, trans_param, root_param, decision_param)
    """
    _, input_pos_num, target_pos_num, dir_num, cvalency = \
        sentence_trans_param.shape
    input_decision_pos_num, decision_dir_num, dvalency, target_decision_num = \
        decision_param.shape
    input_trans_list = [[p, d, cv] for p in range(input_pos_num)
                        for d in range(dir_num) for cv in range(cvalency)]
    input_decision_list = [[p, d, dv] for p in range(input_decision_pos_num)
                           for d in range(dir_num) for dv in range(dvalency)]
    batched_input_trans = utils.construct_update_batch_data(
        input_trans_list, batch_size)
    batched_input_decision = utils.construct_update_batch_data(
        input_decision_list, batch_size)
    trans_batch_num = len(batched_input_trans)
    decision_batch_num = len(batched_input_decision)
    for s in range(len(sentence_map)):
        for i in range(trans_batch_num):
            # Update transition parameters for sentence s.
            one_batch_size = len(batched_input_trans[i])
            # Sentence-level conditioning, broadcast across the batch.
            batch_target_lan_v = torch.LongTensor([
                languages[language_map[s]]
            ]).expand(one_batch_size)  # TODO hanwj
            batch_input_len = torch.LongTensor(
                [len(sentence_map[s])]).expand(one_batch_size)
            batch_input_sen_v = torch.LongTensor([sentence_map[s]]).expand(
                one_batch_size, len(sentence_map[s]))
            one_batch_input_pos = torch.LongTensor(
                batched_input_trans[i])[:, 0]
            one_batch_dir = torch.LongTensor(batched_input_trans[i])[:, 1]
            one_batch_cvalency = torch.LongTensor(
                batched_input_trans[i])[:, 2]
            # Parameter index for update.
            one_batch_input_pos_index = np.array(batched_input_trans[i])[:, 0]
            one_batch_dir_index = np.array(batched_input_trans[i])[:, 1]
            one_batch_cvalency_index = np.array(batched_input_trans[i])[:, 2]
            predicted_trans_param = self.forward_(
                one_batch_input_pos, one_batch_dir, one_batch_cvalency,
                None, None, True, 'child', self.em_type,
                batch_target_lan_v, batch_input_sen_v, batch_input_len,
                epoch=epoch)
            sentence_trans_param[s][one_batch_input_pos_index, :,
                                    one_batch_dir_index,
                                    one_batch_cvalency_index] = \
                predicted_trans_param.detach().numpy()
    # TODO: a network-based decision update (mirroring the decision loop of
    # the sibling predict() with from_decision/to_decision handling) was
    # previously sketched here; decisions are currently count-based only.
    decision_counter = decision_counter + self.param_smoothing
    decision_sum = np.sum(decision_counter, axis=3, keepdims=True)
    decision_param = decision_counter / decision_sum
    root_counnter = root_counnter + self.param_smoothing
    root_sum = np.sum(root_counnter)
    root_param = root_counnter / root_sum
    trans_counter = trans_counter + self.param_smoothing
    child_sum = np.sum(trans_counter, axis=1, keepdims=True)
    trans_param = trans_counter / child_sum
    return sentence_trans_param, trans_param, root_param, decision_param
data_list = list() sen_idx = 0 sentence_map = {} # id 2 pos_seq data_pos = [] for s in sentences: _, s_pos = s.set_data_list(None, pos) s_data_list = list() s_data_list.append(s_pos) data_pos.append(s_pos) s_data_list.append(languages[language_map[sen_idx]]) s_data_list.append([sen_idx]) data_list.append(s_data_list) sentence_map[sen_idx] = s_pos sen_idx += 1 data_pos = np.array(data_pos) # list of sentences with only tags batch_data = utils.construct_update_batch_data( data_list, options.batchsize) # data_list: tag_seq, lang_id, stc_id print 'Batch data constructed' print 'Model constructed: ml_dmv_model and m_model' data_size = len(data_list) load_file = os.path.join(options.output, options.paramem) + '_' + str( options.loaded_model_idx) ml_dmv_model = MLDMV(pos, sentence_map, language_map, data_size, options) if False: # (not options.load_model) or (not os.path.exists(load_file)): ml_dmv_model.init_param(sentences) else: ml_dmv_model.trans_param, ml_dmv_model.root_param, ml_dmv_model.decision_param, _ = pickle.load( open(load_file, 'r')) # ml_dmv_model.sentence_trans_param # do_eval(ml_dmv_model, None, pos, options, epoch) loaded_file = os.path.join( options.output,