def __init__(self, encoder, decoder, root_id, config, ac_size):
    self.config = config
    self.encoder = encoder
    self.decoder = decoder
    self.root = root_id
    encoder_p = next(filter(lambda p: p.requires_grad, encoder.parameters()))
    self.use_cuda = encoder_p.is_cuda
    # Pre-allocated buffers, reused across batches instead of rebuilt each step.
    self.bucket = Variable(torch.zeros(self.config.train_batch_size, 1,
                                       self.config.lstm_hiddens * 2)).type(torch.FloatTensor)
    self.cut = Variable(torch.zeros(self.config.train_batch_size,
                                    ac_size)).type(torch.FloatTensor)
    self.index = Variable(torch.zeros(self.config.train_batch_size * 4)).type(torch.LongTensor)
    self.device = encoder_p.get_device() if self.use_cuda else None
    if self.use_cuda:
        self.bucket = self.bucket.cuda(self.device)
        self.index = self.index.cuda(self.device)
        self.cut = self.cut.cuda(self.device)
    self.gold_pred_pairs = []
    self.training = True
    # The state pools must cover whichever batch size is larger.
    batch_size = max(self.config.train_batch_size, self.config.test_batch_size)
    self.batch_states = []
    self.step = []
    for idx in range(batch_size):
        self.batch_states.append([])
        self.step.append(0)
        for idy in range(1024):
            self.batch_states[idx].append(State())
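# --- Illustrative sketch, not part of the original code ---
# The constructor above pre-allocates three reusable buffers: a zero "bucket"
# row (presumably for padding encoder outputs), a "cut" tensor over action
# scores, and a flat "index" buffer for feature gathering. A minimal
# stand-alone version of that pattern, assuming modern PyTorch (where
# torch.Tensor subsumes the old Variable); the names batch_size, hidden_size,
# and n_actions are illustrative only.
import torch

def make_padding_buffers(batch_size, hidden_size, n_actions, device=None):
    bucket = torch.zeros(batch_size, 1, hidden_size * 2, device=device)   # padding rows
    cut = torch.zeros(batch_size, n_actions, device=device)               # action-score buffer
    index = torch.zeros(batch_size * 4, dtype=torch.long, device=device)  # gather indices
    return bucket, cut, index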
def get_gold_actions(data, vocab):
    all_actions = []
    states = []
    for idx in range(1024):
        states.append(State())
    all_feats = []
    for sentence in data:
        start = states[0]
        start.clear()
        start.ready(sentence, vocab)
        actions = []
        step = 0
        inst_feats = []
        while not states[step].is_end():
            gold_action = states[step].get_gold_action(vocab)
            # Snapshot the feature indices before move() mutates the state.
            gold_feats = states[step].prepare_index(start._word_size)
            inst_feats.append(deepcopy(gold_feats))
            actions.append(gold_action)
            next_state = states[step + 1]
            states[step].move(next_state, gold_action)
            step += 1
        all_feats.append(inst_feats)
        all_actions.append(actions)
        # Sanity check: replaying the oracle must reconstruct the gold tree exactly.
        result = states[step].get_result(vocab)
        arc_total, arc_correct, rel_total, rel_correct = evalDepTree(sentence, result)
        assert arc_total == arc_correct and rel_total == rel_correct
        assert len(actions) == (len(sentence) - 1) * 2
    return all_feats, all_actions
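# --- Illustrative sketch, not part of the original code ---
# The oracle above replays gold actions through mutable State objects and
# deep-copies each step's features, because later move() calls mutate the
# very state the features were drawn from. The same idea in miniature, with
# hypothetical callbacks (is_end / oracle / features / apply) standing in
# for the State API:
from copy import deepcopy

def replay_oracle(state, is_end, oracle, features, apply):
    feats, actions = [], []
    while not is_end(state):
        action = oracle(state)
        feats.append(deepcopy(features(state)))  # snapshot before mutation
        actions.append(action)
        state = apply(state, action)
    return feats, actions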
def __init__(self, wordEnc, EDUEnc, dec, config):
    self.config = config
    self.wordEnc = wordEnc
    self.EDUEnc = EDUEnc
    self.dec = dec
    self.use_cuda = next(filter(lambda p: p.requires_grad,
                                wordEnc.parameters())).is_cuda
    self.batch_states = []
    self.step = []
    for idx in range(config.test_batch_size):
        self.batch_states.append([])
        self.step.append(0)
        for idy in range(1024):
            self.batch_states[idx].append(State())
def move(self, batch_states, pred_actions, vocab):
    # Expect exactly one predicted action per unfinished instance in the batch.
    count = 0
    for idx in range(len(batch_states)):
        if not batch_states[idx][-1].is_end():
            count += 1
    assert len(pred_actions) == count
    offset = 0
    for idx, cur_states in enumerate(batch_states):
        if not cur_states[-1].is_end():
            next_state = State()
            cur_states[-1].move(next_state, pred_actions[offset])
            cur_states.append(next_state)
            offset += 1
            self.step[idx] += 1
def get_gold_candid(data, vocab):
    states = []
    all_candid = []
    for idx in range(1024):
        states.append(State())
    for sentence in data:
        start = states[0]
        start.clear()
        start.ready(sentence, vocab)
        step = 0
        inst_candid = []
        while not states[step].is_end():
            gold_action = states[step].get_gold_action(vocab)
            candid = states[step].get_candidate_actions(vocab)
            inst_candid.append(candid)
            next_state = states[step + 1]
            states[step].move(next_state, gold_action)
            step += 1
        all_candid.append(inst_candid)
    return all_candid
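# --- Illustrative sketch, not part of the original code ---
# A hypothetical companion to get_gold_candid: turning one step's candidate
# action ids into an additive mask over raw scores, which is presumably the
# role the "cut" buffer in the constructor above plays. Assumes PyTorch;
# candidate_mask and n_actions are illustrative names.
import torch

def candidate_mask(candidates, n_actions):
    mask = torch.full((n_actions,), float('-inf'))  # forbid everything...
    mask[list(candidates)] = 0.0                    # ...except the legal actions
    return mask  # add to raw scores so illegal actions never win the argmax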
def get_gold_actions(data, vocab):
    # Resolve relation labels for reduce actions up front.
    for doc in data:
        for action in doc.gold_actions:
            if action.is_reduce():
                action.label = vocab.rel2id(action.label_str)
    all_actions = []
    states = []
    for idx in range(1024):
        states.append(State())
    all_feats = []
    S = Metric()
    N = Metric()
    R = Metric()
    F = Metric()
    for doc in data:
        start = states[0]
        start.clear()
        start.ready(doc)
        step = 0
        inst_feats = []
        action_num = len(doc.gold_actions)
        while not states[step].is_end():
            assert step < action_num
            gold_action = doc.gold_actions[step]
            gold_feats = states[step].prepare_index()
            inst_feats.append(deepcopy(gold_feats))
            next_state = states[step + 1]
            states[step].move(next_state, gold_action)
            step += 1
        all_feats.append(inst_feats)
        all_actions.append(doc.gold_actions)
        assert len(inst_feats) == len(doc.gold_actions)
        # Replaying the gold actions must reproduce the gold discourse tree.
        result = states[step].get_result(vocab)
        doc.evaluate(result, S, N, R, F)
        assert S.bIdentical() and N.bIdentical() and R.bIdentical() and F.bIdentical()
    return all_feats, all_actions
def get_gold_actions(data, vocab):
    all_actions = []
    all_states = []
    for idx in range(1024):
        all_states.append(State())
    for sentence in data:
        start = all_states[0]
        start.clear()
        start.ready(sentence, vocab)
        actions = []
        step = 0
        while not all_states[step].is_end():
            gold_action = all_states[step].get_gold_action(vocab)
            actions.append(gold_action)
            next_state = all_states[step + 1]
            all_states[step].move(next_state, gold_action)
            step += 1
        all_actions.append(actions)
        result = all_states[step].get_result(vocab)
        arc_total, arc_correct, rel_total, rel_correct = evalDepTree(sentence, result)
        assert arc_total == arc_correct and rel_total == rel_correct
        assert len(actions) == (len(sentence) - 1) * 2
    return all_actions
def decode(self, batch_data, batch_gold_actions, vocab):
    decoder_scores = []
    self.step.clear()
    self.gold_pred_actions.clear()
    b = self.encoder_outputs.size()[0]
    start_states = []
    for idx in range(b):
        start_states.append(State())
        start_states[idx].ready(batch_data[idx], vocab)
        self.step.append(0)
    batch_states = []
    for idx in range(b):
        one_inst_states = [start_states[idx]]
        batch_states.append(one_inst_states)
    while not self.all_states_are_finished(batch_states):
        self.prepare_atom_feat(batch_states, vocab)
        if self.training:
            gold_actions = self.get_gold_actions(batch_states, batch_gold_actions)
        hidden_states = self.batch_hidden_state(batch_states)
        all_candidates = self.get_candidates(batch_states, vocab)
        action_scores = self.decoder.forward(hidden_states, all_candidates)
        pred_ac_ids = self.get_predicted_ac_id(action_scores)
        pred_actions = self.get_predict_actions(pred_ac_ids, vocab)
        batch_action_scores = self.padding_action_scores(batch_states, action_scores)
        if self.training:
            # Teacher forcing: advance with gold actions but keep the predictions.
            self.move(batch_states, gold_actions, vocab)
            self.gold_pred_actions.append((gold_actions, pred_actions))
        else:
            self.move(batch_states, pred_actions, vocab)
        decoder_scores.append(batch_action_scores.unsqueeze(1))
    self.batch_states = batch_states
    self.decoder_outputs = torch.cat(decoder_scores, 1)
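# --- Illustrative sketch, not part of the original code ---
# decode() interleaves scoring and state advancement, following gold actions
# at train time (teacher forcing) while still recording the predictions. The
# same control flow for a single instance, with a hypothetical score_fn
# returning a dict of action -> score and an immutable state.move(action):
def greedy_decode(state, score_fn, gold_actions=None):
    scores, step = [], 0
    while not state.is_end():
        step_scores = score_fn(state)
        pred = max(step_scores, key=step_scores.get)  # greedy argmax action
        # Teacher forcing: advance with gold while training, else with pred.
        action = gold_actions[step] if gold_actions is not None else pred
        state = state.move(action)
        scores.append(step_scores)
        step += 1
    return scores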
try:
    states_count[states.index(state_names)] += 1
except ValueError:
    # First time this state configuration is seen.
    states.append(state_names)
    states_count.append(1)
if len(new_communities) > 1:
    # Match the previous snapshot's communities against the current one.
    confusion = create_confusion_matrix(new_communities[-2], new_communities[-1])
    create_continuation_matrix(confusion, sigma, jaccard_null_model)
    # [clean_old.set.set_ground_truth(event='reset') for clean_old in new_communities[-2]]
    match_communities(new_communities[-2], new_communities[-1],
                      dead_communities, confusion)
    state_transition = State(list(confusion[:-1, :-1].sum(axis=1)),
                             list(confusion[:-1, :-1].sum(axis=0)), vi)
    state = np.reshape(confusion[:-1, :-1], state_transition.edges_len)
    state_transition.set_state(state)
    score = state_transition.similarity
    # Find rebirths among previously dead communities.
    # for dead in dead_communities:
    candidate_communities = [
        communities for communities in new_communities[-1]
        if communities.community_events[-1][1] != "P"
    ]
    if candidate_communities:
        dead_confusion = create_confusion_matrix(dead_communities,
                                                 candidate_communities)
        save_jaccard_index_matrix = Community.jaccard_index.copy()
        save_continuation_matrix = Community.continuation.copy()
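# --- Illustrative sketch, not part of the original code ---
# The snippet above relies on a confusion matrix between two community
# snapshots whose last row/column (hence the [:-1, :-1] slices) appears to
# carry unmatched mass. A plausible way to build the core counts with numpy,
# assuming each community exposes its member set; build_confusion is an
# illustrative name, not the project's create_confusion_matrix.
import numpy as np

def build_confusion(old_groups, new_groups):
    m = np.zeros((len(old_groups) + 1, len(new_groups) + 1))
    for i, old in enumerate(old_groups):
        for j, new in enumerate(new_groups):
            m[i, j] = len(old & new)  # members shared between snapshots
    return m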