示例#1
0
    def create_instances(self, examples):
        """Build oracle training instances for every fully replayed sentence.

        Each sentence is replayed from its initial configuration (stack
        ``[0]``, buffer ``1..n_words``) by repeatedly applying the transition
        returned by ``self.get_oracle``.  Before each transition is applied,
        one instance ``(features, legal_labels, gold_t)`` is recorded.

        Args:
            examples: Iterable of sentence mappings.  Only ``ex['word']`` is
                read directly here; its length minus one is used as the word
                count (presumably index 0 is a root token -- confirm against
                the caller).  ``ex`` is also forwarded to ``self.get_oracle``
                and ``self.extract_features``.

        Returns:
            A flat list of ``(features, legal_labels, gold_t)`` tuples.
            A sentence for which the oracle returns ``None`` before
            ``2 * n_words`` transitions were applied contributes nothing.

        Raises:
            AssertionError: If the oracle transition is not flagged as legal
                by ``self.legal_labels``.
        """
        all_instances = []
        for ex in logged_loop(examples):
            n_words = len(ex['word']) - 1

            # arcs collects (head, tail, label) triples.
            stack = [0]
            # ``range`` behaves the same on Python 2 and 3 for this use.
            buf = list(range(1, n_words + 1))
            arcs = []
            instances = []
            for _ in range(n_words * 2):
                gold_t = self.get_oracle(stack, buf, ex)
                if gold_t is None:
                    # No oracle transition: abandon this sentence entirely.
                    break
                legal_labels = self.legal_labels(stack, buf)
                assert legal_labels[gold_t] == 1
                # Features describe the configuration *before* the transition.
                features = self.extract_features(stack, buf, arcs, ex)
                instances.append((features, legal_labels, gold_t))
                if gold_t == self.n_trans - 1:
                    # Last transition id: move the buffer front onto the stack.
                    stack.append(buf[0])
                    buf = buf[1:]
                elif gold_t < self.n_deprel:
                    # Stack top becomes head of the second item, which is removed.
                    arcs.append((stack[-1], stack[-2], gold_t))
                    stack = stack[:-2] + [stack[-1]]
                else:
                    # Second item becomes head of the stack top, which is removed.
                    arcs.append((stack[-2], stack[-1], gold_t - self.n_deprel))
                    stack = stack[:-1]
            else:
                # Reached only when the loop was never broken out of, so
                # partially replayed sentences are dropped.
                all_instances += instances

        return all_instances
示例#2
0
    def create_instances(self, examples):
        """Turn sentences into a flat list of transition training instances.

        For every sentence the method starts with ``stack = [0]`` and a buffer
        holding ``1..n_words``, then applies up to ``2 * n_words`` transitions
        chosen by ``self.get_oracle``.  One
        ``(features, legal_labels, gold_t)`` tuple is stored per transition,
        computed from the configuration before that transition is applied.

        Args:
            examples: Iterable of sentence mappings.  ``ex['word']`` supplies
                the sentence length (``len - 1`` words; presumably position 0
                is a root placeholder -- confirm against the caller).  The
                mapping is passed through unchanged to ``self.get_oracle``
                and ``self.extract_features``.

        Returns:
            list: ``(features, legal_labels, gold_t)`` tuples from all
            sentences whose replay ran the full ``2 * n_words`` steps.
            Sentences where the oracle yields ``None`` are skipped as a whole.

        Raises:
            AssertionError: If ``self.legal_labels`` does not mark the oracle
                transition with ``1``.
        """
        all_instances = []
        for ex in logged_loop(examples):
            n_words = len(ex['word']) - 1

            stack = [0]
            # ``range`` is used instead of ``xrange`` so the code runs on
            # both Python 2 and Python 3 with the same result.
            buf = list(range(1, n_words + 1))
            arcs = []  # (head, tail, label) triples
            instances = []
            for _ in range(n_words * 2):
                gold_t = self.get_oracle(stack, buf, ex)
                if gold_t is None:
                    # Skips the ``else`` clause below: nothing is kept.
                    break
                legal_labels = self.legal_labels(stack, buf)
                assert legal_labels[gold_t] == 1
                instances.append(
                    (self.extract_features(stack, buf, arcs, ex),
                     legal_labels, gold_t))

                # Advance the configuration according to the gold transition.
                if gold_t == self.n_trans - 1:
                    # Highest transition id: shift buffer front to the stack.
                    stack.append(buf[0])
                    buf = buf[1:]
                elif gold_t < self.n_deprel:
                    # Arc from stack top to the item below it; drop the latter.
                    arcs.append((stack[-1], stack[-2], gold_t))
                    stack = stack[:-2] + [stack[-1]]
                else:
                    # Arc from the second item to the stack top; drop the top.
                    arcs.append((stack[-2], stack[-1], gold_t - self.n_deprel))
                    stack = stack[:-1]
            else:
                # Loop finished without ``break``: keep this sentence.
                all_instances += instances

        return all_instances
示例#3
0
    def create_instances(self, examples):
        """Replay the oracle over each sentence and collect training instances.

        Starting from ``stack = [0]`` and a buffer of word positions
        ``1..n_words``, the method applies up to ``2 * n_words`` transitions
        returned by ``self.get_oracle``.  For each one it records a tuple
        ``(features, legal_labels, gold_t)`` describing the configuration
        before the transition is applied.

        Args:
            examples: Iterable of sentence mappings.  ``ex['word']`` gives the
                sentence length (``len - 1`` words; presumably position 0 is a
                root token -- confirm against the caller).  ``ex`` is also
                handed to ``self.get_oracle`` and ``self.extract_features``.

        Returns:
            A flat list of ``(features, legal_labels, gold_t)`` tuples.
            Sentences for which the oracle returns ``None`` part-way through
            are dropped completely.

        Raises:
            AssertionError: If the oracle transition is not marked legal by
                ``self.legal_labels``.
        """
        all_instances = []
        for ex in logged_loop(examples):
            n_words = len(ex['word']) - 1

            # arcs collects (head, tail, label) triples.
            stack = [0]
            # The buffer initially holds every word position of the sentence.
            # Per the original note these positions line up with the indices
            # used in the sentence's head list.
            # ``range`` works identically on Python 2 and 3 here.
            buf = list(range(1, n_words + 1))
            arcs = []
            instances = []
            # At most 2 * n_words transitions are applied per sentence.
            for _ in range(n_words * 2):
                # Gold transition for the current configuration.
                gold_t = self.get_oracle(stack, buf, ex)
                if gold_t is None:
                    break
                legal_labels = self.legal_labels(stack, buf)
                assert legal_labels[gold_t] == 1
                # One instance per configuration, paired with its gold
                # transition.  (The original note describes the feature
                # vector as 18 + 18 + 12 entries; that layout is decided by
                # extract_features and is not visible here.)
                instances.append(
                    (self.extract_features(stack, buf, arcs, ex),
                     legal_labels, gold_t))
                # Update the configuration.
                if gold_t == self.n_trans - 1:
                    # Shift (id 2 in the unlabeled setup, per the original note).
                    stack.append(buf[0])
                    buf = buf[1:]
                elif gold_t < self.n_deprel:
                    # Left-arc (id 0 in the unlabeled setup): the stack top
                    # heads the second item, which leaves the stack.
                    arcs.append((stack[-1], stack[-2], gold_t))
                    stack = stack[:-2] + [stack[-1]]
                else:
                    # Right-arc (id 1 in the unlabeled setup): the second
                    # item heads the stack top, which leaves the stack.
                    # In unlabeled mode the raw transition id is stored as
                    # the label; otherwise the id is offset by n_deprel.
                    if self.unlabeled:
                        right_label = gold_t
                    else:
                        right_label = gold_t - self.n_deprel
                    # (head, tail) are word positions.
                    arcs.append((stack[-2], stack[-1], right_label))
                    stack = stack[:-1]
            else:
                # Only sentences replayed without a break are kept.
                all_instances += instances

        return all_instances
示例#4
0
    def create_instances(self, examples):
        """Create per-transition training instances from parsed sentences.

        Args:
            examples: Iterable of sentence mappings.  The original docstring
                describes each one as
                ``{word: [...], pos: [...], head: [...], label: [...]}`` with
                tokens already mapped to ids; only ``ex['word']`` is read
                directly in this method, the rest is consumed by
                ``self.get_oracle`` and ``self.extract_features``.

        Returns:
            A flat list (not nested per sentence) of
            ``(features, legal_labels, gold_t)`` tuples: the feature list for
            a configuration, the legality mask over all transitions, and the
            gold transition id.  Sentences for which the oracle returns
            ``None`` before ``2 * n_words`` steps contribute no instances.

        Raises:
            AssertionError: If the gold transition is not marked legal by
                ``self.legal_labels``.
        """
        all_instances = []
        for ex in logged_loop(examples):
            n_words = len(ex['word']) - 1

            # arcs collects (head, tail, label) triples.
            stack = [0]
            buf = list(range(1, n_words + 1))
            arcs = []
            instances = []
            # Every word enters the stack once and leaves it once, hence
            # 2 * n_words steps in total.
            for _ in range(n_words * 2):
                gold_t = self.get_oracle(stack, buf, ex)
                if gold_t is None:
                    # No oracle transition: discard this sentence.
                    break
                legal_labels = self.legal_labels(stack, buf)
                assert legal_labels[gold_t] == 1
                # Features are taken before the transition is applied.
                features = self.extract_features(stack, buf, arcs, ex)
                instances.append((features, legal_labels, gold_t))
                if gold_t == self.n_trans - 1:
                    # Last transition id: shift the buffer front.
                    stack.append(buf[0])
                    buf = buf[1:]
                elif gold_t < self.n_deprel:
                    # Left-arc (L-xxx): stack top heads the second item.
                    arcs.append((stack[-1], stack[-2], gold_t))
                    stack = stack[:-2] + [stack[-1]]
                else:
                    # Remaining ids: second item heads the stack top.
                    arcs.append((stack[-2], stack[-1], gold_t - self.n_deprel))
                    stack = stack[:-1]
            else:
                # Runs only when the loop was not broken out of.
                all_instances += instances

        return all_instances