Пример #1
0
    def next_batch(self):
        '''
        Build the next mini-batch and return it as a dict of model inputs.

        The raw slice is obtained from ``batch_range``, whose sixth return
        value is stored back into ``self.idx``.  Sentence and context word
        indices are handed to ``add_pad`` (its return value is ignored, so it
        presumably pads in place -- confirm against its definition).  From the
        result this method derives lengths, pad bitmaps, relative-position
        features and sub-indices offset by ``k * max_len`` for the k-th row.
        '''
        self.nN += 1
        (rins, lab, ret_ids, rinlens, rmaxlens,
         self.idx, _unused_float_lens) = batch_range(
            self.batch_size,
            self.idx,
            self.nsamps,
            self.rand_idx,
            self.class_num,
            self.labels,
            self.ids,
            [self.texts, self.aspsubs, self.aspects, self.left_ctx,
             self.right_ctx, self.leftsubs, self.rightsubs]
        )
        # Outputs come back in the same order as the input list above.
        fsents, asubs, asps = rins[0], rins[1], rins[2]
        left_ctx, right_ctx = rins[3], rins[4]
        lsubs, rsubs = rins[5], rins[6]

        # Every entry of rinlens[i] is itself indexable; the length is at [0].
        reverse_lens = [entry[0] for entry in rinlens[0]]
        if self.eos:
            # reserve one extra slot per sentence for <eos>
            seq_lens = [n + 1 for n in reverse_lens]
        else:
            seq_lens = list(reverse_lens)
        left_seq_lens = [entry[0] for entry in rinlens[3]]
        right_seq_lens = [entry[0] for entry in rinlens[4]]

        max_len = rmaxlens[0] + 1
        left_max_len = rmaxlens[3]
        right_max_len = rmaxlens[4]
        add_pad(
            inputs=[fsents, left_ctx, right_ctx],
            max_lens=[max_len, left_max_len, right_max_len],
            pad_idx=self.pad_idx
        )
        sent_bitmap = bitmap_by_padid(fsents, self.pad_idx)
        left_sent_bitmap = bitmap_by_padid(left_ctx, self.pad_idx)
        right_sent_bitmap = bitmap_by_padid(right_ctx, self.pad_idx)

        # alpha_adj: per row, zero the leading run of 1.0 entries and set the
        # first entry that is not 1.0 to 1.0; the rest of the row is untouched.
        alpha_adj = copy.deepcopy(sent_bitmap)
        for flags in alpha_adj:
            for pos, flag in enumerate(flags):
                if flag == 1.0:
                    flags[pos] = 0.0
                    continue
                flags[pos] = 1.0
                break

        # Aspect lengths plus relative-position features for each row.
        abs_poses, left_abs_poses, right_abs_poses = [], [], []
        pos_ids, left_pos_ids, right_pos_ids = [], [], []
        asp_lens = []
        asp_size = 0
        for row, sl in enumerate(seq_lens):
            left_l = left_seq_lens[row]
            right_l = right_seq_lens[row]
            asub = asubs[row]
            al = asub[1] - asub[0]
            asp_lens.append(al)
            if asp_size < al:
                asp_size = al

            # left of the aspect: distances asub[0], ..., 2, 1
            lt_pos = list(range(asub[0], 0, -1))
            lt_ids = np.array(lt_pos)
            lt_ids += self.pos_id_range['left_start'] - 1
            lt_pid = lt_ids.tolist()
            # right of the aspect: distances 1, 2, ..., sl - asub[1]
            rt_pos = list(range(1, sl - asub[1] + 1))
            rt_ids = np.array(rt_pos)
            rt_ids += self.pos_id_range['right_start'] - 1
            rt_pid = rt_ids.tolist()

            asp_id = self.pos_id_range['asp']
            oos_id = self.pos_id_range['oos']
            # number of trailing pad slots for each of the three sequences
            text_pad = range(max_len - sl)
            left_pad = range(left_max_len - left_l)
            right_pad = range(right_max_len - right_l)

            abs_poses.append(
                lt_pos + [0 for _ in range(al)] + rt_pos
                + [0 for _ in text_pad])
            left_abs_poses.append(lt_pos + [0 for _ in left_pad])
            right_abs_poses.append(rt_pos + [0 for _ in right_pad])

            pos_ids.append(
                lt_pid + [asp_id for _ in range(al)] + rt_pid
                + [oos_id for _ in text_pad])
            left_pos_ids.append(lt_pid + [oos_id for _ in left_pad])
            right_pos_ids.append(rt_pid + [oos_id for _ in right_pad])

        add_pad(
            inputs=[asps],
            max_lens=[asp_size],
            pad_idx=self.pad_idx
        )

        # Sub-indices: row k is shifted by k * max_len; short ranges are
        # filled up with the index of the row's last slot.
        asp_subs, left_subs, right_subs = [], [], []
        f_asp_subs, b_asp_subs = [], []
        f_left_subs, b_left_subs = [], []
        f_right_subs, b_right_subs = [], []
        for k in range(len(fsents)):
            bias = k * max_len
            last_slot = bias + max_len - 1
            asub, lsub, rsub = asubs[k], lsubs[k], rsubs[k]

            asp_sub = list(range(bias + asub[0], bias + asub[1]))
            left_sub = list(range(bias + lsub[0], bias + lsub[1]))
            right_sub = list(range(bias + rsub[0], bias + rsub[1]))

            # f_*: last index of the span, b_*: first index of the span
            f_asp_subs.append(bias + asub[1] - 1)
            b_asp_subs.append(bias + asub[0])
            f_left_subs.append(bias + lsub[1] - 1)
            b_left_subs.append(bias + lsub[0])
            f_right_subs.append(bias + rsub[1] - 1)
            b_right_subs.append(bias + rsub[0])

            for sub, width in ((asp_sub, asp_size),
                               (left_sub, left_max_len),
                               (right_sub, right_max_len)):
                for _ in range(width - len(sub)):
                    sub.append(last_slot)

            asp_subs.append(asp_sub)
            left_subs.append(left_sub)
            right_subs.append(right_sub)

        return {
            'text_idxes': fsents,
            'left_ctx_idxes': left_ctx,
            'right_ctx_idxes': right_ctx,
            'batch_ids': ret_ids,
            'aspect_idxes': asps,
            'labels': lab,
            'text_lens': seq_lens,
            'left_lens': left_seq_lens,
            'right_lens': right_seq_lens,
            'aspect_lens': asp_lens,
            'text_reverse_lens': reverse_lens,
            # indices of the padded aspect inside the concatenation of all
            # padded sentences
            'aspect_subs': asp_subs,
            'left_subs': left_subs,
            'right_subs': right_subs,
            'text_bitmap': sent_bitmap,
            'left_bitmap': left_sent_bitmap,
            'right_bitmap': right_sent_bitmap,
            'f_asp_sub': f_asp_subs,
            'b_asp_sub': b_asp_subs,
            'f_left_sub': f_left_subs,
            'b_left_sub': b_left_subs,
            'f_right_sub': f_right_subs,
            'b_right_sub': b_right_subs,
            'alpha_adj': alpha_adj,
            'abs_poses': abs_poses,
            'left_abs_poses': left_abs_poses,
            'right_abs_poses': right_abs_poses,
            'pos_ids': pos_ids,
            'left_pos_ids': left_pos_ids,
            'right_pos_ids': right_pos_ids
        }
Пример #2
0
def get_data(sample, class_num, pad_idx, eos):
    """Wrap a single sample as a batch of size one and return its features.

    Every value in the returned dict is a one-element list holding the
    corresponding field of ``sample``.  Lengths are measured before the
    left/right contexts are handed to ``add_pad`` with a target of their own
    length plus one.  ``eos`` is accepted but not used by this function.
    """
    left_words = sample.left_context_idxes
    right_words = sample.right_context_idxes
    aspect_words = sample.aspect_idxes

    ids = [sample.id]
    texts = [sample.text_idxes]
    seq_lens = [len(sample.text_idxes)]

    left_ctx = [left_words]
    left_seq_lens = [len(left_words)]
    # left context followed by the aspect, stored back-to-front
    left_ctx_asp = [(left_words + aspect_words)[::-1]]

    right_ctx = [right_words]
    right_seq_lens = [len(right_words)]
    right_ctx_asp = [aspect_words + right_words]

    aspects = [aspect_words]
    asp_lens = [len(aspect_words)]

    leftsubs = [range(sample.left_wordpos[1])]
    rightsubs = [range(asp_lens[0], right_seq_lens[0] + asp_lens[0])]

    # one-hot label vector
    one_hot = [0.0] * class_num
    one_hot[sample.label] = 1.0
    labels = [one_hot]

    add_pad(inputs=[left_ctx, right_ctx],
            max_lens=[left_seq_lens[0] + 1, right_seq_lens[0] + 1],
            pad_idx=pad_idx)

    sent_bitmap = bitmap_by_padid(texts, pad_idx)
    left_sent_bitmap = bitmap_by_padid(left_ctx, pad_idx)
    right_sent_bitmap = bitmap_by_padid(right_ctx, pad_idx)

    return {
        'text_idxes': texts,
        'left_ctx_idxes': left_ctx,
        'right_ctx_idxes': right_ctx,
        'left_ctx_asp': left_ctx_asp,
        'right_ctx_asp': right_ctx_asp,
        'batch_ids': ids,
        'aspect_idxes': aspects,
        'labels': labels,
        'text_lens': seq_lens,
        'left_lens': left_seq_lens,
        'right_lens': right_seq_lens,
        'aspect_lens': asp_lens,
        'left_subs': leftsubs,
        'right_subs': rightsubs,
        'text_bitmap': sent_bitmap,
        'left_bitmap': left_sent_bitmap,
        'right_bitmap': right_sent_bitmap
    }
Пример #3
0
    def next_batch(self):
        '''
        Build the next mini-batch (sentence and aspect only) as a dict.

        The raw slice is obtained from ``batch_range``, whose sixth return
        value is stored back into ``self.idx``.  Sentences and aspects are
        handed to ``add_pad`` (return value ignored, so presumably in-place
        -- confirm against its definition); lengths, the pad bitmap and the
        aspect sub-indices (row k shifted by ``k * max_len``) are derived here.
        '''
        self.nN += 1
        (rins, lab, ret_ids, rinlens, rmaxlens,
         self.idx, _unused_float_lens) = batch_range(
            self.batch_size,
            self.idx,
            self.nsamps,
            self.rand_idx,
            self.class_num,
            self.labels,
            self.ids,
            [self.texts, self.aspsubs, self.aspects]
        )
        fsents, asubs, asps = rins[0], rins[1], rins[2]

        # Every entry of rinlens[0] is itself indexable; the length is at [0].
        reverse_lens = [entry[0] for entry in rinlens[0]]
        if self.eos:
            # reserve one extra slot per sentence for <eos>
            seq_lens = [n + 1 for n in reverse_lens]
        else:
            seq_lens = list(reverse_lens)

        max_len = rmaxlens[0] + 1
        add_pad(
            inputs=[fsents],
            max_lens=[max_len],
            pad_idx=self.pad_idx
        )
        sent_bitmap = bitmap_by_padid(fsents, self.pad_idx)

        # alpha_adj: per row, zero the leading run of 1.0 entries and set the
        # first entry that is not 1.0 to 1.0; the rest of the row is untouched.
        alpha_adj = copy.deepcopy(sent_bitmap)
        for flags in alpha_adj:
            for pos, flag in enumerate(flags):
                if flag == 1.0:
                    flags[pos] = 0.0
                    continue
                flags[pos] = 1.0
                break

        # Aspect length per row and the widest aspect in the batch.
        asp_lens = [asubs[i][1] - asubs[i][0] for i in range(len(seq_lens))]
        asp_size = 0
        for al in asp_lens:
            if al > asp_size:
                asp_size = al

        add_pad(
            inputs=[asps],
            max_lens=[asp_size],
            pad_idx=self.pad_idx
        )

        # Aspect sub-indices; short spans are filled with the index of the
        # row's last slot.
        asp_subs = []
        f_asp_subs = []
        b_asp_subs = []
        for k in range(len(fsents)):
            bias = k * max_len
            asub = asubs[k]
            asp_sub = list(range(bias + asub[0], bias + asub[1]))
            f_asp_subs.append(bias + asub[1] - 1)  # last index of the span
            b_asp_subs.append(bias + asub[0])      # first index of the span
            for _ in range(asp_size - len(asp_sub)):
                asp_sub.append(bias + max_len - 1)
            asp_subs.append(asp_sub)

        # Column-vector float copies of the length lists.
        seq_lens_float32 = [[float(n)] for n in seq_lens]
        asp_lens_float32 = [[float(n)] for n in asp_lens]

        return {
            'text_idxes': fsents,
            'batch_ids': ret_ids,
            'aspect_idxes': asps,
            'labels': lab,
            'text_lens': seq_lens,
            'text_lens_float32': seq_lens_float32,
            'aspect_lens': asp_lens,
            'aspect_lens_float32': asp_lens_float32,
            'text_reverse_lens': reverse_lens,
            # indices of the padded aspect inside the concatenation of all
            # padded sentences
            'aspect_subs': asp_subs,
            'text_bitmap': sent_bitmap,
            'f_asp_sub': f_asp_subs,
            'b_asp_sub': b_asp_subs,
            'alpha_adj': alpha_adj
        }
Пример #4
0
def get_data(sample, class_num, pad_idx, eos):
    """Wrap a single sample as a batch of size one, including bigram features.

    Every value in the returned dict is a one-element list.  Lengths are
    measured before the word-index lists are handed to ``add_pad``.  The
    left/right sub-index lists are filled up to their context length plus
    two with ``max_len - 1`` (the last sentence slot), and consecutive pairs
    of those lists form the "ngram" features.  ``eos`` is accepted but not
    used by this function.
    """

    def _fill(indices, width, filler):
        # Append ``filler`` until ``indices`` holds at least ``width`` items.
        for _ in range(width - len(indices)):
            indices.append(filler)
        return indices

    def _bigrams(indices, filler):
        # Adjacent pairs plus a size per pair: 1 if either member is the
        # filler index, otherwise 2.
        pairs, sizes = [], []
        for prev, cur in zip(indices, indices[1:]):
            pairs.append([prev, cur])
            if prev == filler or cur == filler:
                sizes.append(1)
            else:
                sizes.append(2)
        return pairs, sizes

    left_words = sample.left_context_idxes
    right_words = sample.right_context_idxes
    aspect_words = sample.aspect_idxes

    ids = [sample.id]
    texts = [sample.text_idxes]
    seq_lens = [len(sample.text_idxes)]

    left_ctx = [left_words]
    left_seq_lens = [len(left_words)]
    # left context followed by the aspect, stored back-to-front
    left_ctx_asp = [(left_words + aspect_words)[::-1]]

    right_ctx = [right_words]
    right_seq_lens = [len(right_words)]
    right_ctx_asp = [aspect_words + right_words]

    left_ctxasp_lens = [len(left_ctx_asp[0])]
    right_ctxasp_lens = [len(right_ctx_asp[0])]

    aspects = [aspect_words]
    asp_lens = [len(aspect_words)]

    asub = sample.aspect_wordpos
    lsub = sample.left_wordpos
    # NOTE(review): the right span is read from aspect_wordpos, not from a
    # right_wordpos attribute -- kept as is; confirm this is intended.
    rsub = sample.aspect_wordpos

    # one-hot label vector
    one_hot = [0.0] * class_num
    one_hot[sample.label] = 1.0
    labels = [one_hot]

    add_pad(inputs=[texts, left_ctx_asp, right_ctx_asp, left_ctx, right_ctx],
            max_lens=[
                seq_lens[0] + 1, seq_lens[0] + 1, seq_lens[0] + 1,
                left_seq_lens[0], right_seq_lens[0]
            ],
            pad_idx=pad_idx)

    sent_bitmap = bitmap_by_padid(texts, pad_idx)
    left_sent_bitmap = bitmap_by_padid(left_ctx, pad_idx)
    right_sent_bitmap = bitmap_by_padid(right_ctx, pad_idx)

    max_len = seq_lens[0] + 1
    last_slot = max_len - 1
    left_max_len = left_seq_lens[0] + 2
    right_max_len = right_seq_lens[0] + 2

    # Single row, so there is no row offset to add to the indices.
    asp_sub = list(range(asub[0], asub[1]))
    left_sub = _fill(list(range(lsub[0], lsub[1])), left_max_len, last_slot)
    right_sub = _fill(list(range(rsub[0], rsub[1])), right_max_len, last_slot)

    left_ngram, left_ngram_len = _bigrams(left_sub, last_slot)
    right_ngram, right_ngram_len = _bigrams(right_sub, last_slot)

    # 1.0 over both contexts, 0.5 over the aspect, one trailing 0.0.
    asp_mask = [
        [1.0] * n_left + [0.5] * n_asp + [1.0] * n_right + [0.0]
        for n_left, n_asp, n_right in zip(left_seq_lens, asp_lens,
                                          right_seq_lens)
    ]

    return {
        'text_idxes': texts,
        'left_ctx_idxes': left_ctx,
        'right_ctx_idxes': right_ctx,
        'left_ctx_asp': left_ctx_asp,
        'right_ctx_asp': right_ctx_asp,
        'batch_ids': ids,
        'aspect_idxes': aspects,
        'labels': labels,
        'text_lens': seq_lens,
        'left_lens': left_seq_lens,
        'right_lens': right_seq_lens,
        'left_ca_lens': left_ctxasp_lens,
        'right_ca_lens': right_ctxasp_lens,
        'left_ngram_lens': [left_ngram_len],
        'right_ngram_lens': [right_ngram_len],
        'aspect_lens': asp_lens,
        'left_subs': [left_sub],
        'right_subs': [right_sub],
        'asp_subs': [asp_sub],
        'left_ngrams': [left_ngram],
        'right_ngrams': [right_ngram],
        'text_bitmap': sent_bitmap,
        'left_bitmap': left_sent_bitmap,
        'right_bitmap': right_sent_bitmap,
        'asp_mask': asp_mask
    }
Пример #5
0
    def next_batch(self):
        '''
        Build the batch for the current length bucket and return it as a dict.

        The bucket is ``self.len_dic[self.key_list[self.rand_idx[self.idx]]]``.
        Its sample list is shuffled in place, the sample fields are collected
        into the ``self.*`` buffers (which are reset on every call) and passed
        through ``batch_all``; ``self.idx`` is then advanced by one.

        Word-index sequences are handed to ``add_pad`` (return value ignored,
        so presumably in-place -- confirm against its definition).  The
        returned dict holds the padded sequences, lengths, pad bitmaps,
        relative-position features, several 0/1 masks, the aspect
        sub-indices (row k shifted by ``k * max_len``) and, per sentence
        position, a context window of ``window_size - 1`` neighbours on each
        side together with the number of neighbours that exist.
        '''

        def _runs(*segments):
            # Expand (value, count) pairs into one flat list; a count <= 0
            # contributes nothing.
            out = []
            for value, count in segments:
                out.extend(value for _ in range(count))
            return out

        # Reset the per-batch buffers kept on the instance.
        self.labels = []
        self.texts = []
        self.aspects = []
        self.leftsubs = []
        self.rightsubs = []
        self.left_ctx = []
        self.left_ctx_asp = []
        self.right_ctx = []
        self.right_ctx_asp = []
        self.aspsubs = []
        self.left_aspsubs = []
        self.right_aspsubs = []
        self.ids = []

        samplelist = self.len_dic[self.key_list[self.rand_idx[self.idx]]]
        random.shuffle(samplelist)
        for sample in samplelist:
            self.ids.append(sample.id)
            self.texts.append(sample.text_idxes)
            self.left_ctx.append(sample.left_context_idxes)
            # left context followed by the aspect, stored back-to-front
            left_tmp = sample.left_context_idxes + sample.aspect_idxes
            left_tmp.reverse()
            self.left_ctx_asp.append(left_tmp)
            self.right_ctx_asp.append(
                sample.aspect_idxes + sample.right_context_idxes)
            self.right_ctx.append(sample.right_context_idxes)
            self.aspects.append(sample.aspect_idxes)
            self.aspsubs.append(sample.aspect_wordpos)
            self.left_aspsubs.append([0, len(sample.aspect_idxes)])
            self.right_aspsubs.append([0, len(sample.aspect_idxes)])
            self.leftsubs.append(sample.left_wordpos)
            self.rightsubs.append(sample.right_wordpos)
            self.labels.append(sample.label)

        # The position of each list below fixes its index in rins/rinlens/
        # rmaxlens, so the order must not change.
        rins, lab, rinlens, rmaxlens, _unused_float_lens = batch_all(
            [self.texts, self.aspsubs, self.aspects, self.left_ctx,
             self.right_ctx, self.leftsubs, self.rightsubs,
             self.left_ctx_asp, self.right_ctx_asp, self.left_aspsubs,
             self.right_aspsubs],
            self.labels,
            self.class_num,
        )
        self.idx += 1

        fsents, asubs, asps = rins[0], rins[1], rins[2]
        left_ctx, right_ctx = rins[3], rins[4]
        l_ctx_asp, r_ctx_asp = rins[7], rins[8]

        # Every entry of rinlens[i] is itself indexable; the length is at [0].
        reverse_lens = [entry[0] for entry in rinlens[0]]
        if self.eos:
            # reserve one extra slot per sentence for <eos>
            seq_lens = [n + 1 for n in reverse_lens]
        else:
            seq_lens = list(reverse_lens)
        left_seq_lens = [entry[0] for entry in rinlens[3]]
        right_seq_lens = [entry[0] for entry in rinlens[4]]
        l_ca_len = [entry[0] for entry in rinlens[7]]
        r_ca_len = [entry[0] for entry in rinlens[8]]

        max_len = rmaxlens[0] + 1
        left_max_len = rmaxlens[3]
        right_max_len = rmaxlens[4]
        # All five sequences are padded relative to the sentence maximum.
        add_pad(
            inputs=[fsents, left_ctx, right_ctx, l_ctx_asp, r_ctx_asp],
            max_lens=[max_len, rmaxlens[0], rmaxlens[0], max_len, max_len],
            pad_idx=self.pad_idx
        )
        sent_bitmap = bitmap_by_padid(fsents, self.pad_idx)
        left_sent_bitmap = bitmap_by_padid(left_ctx, self.pad_idx)
        right_sent_bitmap = bitmap_by_padid(right_ctx, self.pad_idx)

        # alpha_adj: per row, zero the leading run of 1.0 entries and set the
        # first entry that is not 1.0 to 1.0; the rest of the row is untouched.
        alpha_adj = copy.deepcopy(sent_bitmap)
        for flags in alpha_adj:
            for pos, flag in enumerate(flags):
                if flag == 1.0:
                    flags[pos] = 0.0
                    continue
                flags[pos] = 1.0
                break

        # Aspect lengths plus relative-position features for each row.
        abs_poses, left_abs_poses, right_abs_poses = [], [], []
        pos_ids, left_pos_ids, right_pos_ids = [], [], []
        asp_lens = []
        asp_size = 0
        for row, sl in enumerate(seq_lens):
            left_l = left_seq_lens[row]
            right_l = right_seq_lens[row]
            asub = asubs[row]
            al = asub[1] - asub[0]
            asp_lens.append(al)
            if al > asp_size:
                asp_size = al

            # left of the aspect: distances asub[0], ..., 2, 1
            lt_pos = list(range(asub[0], 0, -1))
            lt_ids = np.array(lt_pos)
            lt_ids += self.pos_id_range['left_start'] - 1
            lt_pid = lt_ids.tolist()
            # right of the aspect: distances 1, 2, ..., sl - asub[1]
            rt_pos = list(range(1, sl - asub[1] + 1))
            rt_ids = np.array(rt_pos)
            rt_ids += self.pos_id_range['right_start'] - 1
            rt_pid = rt_ids.tolist()

            asp_id = self.pos_id_range['asp']
            oos_id = self.pos_id_range['oos']

            abs_poses.append(
                lt_pos + _runs((0, al)) + rt_pos + _runs((0, max_len - sl)))
            left_abs_poses.append(lt_pos + _runs((0, left_max_len - left_l)))
            right_abs_poses.append(
                rt_pos + _runs((0, right_max_len - right_l)))

            pos_ids.append(
                lt_pid + _runs((asp_id, al)) + rt_pid
                + _runs((oos_id, max_len - sl)))
            left_pos_ids.append(
                lt_pid + _runs((oos_id, left_max_len - left_l)))
            right_pos_ids.append(
                rt_pid + _runs((oos_id, right_max_len - right_l)))

        add_pad(
            inputs=[asps],
            max_lens=[asp_size],
            pad_idx=self.pad_idx
        )

        # Masks.  Each row is built from the row's own lengths (they are not
        # padded to max_len here).
        rows = range(len(seq_lens))
        # 1.0 over both contexts, 0.5 over the aspect, one trailing 0.0
        asp_mask = [
            _runs((1.0, left_seq_lens[i]), (0.5, asp_lens[i]),
                  (1.0, right_seq_lens[i]), (0.0, 1))
            for i in rows
        ]
        # 1 over the aspect span, 0 over both contexts
        asp_pos = [
            _runs((0, left_seq_lens[i]), (1, asp_lens[i]),
                  (0, right_seq_lens[i]))
            for i in rows
        ]
        # Prefix masks: the first `n` slots versus the rest of the sentence.
        left_a_mask = [
            _runs((1, asp_lens[i]), (0, seq_lens[i] - asp_lens[i]))
            for i in rows
        ]
        left_mask = [
            _runs((0, asp_lens[i]), (1, seq_lens[i] - asp_lens[i]))
            for i in rows
        ]
        left_mask2 = [
            _runs((1, left_seq_lens[i]), (0, seq_lens[i] - left_seq_lens[i]))
            for i in rows
        ]
        right_mask2 = [
            _runs((1, right_seq_lens[i]),
                  (0, seq_lens[i] - right_seq_lens[i]))
            for i in rows
        ]
        left_asp_mask = [
            _runs((1, l_ca_len[i]), (0, seq_lens[i] - l_ca_len[i]))
            for i in rows
        ]
        right_asp_mask = [
            _runs((1, r_ca_len[i]), (0, seq_lens[i] - r_ca_len[i]))
            for i in rows
        ]

        # Aspect sub-indices and per-position context windows.
        window_size = 5
        asp_subs = []
        window_subs = []
        window_lens = []
        for k in range(len(fsents)):
            bias = k * max_len
            last_slot = bias + max_len - 1
            sent = fsents[k]
            asub = asubs[k]

            asp_sub = list(range(bias + asub[0], bias + asub[1]))
            # short spans are filled with the index of the row's last slot
            asp_sub.extend(
                last_slot for _ in range(asp_size - len(asp_sub)))
            asp_subs.append(asp_sub)

            window_sub = []
            window_len = []
            for x in range(len(sent)):
                # A fresh list per position: sharing one list across
                # positions made every entry of window_sub alias the same
                # ever-growing object.
                w = []
                valid = 0
                # Left neighbours x-1 .. x-(window_size-1).  The offset must
                # follow `s`; a fixed `x - 2` reads sent[-1] at x == 1.
                # NOTE(review): the stored value is bias + token id, while the
                # filler is a slot index -- confirm which one is intended.
                for s in range(1, window_size):
                    if x - s < 0:
                        w.append(last_slot)
                    else:
                        w.append(bias + sent[x - s])
                        valid += 1
                # Right neighbours x+1 .. x+(window_size-1).
                for s in range(1, window_size):
                    if x + s >= max_len:
                        w.append(last_slot)
                    else:
                        w.append(bias + sent[x + s])
                        valid += 1
                # a window is never reported as empty
                window_len.append(valid if valid else 1)
                window_sub.append(w)
            window_subs.append(window_sub)
            window_lens.append(window_len)

        return {
            'text_idxes': fsents,
            'left_ctx_idxes': left_ctx,
            'right_ctx_idxes': right_ctx,
            'left_ctx_asp': l_ctx_asp,
            'right_ctx_asp': r_ctx_asp,
            'batch_ids': self.ids,
            'aspect_idxes': asps,
            'labels': lab,
            'text_lens': seq_lens,
            'left_lens': left_seq_lens,
            'right_lens': right_seq_lens,
            'left_ca_lens': l_ca_len,
            'right_ca_lens': r_ca_len,
            'aspect_lens': asp_lens,
            'text_reverse_lens': reverse_lens,
            # indices of the padded aspect inside the concatenation of all
            # padded sentences
            'aspect_subs': asp_subs,
            'window_subs': window_subs,
            'window_lens': window_lens,
            'text_bitmap': sent_bitmap,
            'left_bitmap': left_sent_bitmap,
            'right_bitmap': right_sent_bitmap,
            'alpha_adj': alpha_adj,
            'abs_poses': abs_poses,
            'left_abs_poses': left_abs_poses,
            'right_abs_poses': right_abs_poses,
            'pos_ids': pos_ids,
            'left_pos_ids': left_pos_ids,
            'right_pos_ids': right_pos_ids,
            'asp_mask': asp_mask,
            'left_a_mask': left_a_mask,
            'left_mask': left_mask,
            'left_mask2': left_mask2,
            'right_mask2': right_mask2,
            'left_asp_mask': left_asp_mask,
            'right_asp_mask': right_asp_mask,
            'asp_pos': asp_pos
        }