Example #1
0
def test_6_compare_row_and_col_wise_fill():
    """Build masks for the same two tables with row-wise and column-wise fill.

    The original body duplicated the row-wise case verbatim (both halves used
    ``row_wise_fill=True``) and left an ``ipdb.set_trace()`` breakpoint in,
    so nothing was actually compared and any automated run would hang.
    """
    args = get_args()
    args.seq_len = 128

    # Row-wise fill.
    tokens_0, seg_0 = generate_seg(args, table_a, row_wise_fill=True)
    tokens_1, seg_1 = generate_seg(args, table_b, row_wise_fill=True)
    seg_row = torch.LongTensor([seg_0, seg_1])
    check_segs(zip([seg_0, seg_1], [tokens_0, tokens_1]))
    mask_row = generate_mask(seg_row)

    # Column-wise fill — this half previously repeated the row-wise calls.
    tokens_0, seg_0 = generate_seg(args, table_a, row_wise_fill=False)
    tokens_1, seg_1 = generate_seg(args, table_b, row_wise_fill=False)
    seg_col = torch.LongTensor([seg_0, seg_1])
    check_segs(zip([seg_0, seg_1], [tokens_0, tokens_1]))
    mask_col = generate_mask(seg_col)

    # Both fill orders must produce a mask of the same shape.
    assert mask_row.shape == mask_col.shape
Example #2
0
def test_2_bigger_table():
    """Generate segments for every table in a sample file and build one mask.

    Decodes the tables from a stored sample file, produces one seg sequence
    per table, stacks them into a batch, and verifies a mask can be built.
    The leftover ``ipdb.set_trace()`` breakpoint was removed — it would hang
    any non-interactive test run.
    """
    from col_spec_yh.store_utils import test_decode_spider_file
    tab_file = 'demos/samples/sample_file_type0-1.tb'
    tab_cols_list = test_decode_spider_file(tab_file)

    args = get_args()
    # One seg sequence per decoded table.
    seg_list = [generate_seg(args, tab_col, row_wise_fill=True)[1]
                for tab_col in tab_cols_list]
    seg = torch.LongTensor(seg_list)
    mask = generate_mask(seg)  # mask.shape: torch.Size([10, 1, 64, 64])
    # Sanity check: one mask row per table in the batch.
    assert mask.shape[0] == len(tab_cols_list)
Example #3
0
def test_3_too_much_empty_values():
    """Exercise seg/mask generation on tables dominated by empty values.

    Uses a deliberately short sequence length (16) to stress truncation when
    most cells are empty.  The leftover ``ipdb.set_trace()`` breakpoint was
    removed so the test can run unattended.
    """
    args = get_args()
    args.seq_len = 16
    tokens_0, seg_0 = generate_seg(args,
                                   table_with_empty_values_1,
                                   row_wise_fill=True)
    tokens_1, seg_1 = generate_seg(args,
                                   table_with_empty_values_2,
                                   row_wise_fill=True)
    seg = torch.LongTensor([seg_0, seg_1])
    check_segs(zip([seg_0, seg_1], [tokens_0, tokens_1]))
    mask = generate_mask(seg)
    # Mask must cover the full (batch, ..., seq_len, seq_len) extent.
    assert mask.shape[-1] == args.seq_len
Example #4
0
def check_segs(seg_token_pairs):
    """Print each run of consecutive positions that share a segment id.

    For every ``(seg, tokens)`` pair, positions are grouped into maximal runs
    whose ``seg`` value modulo 10000 is equal (the low digits encode the
    column/segment id — TODO confirm against generate_seg), and each run's
    seg slice and decoded tokens are printed for visual inspection.

    Args:
        seg_token_pairs: iterable of ``(seg, tokens)`` pairs, where ``seg``
            and ``tokens`` are equal-length index sequences.

    Note: the parameter was renamed from ``iter`` — it shadowed the builtin.
    All visible callers pass it positionally, so the rename is safe for them.
    """
    args = get_args()
    for seg, tokens in seg_token_pairs:
        start = 0
        while start < len(seg):
            current = seg[start] % 10000
            # Advance `end` past the run of positions with the same id.
            end = start
            while end < len(seg) and seg[end] % 10000 == current:
                end += 1
            print(seg[start:end])
            print(args.tokenizer.convert_ids_to_tokens(tokens[start:end]))
            start = end
Example #5
0
def test_7_additional_ban():
    """Build a mask with ``additional_ban=2`` on column-wise-filled tables.

    Fills both tables column-wise (``row_wise_fill=False``) and checks that
    mask generation accepts the ``additional_ban`` argument.  The leftover
    ``ipdb.set_trace()`` breakpoint was removed — it blocks unattended runs.
    """
    args = get_args()
    args.row_wise_fill = False
    args.seq_len = 128
    tokens_0, seg_0 = generate_seg(args,
                                   table_a,
                                   row_wise_fill=args.row_wise_fill)
    tokens_1, seg_1 = generate_seg(args,
                                   table_b,
                                   row_wise_fill=args.row_wise_fill)
    seg = torch.LongTensor([seg_0, seg_1])
    check_segs(zip([seg_0, seg_1], [tokens_0, tokens_1]))
    mask = generate_mask(seg, additional_ban=2)
    # Two tables in → batch dimension of two out.
    assert mask.shape[0] == 2