import jieba  # word segmentation for the classical similarity features


def get_test_keras_bert_iterator(data_path, file_name):
    """Yield BERT inputs plus lexical-similarity features for each test pair."""
    it = get_test_data_iterator(data_path, file_name)
    for source, target, cat_source, cat_target, pair_id in it:
        # BERT token indices for both sentences.
        data_source = _get_indices(text=source)
        data_target = _get_indices(text=target)

        # Word-level segmentation feeds the BM25 / TF / TF-IDF features.
        seg_source = jieba.lcut(source)
        seg_target = jieba.lcut(target)
        bm25 = calculate_bm25_similarity(bm25Model, seg_source, seg_target)
        tf_cosine = calculate_tf_cosine_similarity(seg_source, seg_target)
        tfidf_cosine = calculate_tfidf_cosine_similarity(seg_source, seg_target, bm25Model.idf)

        yield (data_source['input_ids'], data_source['token_type_ids'], data_source['attention_mask'],
               data_target['input_ids'], data_target['token_type_ids'], data_target['attention_mask'],
               bm25, tf_cosine, tfidf_cosine,
               cat_source, cat_target,
               pair_id)
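
# For context, a hypothetical sketch of the _get_indices helper consumed
# above, assuming a Hugging Face BertTokenizer held at module level; the
# actual implementation (like bm25Model and the calculate_* helpers) lives
# elsewhere in this repo, and the model name is a placeholder.
#
# from transformers import BertTokenizer
#
# tokenizer = BertTokenizer.from_pretrained("bert-base-chinese")
#
# def _get_indices(text, max_length=128):
#     # Returns a dict with 'input_ids', 'token_type_ids' and
#     # 'attention_mask' -- exactly the keys the iterators read.
#     return tokenizer(text, max_length=max_length,
#                      padding="max_length", truncation=True)
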
def get_keras_bert_iterator(data_path, file_names, tokenizer):
    """Endlessly yield training samples: BERT inputs, similarity features, and both labels.

    NOTE: ``tokenizer`` is accepted but not used directly here;
    ``_get_indices`` appears to rely on a module-level tokenizer instead.
    """
    while True:
        data_it = get_data_iterator(data_path, file_names)
        for source, target, cat_source, cat_target, labelA, labelB in data_it:
            data_source = _get_indices(text=source)
            data_target = _get_indices(text=target)

            seg_source = jieba.lcut(source)
            seg_target = jieba.lcut(target)
            bm25 = calculate_bm25_similarity(bm25Model, seg_source, seg_target)
            tf_cosine = calculate_tf_cosine_similarity(seg_source, seg_target)
            tfidf_cosine = calculate_tfidf_cosine_similarity(seg_source, seg_target, bm25Model.idf)

            yield (data_source['input_ids'], data_source['token_type_ids'], data_source['attention_mask'],
                   data_target['input_ids'], data_target['token_type_ids'], data_target['attention_mask'],
                   bm25, tf_cosine, tfidf_cosine,
                   cat_source, cat_target,
                   labelA, labelB)
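
# Minimal usage sketch (the paths, file names and tokenizer below are
# placeholders): the training iterator loops forever, so pull samples with
# next() or wrap it in a batching generator before calling model.fit.
#
# train_it = get_keras_bert_iterator("./data", ["train.tsv"], tokenizer)
# (src_ids, src_seg, src_mask,
#  tgt_ids, tgt_seg, tgt_mask,
#  bm25, tf_cos, tfidf_cos,
#  cat_src, cat_tgt,
#  label_a, label_b) = next(train_it)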