def snli_config2(is_debug, output_log=None):
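    """Config for the FFNWithCrossCompare baseline on SNLI, using GloVe 300d word embeddings."""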
    from model.self_attention_model import PreprocessWrapper
    from read_data.snli.read_snli_experiment_data import load_dict_data
    from read_data.snli.load_snli_vocabulary import load_snli_vocabulary, load_snli_character_vocabulary
    train, valid, test = load_dict_data(debug=is_debug)
    vocabulary = load_snli_vocabulary("glove_300d")
    character_vocabulary = load_snli_character_vocabulary(n_gram=1)
    from model.feed_forward_network import FFNWithCrossCompare
    return {
        "model_fn": FFNWithCrossCompare,
        "model_dict": {
            "word_embedding": vocabulary.embedding_matrix,
            "n_classes": 3,
            "hidden_size": 400,
        },
        "pre_process_module_fn": PreprocessWrapper,
        "pre_process_module_dict": {
            "pad_idx": vocabulary.word_to_id(vocabulary.pad),
            "character_pad_idx": character_vocabulary.character_to_id_dict[character_vocabulary.PAD],
        },
        "data": [train, valid, test],
        "batch_size": 32,
        "train_loss": nn.CrossEntropyLoss,
        "clip_norm": None,
        "name": "FFN_snli",
        "optimizer": optim.Adam,
        "need_pad": True,
        "optimizer_dict": {"betas": (0.9, 0.999), "weight_decay": 3e-7, },
        "epcohes": 80,
        "lr": 1e-3,
        "evaluate_object_list": [SequenceExactMatch(gpu_index=get_gpu_index())],
        "epoch_ratio": 0.1,
    }


def snli_config7(is_debug, output_log=None):
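    """Config for the transformer-style TestModel on SNLI: both sentences are fed
    as one sequence, joined by a delimiter token and closed by a summary token
    whose position is used for classification (clf_token)."""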
    from model.sentence_pair_graph import ConcatPreprocessWrapper
    import numpy as np
    from read_data.snli.read_snli_experiment_data import load_dict_data
    from read_data.snli.load_snli_vocabulary import load_snli_vocabulary
    train, valid, test = load_dict_data(debug=is_debug)
    vocabulary = load_snli_vocabulary("glove_300d")
    delimeter_idx = len(vocabulary.id_to_word_dict)
    summary_idx = len(vocabulary.id_to_word_dict) + 1
    embedding_matrix = vocabulary.embedding_matrix
    embedding_matrix = np.concatenate((embedding_matrix, np.random.randn(2, embedding_matrix.shape[1])), axis=0)
    from model.transformer_lm import dotdict
    return {
        "model_fn": TestModel,
        "model_dict": {
            "cfg": dotdict({
                'n_embd': 768,
                'n_head': 1,
                'n_layer': 1,
                'embd_pdrop': 0.1,
                'attn_pdrop': 0.1,
                'resid_pdrop': 0.1,
                'afn': 'gelu',
                'clf_pdrop': 0.1}),
            "clf_token": summary_idx, "vocabulary_size": embedding_matrix.shape[0],
            "n_ctx": 80 + 2
        },
        "pre_process_module_fn": ConcatPreprocessWrapper,
        "pre_process_module_dict": {
            "pad_idx": vocabulary.word_to_id(vocabulary.pad),
            "delimeter_idx": delimeter_idx,
            "summary_node_idx": summary_idx,
            "max_length": 80,
        },
        "data": [train, valid, test],
        "batch_size": 8,
        "train_loss": nn.CrossEntropyLoss,
        "clip_norm": 1,
        "name": "GGNNGraphModel_snli",
        "optimizer": optim.Adam,
        "need_pad": True,
        # "optimizer_dict": {
        #                    "schedule": 'warmup_linear',
        #                    "warmup": 0.002,
        #                    "t_total": (100//8)*300,
        #                    "b1": 0.9,
        #                    "b2": 0.999,
        #                    "e": 1e-8,
        #                    "l2": 0.01,
        #                    "vector_l2": 'store_true',
        #                    "max_grad_norm": 1},
        "optimizer_dict": {"betas": (0.9, 0.999), "weight_decay": 0.01, },
        "epcohes": 300,
        "lr": 6.25e-5,
        "evaluate_object_list": [SequenceExactMatch(gpu_index=get_gpu_index())],
        "epoch_ratio": 1,
        "scheduler_fn": None
    }


def snli_config6(is_debug, output_log=None):
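    """Config for GGNNGraphModel on SNLI, using the same concatenated sentence-pair
    preprocessing as snli_config7, trained with OpenAIAdam and a warmup_linear
    schedule."""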
    from model.sentence_pair_graph import ConcatPreprocessWrapper
    import numpy as np
    from read_data.snli.read_snli_experiment_data import load_dict_data
    from read_data.snli.load_snli_vocabulary import load_snli_vocabulary
    train, valid, test = load_dict_data(debug=is_debug)
    vocabulary = load_snli_vocabulary("glove_300d")
    delimeter_idx = len(vocabulary.id_to_word_dict)
    summary_idx = len(vocabulary.id_to_word_dict) + 1
    embedding_matrix = vocabulary.embedding_matrix
    embedding_matrix = np.concatenate((embedding_matrix, np.random.randn(2, embedding_matrix.shape[1])), axis=0)
    return {
        "model_fn": GGNNGraphModel,
        "model_dict": {
            "word_embedding": embedding_matrix,
            "max_length": 80,
            "hidden_state_size": 756,
            "n_classes": 3,
        },
        "pre_process_module_fn": ConcatPreprocessWrapper,
        "pre_process_module_dict": {
            "pad_idx": vocabulary.word_to_id(vocabulary.pad),
            "delimeter_idx": delimeter_idx,
            "summary_node_idx": summary_idx,
            "max_length": 80,
        },
        "data": [train, valid, test],
        "batch_size": 8,
        "train_loss": nn.CrossEntropyLoss,
        "clip_norm": 1,
        "name": "GGNNGraphModel_snli",
        "optimizer": OpenAIAdam,
        "need_pad": True,
        "optimizer_dict": {
                           "schedule": 'warmup_linear',
                           "warmup": 0.002,
                           "t_total": (100//8)*300,
                           "b1": 0.9,
                           "b2": 0.999,
                           "e": 1e-8,
                           "l2": 0.01,
                           "vector_l2": 'store_true',
                           "max_grad_norm": 1},
        "epcohes": 300,
        "lr": 6.25e-5,
        "evaluate_object_list": [SequenceExactMatch(gpu_index=get_gpu_index())],
        "epoch_ratio": 1,
        "scheduler_fn": None
    }


def snli_config5(is_debug, output_log=None):
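    """Config for SequenceGraphModelWithGraphAttention on SNLI, mixing word
    embeddings with convolutional character-level features."""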
    from model.sentence_pair_graph import PreprocessWrapper
    from read_data.snli.read_snli_experiment_data import load_dict_data
    from read_data.snli.load_snli_vocabulary import load_snli_vocabulary, load_snli_character_vocabulary
    train, valid, test = load_dict_data(debug=is_debug)
    vocabulary = load_snli_vocabulary("glove_300d")
    character_vocabulary = load_snli_character_vocabulary(n_gram=1)
    from model.sentence_pair_graph import SequenceGraphModelWithGraphAttention
    return {
        "model_fn": SequenceGraphModelWithGraphAttention,
        "model_dict": {
            "word_embedding": vocabulary.embedding_matrix,
            "character_number": len(character_vocabulary.character_to_id_dict),
            "mixed": True,
            "character_embedding_dim": 600,
            "character_n_filters": 200,
            "character_kernel_size": 5,
            "character_padding": 2,
            "hidden_size": 128,
            "graph_itr": 1,
            "dynamic_graph_n_layer": 2,
            "graph_attention_n_head": 6,
            "leaky_alpha": 0.2,
            "n_classes": 3,
        },
        "pre_process_module_fn": PreprocessWrapper,
        "pre_process_module_dict": {
            "pad_idx": vocabulary.word_to_id(vocabulary.pad),
            "character_pad_idx": character_vocabulary.character_to_id_dict[character_vocabulary.PAD],
        },
        "data": [train, valid, test],
        "batch_size": 80,
        "train_loss": nn.CrossEntropyLoss,
        "clip_norm": None,
        "name": "SequenceGraphModelWithGraphAttention_snli",
        "optimizer": optim.Adam,
        "need_pad": True,
        "optimizer_dict": {"betas": (0.9, 0.999), "weight_decay": 3e-7, },
        "epcohes": 160,
        "lr": 3e-3,
        "evaluate_object_list": [SequenceExactMatch(gpu_index=get_gpu_index())],
        "epoch_ratio": 0.1,
    }


def load_snli_data(is_debug, max_total_len=80, word_vector_name='glove_300d'):
    """
    sentence1: sentence 1
    tokens1: token list of sentence 1, split by ' '
    tokens_id1: token id list of sentence 1 with begin token 1 and end token 1
    character_ids1:
    sentence2: like before
    tokens2:  like before
    tokens_id2: like before
    character_ids2:
    tokens_len1: token 1 length
    tokens_len2:
    total_len: token_len1 + token_len2
    gold_label: target label. one of ['entailment', 'neutral', 'contradiction']
    label: target label id [0, 1, 2].
    :param is_debug:
    :param max_total_len:
    :return: dataframe obj
    """
    train_df = read_snli_split_train_data()
    valid_df = read_snli_split_valid_data()
    test_df = read_snli_split_test_data()
    dfs = [train_df, valid_df, test_df]
    print('after read data: ')
    for df in dfs:
        print(len(df))
    vocab = load_snli_vocabulary(word_vector_name)
    character_vocab = load_snli_character_vocabulary(n_gram=1)
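    # Map tokens, characters, and labels to their id representations.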
    dfs = [parse_tokens_id(df, vocab) for df in dfs]
    dfs = [parse_character_id(df, character_vocab) for df in dfs]
    dfs = [parse_label_id(df) for df in dfs]
    print('after parsing tokens to ids')

    dfs = [df[df['total_len'] < max_total_len] for df in dfs]
    print('after filtering pairs with total_len >= {}: '.format(max_total_len))
    for df in dfs:
        print(len(df))

    if is_debug:
        dfs = [df[:100] for df in dfs]

    return dfs
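

# The sketch below shows how the config dicts above appear to fit together. It is
# a hypothetical helper for illustration, assuming the preprocess wrapper composes
# over the model; it is not this repo's actual trainer entry point.
def _run_experiment_sketch(config):
    """Illustrative only: build the model, optimizer, and loss from a config dict."""
    model = config["pre_process_module_fn"](
        config["model_fn"](**config["model_dict"]),
        **config["pre_process_module_dict"],
    )
    optimizer = config["optimizer"](model.parameters(), lr=config["lr"],
                                    **config["optimizer_dict"])
    loss_fn = config["train_loss"]()
    train, valid, test = config["data"]
    return model, optimizer, loss_fn, (train, valid, test)

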
def snli_config3(is_debug, output_log=None):
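    """Config for a QANet-based classifier on SNLI (word embeddings only; the
    character embedding matrix is left unset)."""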
    from model.self_attention_model import PreprocessWrapper
    from read_data.snli.read_snli_experiment_data import load_dict_data
    from read_data.snli.load_snli_vocabulary import load_snli_vocabulary, load_snli_character_vocabulary
    train, valid, test = load_dict_data(debug=is_debug)
    vocabulary = load_snli_vocabulary("glove_300d")
    character_vocabulary = load_snli_character_vocabulary(n_gram=1)
    from qanet.qanet import QANet
    return {
        "model_fn": QANet,
        "model_dict": {
            "word_embedding_matrix": vocabulary.embedding_matrix,
            "char_embedding_matrix": None,
            "params": {

                "word_embed_dim": 300,

                "highway_n_layers": 2,

                "hidden_size": 128,

                "embed_encoder_resize_kernel_size": 7,
                "embed_encoder_resize_pad": 3,

                "embed_encoder_n_blocks": 1,
                "embed_encoder_n_conv": 4,
                "embed_encoder_kernel_size": 7,
                "embed_encoder_pad": 3,
                "embed_encoder_conv_type": "depthwise_separable",
                "embed_encoder_with_self_attn": False,
                "embed_encoder_n_heads": 8,

                "model_encoder_n_blocks": 7,
                "model_encoder_n_conv": 2,
                "model_encoder_kernel_size": 7,
                "model_encoder_pad": 3,
                "model_encoder_conv_type": "depthwise_separable",
                "model_encoder_with_self_attn": False,
                "model_encoder_n_heads": 8,

                "batch_size": 128,
            }
        },
        "pre_process_module_fn": PreprocessWrapper,
        "pre_process_module_dict": {
            "pad_idx": vocabulary.word_to_id(vocabulary.pad),
            "character_pad_idx": character_vocabulary.character_to_id_dict[character_vocabulary.PAD],
        },
        "data": [train, valid, test],
        "batch_size": 128,
        "train_loss": nn.CrossEntropyLoss,
        "clip_norm": None,
        "name": "QANet_snli",
        "optimizer": optim.Adam,
        "need_pad": True,
        "optimizer_dict": {"betas": (0.9, 0.999), "weight_decay": 3e-7, },
        "epcohes": 80,
        "lr": 1e-5,
        "evaluate_object_list": [SequenceExactMatch(gpu_index=get_gpu_index())],
        "epoch_ratio": 0.1,
    }