def get_loss(model, data_label, tasks, use_gpu, word_decoding, char_decoding,
             max_char_len, bucketing, batch_size,
             symbolic_end=1, add_end_char=1, add_start_char=1,
             symbolic_root=1,
             verbose=1):

    ponderation_normalize_loss = model.arguments["hyperparameters"]["ponderation_normalize_loss"]
    weight_pos_loss = model.arguments["hyperparameters"]["weight_pos_loss"]
    weight_binary_loss = model.arguments["hyperparameters"]["weight_binary_loss"]
    dataset = [REPO_LABEL2SET[_data_label] for _data_label in data_label]
    printing("SANITY TEST performed on {}".format(dataset), verbose=verbose, verbose_level=1)
    readers_dev = readers_load(datasets=dataset,
                               tasks=tasks, word_dictionary=model.word_dictionary,
                               word_dictionary_norm=model.word_nom_dictionary, char_dictionary=model.char_dictionary,
                               pos_dictionary=model.pos_dictionary, xpos_dictionary=model.xpos_dictionary,
                               type_dictionary=model.type_dictionary, use_gpu=use_gpu,
                               norm_not_norm="norm_not_norm" in tasks, word_decoder=word_decoding,
                               add_start_char=add_start_char, add_end_char=add_end_char, symbolic_end=symbolic_end,
                               symbolic_root=symbolic_root, bucket=bucketing, max_char_len=max_char_len,
                               verbose=verbose)

    batchIter_eval = data_gen_multi_task_sampling_batch(tasks=tasks, readers=readers_dev, batch_size=batch_size,
                                                        word_dictionary=model.word_dictionary,
                                                        char_dictionary=model.char_dictionary,
                                                        word_dictionary_norm=model.word_nom_dictionary,
                                                        pos_dictionary=model.pos_dictionary, dropout_input=0,
                                                        extend_n_batch=1, get_batch_mode=False, verbose=verbose)

    printing("SANITY TEST EVALUATION : computing loss ", verbose=verbose, verbose_level=2)

    loss_obj = LossCompute(model.generator, use_gpu=use_gpu, verbose=verbose,
                           multi_task_loss_ponderation=model.multi_task_loss_ponderation,
                           use="dev",
                           pos_pred="pos" in tasks,
                           tasks=tasks,
                           vocab_char_size=len(list(model.char_dictionary.instance2index.keys())) + 1,
                           char_decoding=char_decoding, word_decoding=word_decoding,
                           auxilliary_task_norm_not_norm="norm_not_norm" in tasks)

    print("PONDERATION", ponderation_normalize_loss)

    loss_dev, loss_details_dev, step_dev = run_epoch(batchIter_eval, model, loss_compute=loss_obj,
                                                     verbose=verbose, timing="", step=0,
                                                     weight_binary_loss=weight_binary_loss,
                                                     ponderation_normalize_loss=ponderation_normalize_loss,
                                                     weight_pos_loss=weight_pos_loss,
                                                     pos_batch="pos" in tasks,
                                                     log_every_x_batch=100)

    return loss_dev, loss_details_dev, step_dev
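
# Hedged usage sketch (not part of the source): "dev_set" is a placeholder
# REPO_LABEL2SET key, and `model` is assumed to be a LexNormalizer reloaded
# from a checkpoint elsewhere.
def _demo_get_loss(model):
    return get_loss(model=model,
                    data_label=["dev_set"],  # placeholder label
                    tasks=["normalize"],
                    use_gpu=False,
                    word_decoding=False,
                    char_decoding=True,
                    max_char_len=20,
                    bucketing=False,
                    batch_size=10)
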
def _test_iterator_multi_task(batch_size,
                              get_batch_mode,
                              tasks,
                              print_raw=False):

    data_set = [TEST]
    #tasks = ["normalize"]
    norm_not_norm = False
    word_decoder = False
    extend_n_batch = 1

    word_dictionary, word_dictionary_norm, char_dictionary, pos_dictionary, \
    xpos_dictionary, type_dictionary = conllu_data.create_dict(dict_path="../dictionaries/",
                                                               train_path=LIU_DEV,
                                                               dev_path=LIU_DEV,
                                                               test_path=None,
                                                               word_embed_dict={},
                                                               dry_run=False, pos_specific_data_set=DEMO,
                                                               tasks=tasks,
                                                               add_start_char=1)

    readers = readers_load(datasets=data_set,
                           tasks=tasks,
                           word_dictionary=word_dictionary,
                           word_dictionary_norm=word_dictionary_norm,
                           char_dictionary=char_dictionary,
                           pos_dictionary=pos_dictionary,
                           xpos_dictionary=xpos_dictionary,
                           type_dictionary=type_dictionary,
                           use_gpu=None,
                           norm_not_norm=norm_not_norm,
                           word_decoder=word_decoder,
                           add_start_char=1,
                           add_end_char=1,
                           symbolic_end=True,
                           symbolic_root=True,
                           verbose=1)

    iterator_multi = data_gen_multi_task_sampling_batch(
        tasks=tasks,
        readers=readers,
        batch_size=batch_size,
        word_dictionary=word_dictionary,
        word_dictionary_norm=word_dictionary_norm,
        char_dictionary=char_dictionary,
        pos_dictionary=pos_dictionary,
        get_batch_mode=get_batch_mode,
        extend_n_batch=extend_n_batch,
        print_raw=print_raw,
        verbose=1)

    counter_sent_input = 0
    for batch in iterator_multi:
        print(batch)
        counter_sent_input += batch.input_seq.size(0)

    return counter_sent_input, readers
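
# Hedged usage sketch (not part of the source): consumes the multi-task iterator
# over the TEST corpus and reports how many input sentences were seen.
def _demo_test_iterator():
    n_sents, readers = _test_iterator_multi_task(batch_size=2,
                                                 get_batch_mode=False,
                                                 tasks=["normalize"],
                                                 print_raw=False)
    print("{} input sentences consumed".format(n_sents))
    return n_sents, readers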
Example #3
                              tasks=tasks,
                              pos_specific_data_set=train_path[1] if len(tasks) > 1 and "pos" in tasks else None,
                              case=case,
                              add_start_char=1 if run_mode == "train" else None,
                              verbose=1)

    readers_train = readers_load(datasets=train_path,
                                 tasks=tasks,
                                 word_dictionary=word_dictionary,
                                 word_dictionary_norm=word_norm_dictionary,
                                 char_dictionary=char_dictionary,
                                 pos_dictionary=pos_dictionary,
                                 xpos_dictionary=xpos_dictionary,
                                 type_dictionary=type_dictionary,
                                 use_gpu=False,
                                 norm_not_norm=word_normalization,
                                 word_decoder=word_normalization,
                                 add_start_char=1,
                                 add_end_char=1,
                                 symbolic_end=1,
                                 symbolic_root=1,
                                 bucket=False,
                                 max_char_len=20,
                                 must_get_norm=word_normalization,
                                 verbose=1)

    batchIter = data_gen_multi_task_sampling_batch(
        tasks=tasks,
        readers=readers_train,
        batch_size=batch_size,
        word_dictionary=word_dictionary,
Example #4
def bpe_statistics_on_data(data_set,
                           dict_path,
                           mask_token_index,
                           bert_model="bert-cased",
                           bert_token_classification=None,
                           print_raw=False,
                           early_breaking=None,
                           output="bpe_stat",
                           verbose=1):

    voc_tokenizer = BERT_MODEL_DIC[bert_model]["vocab"]

    tokenizer = BertTokenizer.from_pretrained(voc_tokenizer)

    word_dictionary, word_dictionary_norm, char_dictionary, pos_dictionary, \
    xpos_dictionary, type_dictionary = conllu_data.create_dict(dict_path=dict_path,
                                                               train_path=data_set,
                                                               dev_path=data_set,
                                                               test_path=None,
                                                               word_embed_dict={},
                                                               expand_vocab_bool=False,
                                                               word_normalization=True,
                                                               tasks=["normalize"],
                                                               dry_run=False,
                                                               pos_specific_data_set=None,
                                                               add_start_char=1)

    data_set = [data_set]
    tasks = ["norm_not_norm"]
    readers = readers_load(datasets=data_set,
                           tasks=tasks,
                           word_dictionary=word_dictionary,
                           word_dictionary_norm=word_dictionary_norm,
                           char_dictionary=char_dictionary,
                           pos_dictionary=pos_dictionary,
                           xpos_dictionary=xpos_dictionary,
                           type_dictionary=type_dictionary,
                           use_gpu=None,
                           norm_not_norm=True,
                           word_decoder=True,
                           add_start_char=1,
                           add_end_char=1,
                           symbolic_end=True,
                           symbolic_root=True,
                           verbose=1)

    iterator_multi = data_gen_multi_task_sampling_batch(
        tasks=tasks,
        readers=readers,
        batch_size=1,
        word_dictionary=word_dictionary,
        char_dictionary=char_dictionary,
        pos_dictionary=pos_dictionary,
        word_dictionary_norm=word_dictionary_norm,
        extend_n_batch=1,
        print_raw=print_raw,
        get_batch_mode=False,
        verbose=1)
    if output == "bpe_stat":
        bpe_counter, bpe_normed_counter, bpe_need_norm_counter = stat_bpe(
            iterator_multi,
            tokenizer,
            mask_token_index=mask_token_index,
            null_token_index=BERT_MODEL_DIC[bert_model]["vocab_size"])
        return bpe_counter, bpe_normed_counter, bpe_need_norm_counter
    elif output == "embedding":
        out = get_embedding_mat(iterator_multi,
                                tokenizer,
                                bert_token_classification,
                                null_token_index=1,
                                early_breaking=early_breaking,
                                mask_token_index=mask_token_index,
                                verbose=verbose)
        return out
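
# Hedged usage sketch (not part of the source): every path and index below is
# a placeholder, and "bert-cased" must be a key of BERT_MODEL_DIC as above.
def _demo_bpe_statistics():
    return bpe_statistics_on_data(data_set="path/to/data.conll",      # placeholder path
                                  dict_path="path/to/dictionaries",   # placeholder path
                                  mask_token_index=103,               # placeholder mask id
                                  bert_model="bert-cased",
                                  output="bpe_stat",
                                  verbose=1)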
Example #5
def train(train_path,
          dev_path,
          n_epochs,
          normalization,
          dict_path=None,
          pos_specific_path=None,
          expand_vocab_dev_test=False,
          checkpointing_metric="loss-dev-all",
          batch_size=10,
          test_path=None,
          label_train="",
          label_dev="",
          use_gpu=None,
          lr=0.001,
          n_layers_word_encoder=1,
          n_layers_sent_cell=1,
          get_batch_mode_all=True,
          dropout_sent_encoder_cell=0,
          dropout_word_encoder_cell=0,
          dropout_word_decoder_cell=0,
          dropout_bridge=0,
          drop_out_word_encoder_out=0,
          drop_out_sent_encoder_out=0,
          dir_word_encoder=1,
          word_embed=False,
          word_embedding_dim=None,
          word_embedding_projected_dim=None,
          mode_word_encoding="cat",
          char_level_embedding_projection_dim=0,
          word_recurrent_cell_encoder=None,
          word_recurrent_cell_decoder=None,
          drop_out_char_embedding_decoder=0,
          hidden_size_encoder=None,
          output_dim=None,
          char_embedding_dim=None,
          hidden_size_decoder=None,
          hidden_size_sent_encoder=None,
          freq_scoring=5,
          compute_scoring_curve=False,
          score_to_compute_ls=None,
          mode_norm_ls=None,
          checkpointing=True,
          freq_checkpointing=None,
          freq_writer=None,
          model_dir=None,
          reload=False,
          model_full_name=None,
          model_id_pref="",
          print_raw=False,
          model_specific_dictionary=False,
          dir_sent_encoder=1,
          add_start_char=None,
          add_end_char=1,
          overall_label="DEFAULT",
          overall_report_dir=CHECKPOINT_DIR,
          compute_mean_score_per_sent=False,
          weight_binary_loss=1,
          dense_dim_auxilliary=None,
          dense_dim_auxilliary_2=None,
          unrolling_word=False,
          char_src_attention=False,
          debug=False,
          timing=False,
          dev_report_loss=True,
          bucketing=True,
          policy=None,
          teacher_force=True,
          proportion_pred_train=None,
          shared_context="all",
          clipping=None,
          extend_n_batch=1,
          stable_decoding_state=False,
          init_context_decoder=True,
          dense_dim_auxilliary_pos=None,
          dense_dim_auxilliary_pos_2=None,
          tasks=None,
          word_decoding=False,
          char_decoding=True,
          dense_dim_word_pred=None,
          dense_dim_word_pred_2=None,
          dense_dim_word_pred_3=None,
          symbolic_root=False,
          symbolic_end=False,
          extern_emb_dir=None,
          activation_word_decoder=None,
          activation_char_decoder=None,
          extra_arg_specific_label="",
          freezing_mode=False,
          freeze_ls_param_prefix=None,
          multi_task_loss_ponderation=None,
          max_char_len=None,
          attention_tagging=False,
          dropout_input=None,
          optimizer="adam",
          verbose=1):

    if multi_task_loss_ponderation is not None:
        sanity_check_loss_poneration(multi_task_loss_ponderation,
                                     verbose=verbose)
    if teacher_force:
        assert proportion_pred_train is None, "proportion_pred_train should be None in teacher_force mode"
    else:
        assert 100 > proportion_pred_train > 0, "proportion_pred_train should be between 0 and 100"
    auxilliary_task_norm_not_norm = "norm_not_norm" in tasks
    auxilliary_task_pos = "pos" in tasks
    if "normalize" not in tasks:
        word_decoding = False
        char_decoding = False
    if not unrolling_word:
        assert not char_src_attention, "ERROR : char_src_attention requires step-by-step unrolling"
    printing("WARNING bucketing is {} ",
             var=bucketing,
             verbose=verbose,
             verbose_level=1)
    if freq_writer is None:
        freq_writer = freq_checkpointing
        printing("REPORTING freq_writer set to freq_checkpointing {}",
                 var=[freq_checkpointing],
                 verbose=verbose,
                 verbose_level=1)
    if auxilliary_task_norm_not_norm:
        printing(
            "MODEL : training model with auxiliary task (loss weighted with {})",
            var=[weight_binary_loss],
            verbose=verbose,
            verbose_level=1)
    #if compute_scoring_curve:
    #assert score_to_compute_ls is not None and mode_norm_ls is not None and freq_scoring is not None, \
    #    "ERROR score_to_compute_ls and mode_norm_ls should not be None"
    use_gpu = use_gpu_(use_gpu)
    hardware_chosen = "GPU" if use_gpu else "CPU"
    printing("{} hardware mode ",
             var=([hardware_chosen]),
             verbose_level=0,
             verbose=verbose)
    freq_checkpointing = int(
        n_epochs / 10
    ) if checkpointing and freq_checkpointing is None else freq_checkpointing
    assert add_start_char == 1, "ERROR : add_start_char must be activated due to the decoding behavior of output_text_"
    printing("WARNING : add_start_char is {} and add_end_char {}  ".format(
        add_start_char, add_end_char),
             verbose=verbose,
             verbose_level=0)
    printing("TRAINING : checkpointing every {} epoch",
             var=freq_checkpointing,
             verbose=verbose,
             verbose_level=1)
    if reload:
        assert model_full_name is not None and len(
            model_id_pref
        ) == 0 and model_dir is not None and dict_path is not None
    else:
        assert model_full_name is None and model_dir is None

    if not debug:
        pdb.set_trace = lambda: None

    loss_training = []
    loss_developing = []
    # could not use the template directly because the variable would not be reinitialized
    loss_details_template = {
        'loss_seq_prediction': [],
        'other': {},
        'loss_binary': [],
        'loss_overall': []
    } if auxilliary_task_norm_not_norm else None

    # used to compute scores for early stopping (if checkpointing_metric != loss) and for curve plots
    evaluation_set_reporting = dev_path
    mode_norm_ls = ["all"]
    score_to_compute_ls = ["exact_match"]
    print("WARNING : train.py overwriting mode_norm_ls and score_to_compute_ls arguments")
    curve_scores = {
        score + "-" + mode_norm + "-" + REPO_DATASET[data]: []
        for score in score_to_compute_ls for mode_norm in mode_norm_ls
        for data in evaluation_set_reporting
    } if compute_scoring_curve else None

    printing("WARNING :  lr {} ".format(lr, add_start_char, add_end_char),
             verbose=verbose,
             verbose_level=0)
    printing(
        "INFO : dictionary is (re)created from scratch based on train_path {} and dev_path {}"
        .format(train_path, dev_path),
        verbose=verbose,
        verbose_level=1)

    if not model_specific_dictionary:
        word_dictionary, char_dictionary, pos_dictionary, \
        xpos_dictionary, type_dictionary = \
        conllu_data.load_dict(dict_path=dict_path,
                              train_path=train_path,
                              dev_path=dev_path,
                              test_path=test_path,
                              word_embed_dict={},
                              dry_run=False,
                              force_new_dic=True,
                              add_start_char=add_start_char, verbose=1)

        voc_size = len(char_dictionary.instance2index) + 1
        word_voc_input_size = len(word_dictionary.instance2index) + 1
        printing("DICTIONARY ; character vocabulary is len {} : {} ",
                 var=str(
                     len(char_dictionary.instance2index) + 1,
                     char_dictionary.instance2index),
                 verbose=verbose,
                 verbose_level=0)
        _train_path, _dev_path, _test_path, _add_start_char = None, None, None, None
    else:
        voc_size = None
        word_voc_input_size = 0
        if not reload:
            # we need to feed the model the data so that it computes the model_specific_dictionary
            _train_path = train_path
            _dev_path = dev_path
            _test_path = test_path
            _add_start_char = add_start_char
        else:
            # as we reload, we don't need the data
            _train_path, _dev_path, _test_path, _add_start_char = None, None, None, None

    model = LexNormalizer(
        generator=Generator,
        expand_vocab_dev_test=expand_vocab_dev_test,
        dense_dim_auxilliary=dense_dim_auxilliary,
        dense_dim_auxilliary_2=dense_dim_auxilliary_2,
        tasks=tasks,
        weight_binary_loss=weight_binary_loss,
        dense_dim_auxilliary_pos=dense_dim_auxilliary_pos,
        dense_dim_auxilliary_pos_2=dense_dim_auxilliary_pos_2,
        load=reload,
        char_embedding_dim=char_embedding_dim,
        voc_size=voc_size,
        dir_model=model_dir,
        use_gpu=use_gpu,
        dict_path=dict_path,
        word_recurrent_cell_decoder=word_recurrent_cell_decoder,
        word_recurrent_cell_encoder=word_recurrent_cell_encoder,
        train_path=_train_path,
        dev_path=_dev_path,
        pos_specific_path=pos_specific_path,
        add_start_char=_add_start_char,
        model_specific_dictionary=model_specific_dictionary,
        dir_word_encoder=dir_word_encoder,
        drop_out_sent_encoder_cell=dropout_sent_encoder_cell,
        drop_out_word_encoder_cell=dropout_word_encoder_cell,
        drop_out_word_decoder_cell=dropout_word_decoder_cell,
        drop_out_bridge=dropout_bridge,
        drop_out_char_embedding_decoder=drop_out_char_embedding_decoder,
        drop_out_word_encoder_out=drop_out_word_encoder_out,
        drop_out_sent_encoder_out=drop_out_sent_encoder_out,
        n_layers_word_encoder=n_layers_word_encoder,
        dir_sent_encoder=dir_sent_encoder,
        n_layers_sent_cell=n_layers_sent_cell,
        hidden_size_encoder=hidden_size_encoder,
        output_dim=output_dim,
        model_id_pref=model_id_pref,
        model_full_name=model_full_name,
        hidden_size_sent_encoder=hidden_size_sent_encoder,
        shared_context=shared_context,
        unrolling_word=unrolling_word,
        char_src_attention=char_src_attention,
        word_decoding=word_decoding,
        dense_dim_word_pred=dense_dim_word_pred,
        dense_dim_word_pred_2=dense_dim_word_pred_2,
        dense_dim_word_pred_3=dense_dim_word_pred_3,
        char_decoding=char_decoding,
        mode_word_encoding=mode_word_encoding,
        char_level_embedding_projection_dim=char_level_embedding_projection_dim,
        stable_decoding_state=stable_decoding_state,
        init_context_decoder=init_context_decoder,
        symbolic_root=symbolic_root,
        symbolic_end=symbolic_end,
        word_embed=word_embed,
        word_embedding_dim=word_embedding_dim,
        word_embedding_projected_dim=word_embedding_projected_dim,
        word_embed_dir=extern_emb_dir,
        word_voc_input_size=word_voc_input_size,
        teacher_force=teacher_force,
        activation_char_decoder=activation_char_decoder,
        activation_word_decoder=activation_word_decoder,
        test_path=_test_path,
        extend_vocab_with_test=_test_path is not None,
        attention_tagging=attention_tagging,
        multi_task_loss_ponderation=multi_task_loss_ponderation,  # needed for save/reloading purposes
        hidden_size_decoder=hidden_size_decoder,
        verbose=verbose,
        timing=timing)

    pos_batch = auxilliary_task_pos

    if use_gpu:
        model = model.cuda()
        printing("TYPE model is cuda : {} ",
                 var=(next(model.parameters()).is_cuda),
                 verbose=verbose,
                 verbose_level=4)
        #model.decoder.attn_layer = model.decoder.attn_layer.cuda()
    if not model_specific_dictionary:
        model.word_dictionary, model.char_dictionary, model.pos_dictionary, \
        model.xpos_dictionary, model.type_dictionary = word_dictionary, char_dictionary, pos_dictionary, xpos_dictionary, type_dictionary

    starting_epoch = model.arguments["info_checkpoint"][
        "n_epochs"] if reload else 1
    reloading = "" if not reload else " reloaded from " + str(starting_epoch)
    n_epochs += starting_epoch
    if freezing_mode:
        assert freeze_ls_param_prefix is not None, "freeze_ls_param_prefix should not be None"
        printing("TRAINING : freezing is on for layers {} ",
                 var=[freeze_ls_param_prefix],
                 verbose=verbose,
                 verbose_level=1)
        for name, param in model.named_parameters():
            for freeze_param in freeze_ls_param_prefix:
                if name.startswith(freeze_param):
                    param.requires_grad = False
                    printing("TRAINING : freezing {} parameter ",
                             var=[name],
                             verbose=verbose,
                             verbose_level=1)

    _loss_dev = 1000
    checkpoint_score_saved = 1000
    _loss_train = 1000
    counter_no_decrease = 0
    saved_epoch = 1
    if reload:
        printing(
            "TRAINING : RELOADED MODE , starting from checkpointed epoch {} ",
            var=starting_epoch,
            verbose_level=0,
            verbose=verbose)
    printing(
        "TRAINING : Running from {} to {} epochs : training on {} evaluating on {}",
        var=(starting_epoch, n_epochs, train_path, dev_path),
        verbose=verbose,
        verbose_level=0)
    starting_time = time.time()
    total_time = 0
    x_axis_epochs = []
    epoch_ls_dev = []
    epoch_ls_train = []

    train_path = [train_path] if isinstance(train_path, str) else train_path
    dev_path = [dev_path] if isinstance(dev_path, str) else dev_path

    readers_train = readers_load(
        datasets=train_path,
        tasks=tasks,
        word_dictionary=model.word_dictionary,
        word_dictionary_norm=model.word_nom_dictionary,
        char_dictionary=model.char_dictionary,
        pos_dictionary=model.pos_dictionary,
        xpos_dictionary=model.xpos_dictionary,
        type_dictionary=model.type_dictionary,
        use_gpu=use_gpu,
        norm_not_norm=auxilliary_task_norm_not_norm,
        word_decoder=word_decoding,
        add_start_char=add_start_char,
        add_end_char=add_end_char,
        symbolic_end=symbolic_end,
        symbolic_root=symbolic_root,
        bucket=bucketing,
        max_char_len=max_char_len,
        verbose=verbose)

    readers_dev = readers_load(datasets=dev_path,
                               tasks=tasks,
                               word_dictionary=model.word_dictionary,
                               word_dictionary_norm=model.word_nom_dictionary,
                               char_dictionary=model.char_dictionary,
                               pos_dictionary=model.pos_dictionary,
                               xpos_dictionary=model.xpos_dictionary,
                               type_dictionary=model.type_dictionary,
                               use_gpu=use_gpu,
                               norm_not_norm=auxilliary_task_norm_not_norm,
                               word_decoder=word_decoding,
                               add_start_char=add_start_char,
                               add_end_char=add_end_char,
                               symbolic_end=symbolic_end,
                               symbolic_root=symbolic_root,
                               bucket=bucketing,
                               max_char_len=max_char_len,
                               verbose=verbose)

    dir_writer = os.path.join(overall_report_dir, "runs",
                              "{}-model".format(model.model_full_name))
    writer = SummaryWriter(log_dir=dir_writer)
    printing(
        "REPORT : run \ntensorboard --logdir={} --host=localhost --port=9101 "
        "(run tensorboard remotely : sh $EXPERIENCE/track/run_tensorboard_serveo.sh $log_dir $port )  ",
        var=[dir_writer],
        verbose=verbose,
        verbose_level=1)
    printing("REPORT : summary writer will be located {}",
             var=[dir_writer],
             verbose_level=1,
             verbose=verbose)
    step_train = 0
    step_dev = 0
    if ADAPTABLE_SCORING:
        printing("WARNING : scoring epochs not regular (more at the begining ",
                 verbose_level=1,
                 verbose=verbose)
        freq_scoring = 1
    checkpoint_dir_former = None

    for epoch in tqdm(range(starting_epoch, n_epochs),
                      disable=disable_tqdm_level(verbose=verbose, verbose_level=0)):
        #parameters = filter(lambda p: p.requires_grad, model.parameters())
        decay_rate = 1
        opt = dptx.get_optimizer(model.parameters(),
                                 lr=lr * decay_rate**epoch,
                                 optimizer=optimizer)
        assert policy in AVAILABLE_SCHEDULING_POLICIES
        policy_dic = eval(policy)(epoch) if policy is not None else None
        # TODO : no need to re-output multi_task_mode : tasks should be harmonized to read
        multi_task_mode, ponderation_normalize_loss, weight_binary_loss, weight_pos_loss = scheduling_policy(
            epoch=epoch, phases_ls=policy_dic, tasks=tasks)
        printing(
            "TRAINING Tasks scheduling : ponderation_normalize_loss is {} weight_binary_loss is {}"
            " weight_pos_loss is {} mode is {} ",
            var=[
                ponderation_normalize_loss, weight_binary_loss,
                weight_pos_loss, multi_task_mode
            ],
            verbose=verbose,
            verbose_level=2)

        printing("TRAINING : Starting {} epoch out of {} ",
                 var=(epoch + 1, n_epochs),
                 verbose=verbose,
                 verbose_level=1)
        model.train()
        #batchIter = data_gen_conllu(data_read_train,model.word_dictionary, model.char_dictionary,normalization=normalization,get_batch_mode=get_batch_mode_all,batch_size=batch_size, extend_n_batch=extend_n_batch,print_raw=print_raw, timing=timing, pos_dictionary=model.pos_dictionary,verbose=verbose)
        batchIter = data_gen_multi_task_sampling_batch(
            tasks=tasks,
            readers=readers_train,
            batch_size=batch_size,
            word_dictionary=model.word_dictionary,
            char_dictionary=model.char_dictionary,
            pos_dictionary=model.pos_dictionary,
            word_dictionary_norm=model.word_nom_dictionary,
            get_batch_mode=get_batch_mode_all,
            extend_n_batch=extend_n_batch,
            dropout_input=dropout_input,
            verbose=verbose)
        start = time.time()
        printing(
            "TRAINING : TEACHER FORCE : Scheduled Sampling proportion of training on predictions is {} ",
            var=[proportion_pred_train],
            verbose=verbose,
            verbose_level=2)

        #rep_tl.checkout_layer_name("encoder.seq_encoder.weight_ih_l0", model.named_parameters(), info_epoch=epoch)

        loss_train, loss_details_train, step_train = run_epoch(
            batchIter,
            model,
            LossCompute(
                model.generator,
                opt=opt,
                multi_task_loss_ponderation=model.multi_task_loss_ponderation,
                auxilliary_task_norm_not_norm=auxilliary_task_norm_not_norm,
                model=model,
                writer=writer,
                use="train",
                use_gpu=use_gpu,
                verbose=verbose,
                tasks=tasks,
                char_decoding=char_decoding,
                word_decoding=word_decoding,
                pos_pred=auxilliary_task_pos,
                vocab_char_size=len(
                    list(model.char_dictionary.instance2index.keys())) + 1,
                timing=timing),
            verbose=verbose,
            i_epoch=epoch,
            multi_task_mode=multi_task_mode,
            n_epochs=n_epochs,
            timing=timing,
            weight_binary_loss=weight_binary_loss,
            weight_pos_loss=weight_pos_loss,
            ponderation_normalize_loss=ponderation_normalize_loss,
            step=step_train,
            clipping=clipping,
            pos_batch=pos_batch,
            proportion_pred_train=proportion_pred_train,
            log_every_x_batch=100)

        writer_weights_and_grad(model=model,
                                freq_writer=freq_writer,
                                epoch=epoch,
                                writer=writer,
                                verbose=verbose)

        _train_ep_time, start = get_timing(start)
        model.eval()
        # TODO : should be added in the freq_checkpointing otherwise useless
        #batchIter_eval = data_gen_conllu(data_read_dev,model.word_dictionary, model.char_dictionary,batch_size=batch_size, get_batch_mode=False,normalization=normalization, extend_n_batch=1,pos_dictionary=model.pos_dictionary, verbose=verbose)
        batchIter_eval = data_gen_multi_task_sampling_batch(
            tasks=tasks,
            readers=readers_dev,
            batch_size=batch_size,
            word_dictionary=model.word_dictionary,
            char_dictionary=model.char_dictionary,
            word_dictionary_norm=model.word_nom_dictionary,
            pos_dictionary=model.pos_dictionary,
            dropout_input=0,
            extend_n_batch=1,
            get_batch_mode=False,
            verbose=verbose)
        _create_iter_time, start = get_timing(start)
        # TODO : should be able to factorize this into a single run_epoch() for train and dev (the computation would be the same)
        # TODO : should not evaluate at every epoch : should evaluate every x epochs, check whether the loss decreases, and checkpoint
        if (dev_report_loss and
            (epoch % freq_checkpointing == 0)) or (epoch + 1 == n_epochs):
            printing("EVALUATION : computing loss on dev epoch {}  ",
                     var=epoch,
                     verbose=verbose,
                     verbose_level=1)
            loss_obj = LossCompute(
                model.generator,
                use_gpu=use_gpu,
                verbose=verbose,
                multi_task_loss_ponderation=model.multi_task_loss_ponderation,
                writer=writer,
                use="dev",
                vocab_char_size=len(
                    list(model.char_dictionary.instance2index.keys())) + 1,
                pos_pred=auxilliary_task_pos,
                tasks=tasks,
                char_decoding=char_decoding,
                word_decoding=word_decoding,
                auxilliary_task_norm_not_norm=auxilliary_task_norm_not_norm)
            loss_dev, loss_details_dev, step_dev = run_epoch(
                batchIter_eval,
                model,
                loss_compute=loss_obj,
                i_epoch=epoch,
                n_epochs=n_epochs,
                verbose=verbose,
                timing=timing,
                step=step_dev,
                weight_binary_loss=weight_binary_loss,
                ponderation_normalize_loss=ponderation_normalize_loss,
                weight_pos_loss=weight_pos_loss,
                pos_batch=pos_batch,
                log_every_x_batch=100)

            loss_developing.append(loss_dev)
            epoch_ls_dev.append(epoch)

            if auxilliary_task_norm_not_norm:
                # in this case we report loss detail
                for ind, loss_key in enumerate(loss_details_dev.keys()):
                    if loss_key != "other":
                        loss_details_template[loss_key].append(
                            loss_details_dev[loss_key])
            else:
                loss_details_template = None

        _eval_time, start = get_timing(start)
        loss_training.append(loss_train)
        epoch_ls_train.append(epoch)
        time_per_epoch = time.time() - starting_time
        total_time += time_per_epoch
        starting_time = time.time()

        # computing exact/edit score
        exact_only = False
        overall_report_ls = None
        # MODIFIED FREQ SCORING TO FREQ CHECKPOINTING

        if compute_scoring_curve and (
            (epoch %
             (freq_checkpointing if checkpointing_metric != "loss-dev-all" else
              freq_scoring) == 0) or (epoch + 1 == n_epochs)):
            if epoch < 1 and ADAPTABLE_SCORING:
                freq_scoring *= 5
            if epoch == 5 and ADAPTABLE_SCORING:
                freq_scoring *= 3
            if epoch == 15 and ADAPTABLE_SCORING:
                freq_scoring *= 2
            if (epoch + 1 == n_epochs):
                printing("EVALUATION : final scoring ",
                         verbose,
                         verbose_level=0)
            x_axis_epochs.append(epoch)
            printing("EVALUATION : Computing score on {} and {}  ",
                     var=(score_to_compute_ls, mode_norm_ls),
                     verbose=verbose,
                     verbose_level=1)
            overall_report_ls = []
            for task, eval_data in zip(tasks, evaluation_set_reporting):
                eval_label = REPO_DATASET[eval_data]
                assert len(set(evaluation_set_reporting)) == len(evaluation_set_reporting),\
                    "ERROR : twice the same dataset has been provided for reporting which will mess up the loss"
                printing("EVALUATION on {} ",
                         var=[eval_data],
                         verbose=verbose,
                         verbose_level=1)
                scores = evaluate(
                    data_path=eval_data,
                    use_gpu=use_gpu,
                    overall_label=overall_label,
                    overall_report_dir=overall_report_dir,
                    score_to_compute_ls=score_to_compute_ls,
                    mode_norm_ls=mode_norm_ls,
                    label_report=eval_label,
                    model=model,
                    normalization=normalization,
                    print_raw=False,
                    model_specific_dictionary=True,
                    get_batch_mode_evaluate=False,
                    compute_mean_score_per_sent=compute_mean_score_per_sent,
                    batch_size=batch_size,
                    word_decoding=word_decoding,
                    dir_report=model.dir_model,
                    debug=debug,
                    evaluated_task=task,
                    tasks=tasks,
                    verbose=verbose)
                # we keep everything here in case we want to do some fancy early-stopping metric
                overall_report_ls.extend(scores)

                # dirty but does the job
                exact_only = True
                DEPRECATED = False
                if DEPRECATED:
                    curve_scores = update_curve_dic(
                        score_to_compute_ls=score_to_compute_ls,
                        mode_norm_ls=mode_norm_ls,
                        eval_data=eval_label,
                        former_curve_scores=curve_scores,
                        scores=scores,
                        exact_only=exact_only)
                    curve_ls_tuple = [
                        (loss_ls, label)
                        for label, loss_ls in curve_scores.items()
                        if isinstance(loss_ls, list)
                    ]
                    curves = [tupl[0] for tupl in curve_ls_tuple]
                    val_ls = [
                        tupl[1] + "({}tok)".format(info_token)
                        for tupl in curve_ls_tuple
                        for data, info_token in curve_scores.items()
                        if not isinstance(info_token, list)
                        if tupl[1].endswith(data)
                    ]
            score_to_compute_ls = ["exact"
                                   ] if exact_only else score_to_compute_ls
            if DEPRECIATED:
                for score_plot in score_to_compute_ls:
                    # dirty but does the job
                    print(val_ls)
                    if exact_only:
                        val_ls = [
                            val for val in val_ls
                            if val.startswith("exact-all")
                            or val.startswith("exact-NORMED")
                            or val.startswith("exact-NEED_NORM")
                        ]
                        #val_ls = ["{}-all-{}".format(metric,REPO_DATASET[eval]) for eval in evaluation_set_reporting for metric in ["exact", "edit"]]
                        curves = [curve for curve in curves if len(curve) > 0]

                    simple_plot_ls(losses_ls=curves,
                                   labels=val_ls,
                                   final_loss="",
                                   save=True,
                                   filter_by_label=score_plot,
                                   x_axis=x_axis_epochs,
                                   dir=model.dir_model,
                                   prefix=model.model_full_name,
                                   epochs=str(epoch) + reloading,
                                   verbose=verbose,
                                   lr=lr,
                                   label_color_0=REPO_DATASET[
                                       evaluation_set_reporting[0]],
                                   label_color_1=REPO_DATASET[
                                       evaluation_set_reporting[1]])

        # WARNING : only saving if the metric improves ; not reloading the former model when we reload
        if (checkpointing
                and epoch % freq_checkpointing == 0) or (epoch + 1
                                                         == n_epochs):
            if checkpointing_metric != "loss-dev-all" and epoch < STARTING_CHECKPOINTING_WITH_SCORE:
                _checkpointing_metric = "loss-dev-all"
            elif checkpointing_metric != "loss-dev-all":
                _checkpointing_metric = checkpointing_metric
                if epoch == STARTING_CHECKPOINTING_WITH_SCORE:
                    checkpoint_score_saved = -report["score"]
                    printing("Checkoint info : switching "
                             "checkpoint_score_saved to {} : {}".format(
                                 checkpointing_metric, checkpoint_score_saved),
                             verbose_level=1,
                             verbose=verbose)
            elif checkpointing_metric == "loss-dev-all":
                _checkpointing_metric = checkpointing_metric
            else:
                raise (Exception("You missed a case"))

            dir_plot_detailed = simple_plot(
                final_loss=0,
                epoch_ls_1=epoch_ls_dev,
                epoch_ls_2=epoch_ls_dev,
                loss_2=loss_details_template.get("loss_binary", None),
                loss_ls=loss_details_template["loss_seq_prediction"],
                epochs=str(epoch) + reloading,
                label="dev-seq_prediction",
                label_2="dev-binary",
                save=True,
                dir=model.dir_model,
                verbose=verbose,
                verbose_level=1,
                lr=lr,
                prefix=model.model_full_name + "-details",
                show=False) if loss_details_template is not None else None

            dir_plot = simple_plot(final_loss=loss_train,
                                   loss_2=loss_developing,
                                   loss_ls=loss_training,
                                   epochs=str(epoch) + reloading,
                                   epoch_ls_1=epoch_ls_train,
                                   epoch_ls_2=epoch_ls_dev,
                                   label=label_train + "-train",
                                   label_2=label_dev + "-dev",
                                   save=True,
                                   dir=model.dir_model,
                                   verbose=verbose,
                                   verbose_level=1,
                                   lr=lr,
                                   prefix=model.model_full_name,
                                   show=False)

            sanity_check_checkpointing_metric(
                tasks, checkpointing_metric=_checkpointing_metric)

            if _checkpointing_metric != "loss-dev-all" or \
                    (epoch == (STARTING_CHECKPOINTING_WITH_SCORE-1) and checkpointing_metric != "loss-dev-all"):
                # for now only useful when different from loss --> compute metric on dev all and default always
                # assuming unitask thanks to the sanity check
                assert overall_report_ls is not None, "ERROR overall_report_ls  was not defined "
                report = rep_tl.get_score(
                    overall_report_ls,
                    metric=TASKS_PARAMETER[tasks[0]].get("default_metric"),
                    data=REPO_DATASET[dev_path[0]],
                    info_score="all",
                    task=tasks[0])
                # Negative because it's an accuracy (the checkpoint score is minimized)
                checkpoint_score = -report["score"]
            else:
                checkpoint_score = loss_dev

            model, checkpoint_score_saved, counter_no_decrease, saved_epoch, checkpoint_dir_former = \
                    checkpoint(loss_saved=checkpoint_score_saved, loss=checkpoint_score,
                               checkpointing_metric=_checkpointing_metric,
                               model=model, counter_no_decrease=counter_no_decrease,
                               checkpoint_dir_former=checkpoint_dir_former,
                               saved_epoch=saved_epoch, model_dir=model.dir_model,
                               extra_checkpoint_label="1st_train" if not reload else "start_{}_ep-{}".format(starting_epoch, extra_arg_specific_label),
                               extra_arg_specific_label=extra_arg_specific_label,
                               info_checkpoint={"n_epochs": epoch, "batch_size": batch_size, "optimizer": optimizer,
                                                "gradient_clipping": clipping,
                                                "tasks_schedule_policy": policy,
                                                "teacher_force": teacher_force,
                                                "proportion_pred_train": proportion_pred_train,
                                                "train_data_path": train_path, "dev_data_path": dev_path,
                                                "other": {"error_curves": dir_plot, "loss": loss_dev,
                                                          "sanity_test": {"loss": loss_dev,
                                                                          "data": [REPO_DATASET[_dev_path] for _dev_path in dev_path],
                                                                          "batch_size": batch_size},
                                                          "error_curves_details": dir_plot_detailed,
                                                          "dropout_input": dropout_input,
                                                          "checkpointing_metric": _checkpointing_metric,
                                                          "multi_task_loss_ponderation": multi_task_loss_ponderation,
                                                          "weight_binary_loss": weight_binary_loss*int(auxilliary_task_norm_not_norm),
                                                          "weight_pos_loss": weight_pos_loss*int(auxilliary_task_pos),
                                                          "ponderation_normalize_loss": ponderation_normalize_loss,
                                                          "data": "dev", "seed(np/torch)": (SEED_NP, SEED_TORCH),
                                                          "extend_n_batch": extend_n_batch,
                                                          "lr": lr, "optim_strategy": "lr_constant",
                                                          "time_training(min)": "{0:.2f}".format(total_time/60),
                                                          "average_per_epoch(min)": "{0:.2f}".format((total_time/n_epochs)/60)}},
                               epoch=epoch, epochs=n_epochs-1,
                               keep_all_checkpoint=False if epoch > starting_epoch else True,# we have nothing to remove after 1st epoch
                               verbose=verbose)
            if counter_no_decrease * freq_checkpointing >= BREAKING_NO_DECREASE:
                printing(
                    "CHECKPOINTING : Breaking training : loss did not decrease on dev for 10 checkpoints "
                    "so keeping model from {} epoch  ".format(saved_epoch),
                    verbose=verbose,
                    verbose_level=0)
                break
        printing(
            "LOSS train {:.3f}, dev {:.3f} for epoch {} out of {} epochs ",
            var=(loss_train, loss_dev, epoch, n_epochs),
            verbose=verbose,
            verbose_level=1)

        if timing:
            print("Summary : {}".format(
                OrderedDict([("_train_ep_time", _train_ep_time),
                             ("_create_iter_time", _create_iter_time),
                             ("_eval_time", _eval_time)])))

    writer.close()
    printing(
        "REPORT : run : \n tensorboard --logdir={} --host=localhost --port=9101  ",
        var=[dir_writer],
        verbose=verbose,
        verbose_level=1)

    #rep_tl.checkout_layer_name("encoder.seq_encoder.weight_ih_l0", model.named_parameters(), info_epoch="LAST")

    simple_plot(final_loss=loss_dev,
                loss_ls=loss_training,
                loss_2=loss_developing,
                epoch_ls_1=epoch_ls_train,
                epoch_ls_2=epoch_ls_dev,
                epochs=n_epochs,
                save=True,
                dir=model.dir_model,
                label=label_train,
                label_2=label_dev,
                lr=lr,
                prefix=model.model_full_name + "-LAST",
                verbose=verbose)

    return model.model_full_name
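
# Hedged, minimal usage sketch (not part of the source): all paths and dimensions
# are placeholders, add_start_char=1 is required by the assertion in train(), and
# we assume the default policy=None passes the AVAILABLE_SCHEDULING_POLICIES check.
def _demo_train():
    return train(train_path="path/to/train.conll",  # placeholder path
                 dev_path="path/to/dev.conll",      # placeholder path
                 n_epochs=20,
                 normalization=True,
                 tasks=["normalize"],
                 batch_size=10,
                 add_start_char=1,                  # asserted == 1 in train()
                 hidden_size_encoder=50,
                 hidden_size_decoder=50,
                 hidden_size_sent_encoder=50,
                 char_embedding_dim=20,
                 output_dim=50,
                 model_id_pref="demo",
                 model_specific_dictionary=True,
                 checkpointing=True,
                 freq_checkpointing=1,
                 verbose=1)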
Example #6
    inv_word_dic = word_dictionary.instance2index
    # load , mask, bucket and index data
    tokenizer = BertTokenizer.from_pretrained(voc_tokenizer)
    assert tokenizer is not None, "ERROR : tokenizer is None , voc_tokenizer failed to be loaded {}".format(
        voc_tokenizer)
    if run_mode == "train":
        readers_train = readers_load(
            datasets=args.train_path,
            tasks=args.tasks,
            word_dictionary=word_dictionary,
            word_dictionary_norm=word_norm_dictionary,
            char_dictionary=char_dictionary,
            pos_dictionary=pos_dictionary,
            xpos_dictionary=xpos_dictionary,
            type_dictionary=type_dictionary,
            use_gpu=use_gpu_hardcoded_readers,
            norm_not_norm=auxilliary_task_norm_not_norm,
            word_decoder=True,
            add_start_char=1,
            add_end_char=1,
            symbolic_end=1,
            symbolic_root=1,
            bucket=True,
            max_char_len=20,
            must_get_norm=True,
            verbose=verbose)

        readers_dev = readers_load(
            datasets=args.dev_path,
            tasks=args.tasks,
            word_dictionary=word_dictionary,
            word_dictionary_norm=word_norm_dictionary,


def evaluate(batch_size, data_path, tasks, evaluated_task,
             write_report=True, dir_report=None,
             dict_path=None, model_full_name=None,
             score_to_compute_ls=None, mode_norm_ls=None, get_batch_mode_evaluate=True,
             overall_label="ALL_MODELS", overall_report_dir=CHECKPOINT_DIR, bucket=False,
             model_specific_dictionary=True, label_report="",
             print_raw=False,
             model=None,
             compute_mean_score_per_sent=False, write_output=False,
             word_decoding=False, char_decoding=True,
             extra_arg_specific_label="", scoring_func_sequence_pred="BLUE",
             max_char_len=None,
             normalization=True, debug=False,
             force_new_dic=False,
             use_gpu=None, verbose=0):
    assert model_specific_dictionary, "ERROR : only model_specific_dictionary = True is supported for now"
    # NB : for now you have to load the dictionary when evaluating (it cannot be recomputed) (this ability could be added to LexNormalizer)
    use_gpu = use_gpu_(use_gpu)
    hardware_chosen = "GPU" if use_gpu else "CPU"
    printing("{} mode ", var=([hardware_chosen]), verbose_level=0, verbose=verbose)
    printing("EVALUATION : evaluating with compute_mean_score_per_sent {}".format(compute_mean_score_per_sent), verbose=verbose, verbose_level=1)

    if mode_norm_ls is None:
        mode_norm_ls = ["all", "NORMED", "NEED_NORM"]
    if write_report:
        assert dir_report is not None
    if model is not None:
        assert model_full_name is None and dict_path is None, \
            "ERROR as model is provided : model_full_name and dict_path should be None"
    else:
        assert model_full_name is not None and dict_path is not None,\
            "ERROR : model_full_name and dict_path required to load model "
    voc_size = None
    if not debug:
        pdb.set_trace = lambda: 1

    model = LexNormalizer(generator=Generator, load=True, model_full_name=model_full_name,
                          tasks=tasks,
                          word_decoding=word_decoding, char_decoding=char_decoding,
                          voc_size=voc_size, use_gpu=use_gpu, dict_path=dict_path, model_specific_dictionary=True,
                          dir_model=os.path.join(PROJECT_PATH, "checkpoints", model_full_name + "-folder"),
                          extra_arg_specific_label=extra_arg_specific_label,
                          loading_sanity_test=True,
                          verbose=verbose
                          ) if model is None else model

    if score_to_compute_ls is None:
        score_to_compute_ls = ["edit", "exact"]
        if model.auxilliary_task_norm_not_norm:
            score_to_compute_ls.extend(SCORE_AUX)

    printing("EVALUATION : Evaluating {} metric with details {}  ", var=[score_to_compute_ls, mode_norm_ls], verbose=verbose, verbose_level=3)

    #rep_tl.checkout_layer_name("encoder.seq_encoder.weight_ih_l0", model.named_parameters(), info_epoch="EVAL")

    readers_eval = readers_load(datasets=[data_path], tasks=[evaluated_task], word_dictionary=model.word_dictionary,
                                word_dictionary_norm=model.word_nom_dictionary, char_dictionary=model.char_dictionary,
                                pos_dictionary=model.pos_dictionary, xpos_dictionary=model.xpos_dictionary,
                                type_dictionary=model.type_dictionary, use_gpu=use_gpu,
                                norm_not_norm=model.auxilliary_task_norm_not_norm, word_decoder=word_decoding,
                                bucket=bucket, max_char_len=max_char_len,
                                add_start_char=1, add_end_char=1, symbolic_end=model.symbolic_end, symbolic_root=model.symbolic_root,
                                verbose=verbose)
    batchIter = data_gen_multi_task_sampling_batch(tasks=[evaluated_task], readers=readers_eval, batch_size=batch_size,
                                                   word_dictionary=model.word_dictionary,
                                                   char_dictionary=model.char_dictionary,
                                                   pos_dictionary=model.pos_dictionary,
                                                   get_batch_mode=get_batch_mode_evaluate,
                                                   word_dictionary_norm=model.word_nom_dictionary,
                                                   extend_n_batch=1, dropout_input=0,
                                                   verbose=verbose)

    model.eval()
    # the formulas come from the normalization_erros functions
    score_dic_new, formulas = greedy_decode_batch(char_dictionary=model.char_dictionary, verbose=verbose, gold_output=True,
                                                  score_to_compute_ls=score_to_compute_ls, use_gpu=use_gpu,
                                                  write_output=write_output, eval_new=True,
                                                  task_simultaneous_eval=[evaluated_task],
                                                  stat="sum", mode_norm_score_ls=mode_norm_ls,
                                                  label_data=REPO_DATASET[data_path],
                                                  batchIter=batchIter, model=model,
                                                  scoring_func_sequence_pred=scoring_func_sequence_pred,
                                                  compute_mean_score_per_sent=compute_mean_score_per_sent,
                                                  batch_size=batch_size)
    for score_name, formula in formulas.items():
        if isinstance(formula, tuple) and len(formula) > 1:
            (num, denom) = formula
            score_value = score_dic_new[num] / score_dic_new[denom] if score_dic_new[denom] > 0 else None
            if score_dic_new[denom] == 0:
                print("WARNING Score {} has denominator {} equal to zero and numerator {} equal to {}".format(
                    score_name, denom, num, score_dic_new[num]))
            reg = re.match("([^-]+)-([^-]+)-.*", num)
            mode_norm = reg.group(1)
            task = reg.group(2)
            # report all in a dictionary
            if not reportint_unavailable:
                report = report_template(metric_val=score_name,
                                         info_score_val=mode_norm,
                                         score_val=score_value,
                                         n_sents=score_dic_new["n_sents"],
                                         avg_per_sent=0,
                                         n_tokens_score=score_dic_new.get(mode_norm + "-" + task + "-gold-count", -1),
                                         model_full_name_val=model.model_full_name,
                                         task=task,
                                         report_path_val=model.arguments["checkpoint_dir"],
                                         evaluation_script_val="exact_match",
                                         model_args_dir=model.args_dir,
                                         data_val=REPO_DATASET[data_path])
            else:
                report = {"report ":0}
            over_all_report_dir = os.path.join(dir_report, model.model_full_name + "-report-" + label_report + ".json")
            over_all_report_dir_all_models = os.path.join(overall_report_dir, overall_label + "-report.json")
            writing_mode = "w" if not os.path.isfile(over_all_report_dir) else "a"
            writing_mode_all_models = "w" if not os.path.isfile(over_all_report_dir_all_models) else "a"
            for report_path, writing_mode in zip([over_all_report_dir, over_all_report_dir_all_models],
                                                 [writing_mode, writing_mode_all_models]):
                if writing_mode == "w":
                    _all_report = [report]
                    json.dump([report], open(report_path, writing_mode))
                    printing("REPORT : Creating new report  {} ".format(report_path), verbose=verbose, verbose_level=1)
                else:
                    all_report = json.load(open(report_path, "r"))
                    all_report.append(report)
                    json.dump(all_report, open(report_path, "w"))
    printing("NEW REPORT metric : {} ", var=[" ".join(list(formulas.keys()))], verbose=verbose, verbose_level=1)
    try:
        printing("NEW REPORT : model specific report saved {} ".format(over_all_report_dir), verbose=verbose, verbose_level=1)
        printing("NEW REPORT : overall report saved {} ".format(over_all_report_dir_all_models), verbose=verbose,verbose_level=1)
    except Exception as e:
        print(e)
    if writing_mode == "w":
        all_report = _all_report
    return all_report
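
# Hedged usage sketch (not part of the source): model name, dictionary path, and
# data path are placeholders; data_path must be a key of REPO_DATASET.
def _demo_evaluate():
    return evaluate(batch_size=10,
                    data_path="path/to/test.conll",    # placeholder path, key of REPO_DATASET
                    tasks=["normalize"],
                    evaluated_task="normalize",
                    model_full_name="my_model",        # placeholder name
                    dict_path="path/to/dictionaries",  # placeholder path
                    dir_report="path/to/reports",      # placeholder dir
                    label_report="test",
                    use_gpu=False,
                    verbose=1)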