Exemplo n.º 1
0
def _timestamp():
    """Return the current local time as formatted by ``time.asctime``."""
    return time.asctime(time.localtime(time.time()))


def _announce(message, to_stdout=True, to_log=True):
    """Emit ``'<timestamp>: <message>'`` to stdout and/or the root logger."""
    line = '{}: {}'.format(_timestamp(), message)
    if to_stdout:
        print(line)
    if to_log:
        logging.info(line)


def main(train_file_to_use, test_file_to_use, test_type, features_combination_list, lamda, comp):
    """Train, decode and evaluate one MEMM model per feature combination.

    For every entry of ``features_combination_list`` this builds a ``MEMM``
    from the training file, runs ``Gradient.gradient_descent`` on it, feeds the
    resulting weights to ``viterbi`` on the test file, and hands the outcome to
    ``Evaluate``. Progress is printed and/or logged along the way.

    Relies on the module-level name ``directory`` as the prefix of every output
    path (it is defined outside this function).

    :param train_file_to_use: training file, passed to ``MEMM`` and recorded in
        the summary file.
    :param test_file_to_use: test file, passed to ``viterbi`` and ``Evaluate``.
    :param test_type: label embedded in the names of the output files.
    :param features_combination_list: iterable of feature combinations; one
        model is trained per element. An empty iterable does nothing.
    :param lamda: value passed to ``Gradient`` as ``lambda_value``.
    :param comp: when truthy, ``Evaluate.write_result_doc`` is called instead
        of ``Evaluate.run`` and no evaluation results are logged.
    :return: ``None``.
    """
    for features_combination in features_combination_list:
        # --- training: model construction + gradient descent -----------------
        _announce('Start creating MEMM for features : {}'.format(features_combination))
        train_start_time = time.time()
        memm_class = MEMM(directory, train_file_to_use, features_combination)
        _announce('Finish MEMM for features : {}'.format(features_combination))

        _announce('Start gradient for features : {} and lambda: {}'.format(features_combination, lamda))
        gradient_class = Gradient(model=memm_class, lambda_value=lamda)
        gradient_result = gradient_class.gradient_descent()

        # Run times are reported in minutes.
        train_run_time = (time.time() - train_start_time) / 60.0
        _announce('Finish gradient for features : {} and lambda: {}. run time: {}'.format(
            features_combination, lamda, train_run_time))

        weights = gradient_result.x

        # --- decoding --------------------------------------------------------
        viterbi_start_time = time.time()
        _announce('Start viterbi', to_log=False)
        viterbi_class = viterbi(memm_class, data_file=test_file_to_use, w=weights)
        # NOTE(review): accessed as an attribute, not called - presumably a
        # property on the viterbi class; confirm against its definition.
        viterbi_result = viterbi_class.viterbi_all_data
        viterbi_run_time = (time.time() - viterbi_start_time) / 60.0
        _announce('Finish viterbi. run time: {}'.format(viterbi_run_time))

        # --- evaluation ------------------------------------------------------
        # Format only the timestamp with strftime: running the whole path
        # through it would interpret any '%' in `directory` or `test_type` as a
        # format directive. One shared stamp also keeps both file names in sync.
        file_stamp = datetime.now().strftime('%d_%m_%Y_%H_%M')
        write_file_name = '{}file_results/result_MEMM_most_common_tags_{}{}.wtag'.format(
            directory, test_type, file_stamp)
        confusion_file_name = '{}confusion_files/CM_MEMM_most_common_tags_{}{}.xls'.format(
            directory, test_type, file_stamp)

        evaluate_class = Evaluate(memm_class, test_file_to_use, viterbi_result, write_file_name,
                                  confusion_file_name, comp=comp)
        if comp:
            evaluate_class.write_result_doc()
        else:
            word_results_dictionary = evaluate_class.run()

        _announce('The model hyper parameters: \n lambda:{} \n test file: {} \n train file: {}'.format(
            lamda, test_file_to_use, train_file_to_use), to_stdout=False)
        _announce('Related results files are: \n {} \n {}'.format(write_file_name, confusion_file_name),
                  to_stdout=False)

        summary_file_name = '{0}analysis/summary_{1}_{2.day}_{2.month}_{2.year}_{2.hour}_{2.minute}.csv' \
            .format(directory, test_type, datetime.now())
        evaluate_class.create_summary_file(lamda, features_combination, test_file_to_use, train_file_to_use,
                                           summary_file_name, gradient_class.file_name, comp)

        _announce('Following Evaluation results for features {}'.format(features_combination),
                  to_stdout=False)
        if not comp:
            # word_results_dictionary only exists when Evaluate.run() was used.
            _announce('Evaluation results are: \n {} \n'.format(word_results_dictionary), to_stdout=False)
        logging.info('-----------------------------------------------------------------------------------')