def _stamped(template, *values):
    """Fill ``template`` with the current local time followed by ``values``.

    The first ``{}`` placeholder of ``template`` receives ``time.asctime()``
    (the current local time); the remaining placeholders receive ``values``
    in order.
    """
    return template.format(time.asctime(), *values)


def main(train_file_to_use, test_file_to_use, test_type, features_combination_list, lamda, comp):
    """Train, decode and evaluate one MEMM per feature combination.

    For every entry of ``features_combination_list`` this builds a ``MEMM``,
    optimises it with ``Gradient.gradient_descent``, runs ``viterbi`` on the
    test file, hands the result to ``Evaluate`` and writes a summary file.
    Progress is reported through ``print`` and/or ``logging.info``.

    Args:
        train_file_to_use: training file, passed to ``MEMM``.
        test_file_to_use: file passed to ``viterbi`` and ``Evaluate``.
        test_type: label embedded in the names of the output files.
        features_combination_list: iterable of feature combinations; one full
            train/decode/evaluate cycle is run for each item.
        lamda: value passed to ``Gradient`` as ``lambda_value``.
        comp: passed to ``Evaluate`` and ``create_summary_file``. When truthy
            only ``write_result_doc()`` is called; otherwise ``run()`` is
            called and its return value is logged.

    Returns:
        None.

    Note:
        Relies on a module-level ``directory`` used as a path prefix for all
        output files (presumably ending with a path separator, since it is
        concatenated directly - confirm against its definition).
    """
    for features_combination in features_combination_list:
        # ---- build the model ------------------------------------------------
        message = _stamped('{}: Start creating MEMM for features : {}', features_combination)
        print(message)
        logging.info(message)
        train_start_time = time.time()
        memm_class = MEMM(directory, train_file_to_use, features_combination)

        message = _stamped('{}: Finish MEMM for features : {}', features_combination)
        logging.info(message)
        print(message)

        # ---- optimise the weights -------------------------------------------
        message = _stamped('{}: Start gradient for features : {} and lambda: {}',
                           features_combination, lamda)
        print(message)
        logging.info(message)
        gradient_class = Gradient(model=memm_class, lambda_value=lamda)
        gradient_result = gradient_class.gradient_descent()

        # Run time is reported in minutes and covers MEMM creation + gradient.
        train_run_time = (time.time() - train_start_time) / 60.0
        message = _stamped('{}: Finish gradient for features : {} and lambda: {}. run time: {}',
                           features_combination, lamda, train_run_time)
        print(message)
        logging.info(message)

        weights = gradient_result.x

        # ---- decode the test file -------------------------------------------
        viterbi_start_time = time.time()
        print(_stamped('{}: Start viterbi'))
        viterbi_class = viterbi(memm_class, data_file=test_file_to_use, w=weights)
        # NOTE(review): accessed as an attribute, not called - confirm that
        # ``viterbi_all_data`` is a property/attribute and not a method.
        viterbi_result = viterbi_class.viterbi_all_data
        viterbi_run_time = (time.time() - viterbi_start_time) / 60.0
        message = _stamped('{}: Finish viterbi. run time: {}', viterbi_run_time)
        print(message)
        logging.info(message)

        # ---- evaluate -------------------------------------------------------
        # Format only the timestamp with strftime: feeding the whole path to
        # strftime would interpret any '%' in ``directory``/``test_type`` as a
        # format directive. One snapshot keeps both file names in sync.
        file_stamp = datetime.now().strftime('%d_%m_%Y_%H_%M')
        write_file_name = (directory + 'file_results/result_MEMM_most_common_tags_'
                           + test_type + file_stamp + '.wtag')
        confusion_file_name = (directory + 'confusion_files/CM_MEMM_most_common_tags_'
                               + test_type + file_stamp + '.xls')
        evaluate_class = Evaluate(memm_class, test_file_to_use, viterbi_result, write_file_name,
                                  confusion_file_name, comp=comp)
        word_results_dictionary = None
        if comp:
            evaluate_class.write_result_doc()
        else:
            word_results_dictionary = evaluate_class.run()

        logging.info(_stamped('{}: The model hyper parameters: \n lambda:{} \n test file: {} \n train file: {}',
                              lamda, test_file_to_use, train_file_to_use))
        logging.info(_stamped('{}: Related results files are: \n {} \n {}',
                              write_file_name, confusion_file_name))

        # The summary name uses its own (later) timestamp with unpadded fields.
        summary_file_name = '{0}analysis/summary_{1}_{2.day}_{2.month}_{2.year}_{2.hour}_{2.minute}.csv' \
            .format(directory, test_type, datetime.now())
        evaluate_class.create_summary_file(lamda, features_combination, test_file_to_use, train_file_to_use,
                                           summary_file_name, gradient_class.file_name, comp)

        logging.info(_stamped('{}: Following Evaluation results for features {}', features_combination))
        if not comp:
            logging.info(_stamped('{}: Evaluation results are: \n {} \n', word_results_dictionary))
        logging.info('-----------------------------------------------------------------------------------')