def update_report_user_discarded_features(report_dict, lista_descartadas):
    '''Store the user-discarded features in the variables section of the report.

    The value is written under the "user discarded" key of the variables
    entry inside the general-info entry of ``report_dict``. The same (mutated)
    ``report_dict`` is returned.

    NOTE(review): a function with this same name is defined again further
    down in this module; the later definition is the one left bound once the
    module has been imported.
    '''
    general_info = report_dict[glod.get_report_general_info_key()]
    variables_section = general_info[glod.get_variables_key()]
    variables_section[glod.get_user_discarded_key()] = lista_descartadas
    return report_dict
def update_report_training_models_features(report_dict, diccionario_variables_scores):
    '''Store the feature/score dictionary in the variables section of the report.

    The value is written under the "score relevant" key of the variables entry
    inside the general-info entry of ``report_dict``. The same (mutated)
    ``report_dict`` is returned.
    '''
    general_info = report_dict[glod.get_report_general_info_key()]
    variables_section = general_info[glod.get_variables_key()]
    variables_section[glod.get_score_relevant_key()] = diccionario_variables_scores
    return report_dict
def update_report_empty_constant_features(report_dict, lista_vacias_constantes):
    '''Store the empty-or-constant features in the variables section of the report.

    The value is written under the "empty or constant" key of the variables
    entry inside the general-info entry of ``report_dict``. The same (mutated)
    ``report_dict`` is returned.

    NOTE(review): a function with this same name is defined again further
    down in this module; the later definition is the one left bound once the
    module has been imported.
    '''
    general_info = report_dict[glod.get_report_general_info_key()]
    variables_section = general_info[glod.get_variables_key()]
    variables_section[glod.get_empty_or_constant_key()] = lista_vacias_constantes
    return report_dict
def update_report_relevant_user_features(report_dict, lista_importantes):
    '''Store the user-requested features in the variables section of the report.

    The value is written under the "user requested" key of the variables entry
    inside the general-info entry of ``report_dict``. The same (mutated)
    ``report_dict`` is returned.

    NOTE(review): a function with this same name is defined again further
    down in this module; the later definition is the one left bound once the
    module has been imported.
    '''
    general_info = report_dict[glod.get_report_general_info_key()]
    variables_section = general_info[glod.get_variables_key()]
    variables_section[glod.get_user_requested_key()] = lista_importantes
    return report_dict
def _install_pisa_null_handler():
    '''Attach one PisaNullHandler to the "xhtml2pdf" logger unless one is already attached.'''
    pisa_logger = logging.getLogger("xhtml2pdf")
    if not any(isinstance(handler, PisaNullHandler) for handler in pisa_logger.handlers):
        pisa_logger.addHandler(PisaNullHandler())


def _encode_report_image(stored_path):
    '''Strip single quotes from a stored image path and hand it to encode_image.'''
    return encode_image(stored_path.replace('\'', glod.get_empty_string()))


def _render_pdf(template, template_vars, ruta_html, ruta_pdf, enco):
    '''Render the template into the temporary HTML file and convert that file to a PDF.'''
    with codecs.open(ruta_html, glod.get_write_mode(), encoding=enco) as output_file:
        output_file.write(template.render(template_vars))

    with codecs.open(ruta_html, glod.get_read_mode(), encoding=enco) as html_leido:
        with open(ruta_pdf, glod.get_writebyte_mode()) as pdf_file:
            pisa.CreatePDF(html_leido.read(), pdf_file)


def _render_target_counts(header_html, counts_by_target):
    '''Render one "With target X :N" line per target (natural order) after the given header.'''
    parts = [header_html]
    for target in auxf.natsorted(list(counts_by_target.keys())):
        parts.append(" " + "With target " + str(target) + " :" +
                     str(counts_by_target[target]) + "</br>")
    parts.append("</p>")
    return "".join(parts)


def _null_accuracy(counts_by_target):
    '''Return the largest per-target share of the total count, rounded to 4 decimals.

    Returns 0.0 when there are no targets or when the counts add up to zero,
    instead of failing on max() of an empty list or on a division by zero.
    '''
    counts = [float(counts_by_target[target])
              for target in auxf.natsorted(list(counts_by_target.keys()))]
    total = 0.0
    for count in counts:
        total += count
    if not total:
        return 0.0
    return max(round(count / total, 4) for count in counts)


def _render_variables_summary(variables):
    '''Render the "Summary of variables" paragraph from the variables section of the report.'''
    discarded_for_event = variables[glod.get_user_discarded_key()]
    parts = ["<p><strong>Summary of variables</strong></br>",
             "<br><i><u>Deleted by the user at the begining:</i></u></br>"]

    for deleted_var in variables[glod.get_deleted_by_user_key()]:
        # Variables discarded specifically for this event are shown in bold.
        if deleted_var in discarded_for_event:
            shown = "<strong>" + deleted_var + "</strong>"
        else:
            shown = deleted_var
        parts.append(" " + shown + "</br>")
    parts.append(" <i>*variables in bold were specified by the user to be discarded"
                 " specifically for this event<i></br>")
    parts.append("</br>")

    parts.append("<br><i><u>Deleted in execution time(Empty or Constant) :</i></u></br>")
    for emp_con_var in variables[glod.get_empty_or_constant_key()]:
        parts.append(" " + emp_con_var + "</br>")
    parts.append("</br>")

    parts.append("<br><i><u>Requested for the event by the user:</i></u></br>")
    for req_var in variables[glod.get_user_requested_key()]:
        parts.append(" " + req_var + "</br>")
    parts.append("</br>")

    parts.append("<br><i><u>Used during the process:</i></u></br>")
    diccionario_relevantes_mif = variables[glod.get_score_relevant_key()]
    # Scored variables first, highest score first, in bold together with their score.
    sorted_relevant_vars = sorted(diccionario_relevantes_mif.items(),
                                  key=operator.itemgetter(1), reverse=True)
    for nombre, score in sorted_relevant_vars:
        parts.append(" " + "<strong>" + nombre + '  ' + str(score) + "</strong>" + "</br>")
    # Then the remaining variables used in the process that have no score.
    for used_var in variables[glod.get_used_in_process()]:
        if used_var not in diccionario_relevantes_mif:
            parts.append(" " + used_var + "</br>")
    parts.append(" <i>*variables in bold were used to train the models<i></br>")
    parts.append("</p>")
    return "".join(parts)


def _render_metrics_table(metrics_by_target):
    '''Render the per-target metrics as an HTML table; the header row comes from the first target.'''
    parts = ["<table width='100%' border='1' cellspacing='0' cellpadding='5'>"]
    cabeceras = None
    for elemento in auxf.natsorted(list(metrics_by_target.keys())):
        target_metrics = metrics_by_target[elemento]
        if cabeceras is None:
            # The first target fixes the column set (and order) for every row.
            cabeceras = target_metrics.keys()
            parts.append("<tr><td align='center' class='black'>" +
                         glod.get_report_generic_target_key() + "</td>")
            for cabecera in cabeceras:
                parts.append("<td align='center' class='black'>" + cabecera + "</td>")
            parts.append("</tr>")

        parts.append("<tr><td>" + elemento.replace('target_', glod.get_empty_string()) + "</td>")
        for key in cabeceras:
            parts.append("<td>" + str(target_metrics[key]) + "</td>")
        parts.append("</tr>")
    parts.append("</table>")
    return "".join(parts)


def _build_model_template_vars(report_dict, modelo, lista_pares_modelo_indice):
    '''Collect every template variable needed to render the report of one model.'''
    general_info = report_dict[glod.get_report_general_info_key()]
    model_report = report_dict[modelo]

    observations_targets = _render_target_counts(
        "<p><strong>Target distribution of observations </strong></br>",
        general_info[glod.get_report_generic_target_key()])
    variables_summary = _render_variables_summary(general_info[glod.get_variables_key()])

    # Information about the model
    accuracy = ("</br></br>  <strong>Accuracy: " +
                str(float(round(model_report[glod.get_accuracy_parameter_name()], 5))) +
                "</strong>")
    ranking = get_string_with_ranking_of_models(lista_pares_modelo_indice, modelo)

    parameters = model_report[glod.get_parameters_key()]
    model_info = "<p><strong>Parameters used to configure the model</strong></br>"
    for param in parameters:
        model_info += " <i>" + param + "</i>: " + str(parameters[param]) + "</br>"
    model_info += "</p>"

    times = model_report[glod.get_time_parameters_key()]
    tiempo_seleccion_parametros = (times[glod.get_time_sel_finish_key()] -
                                   times[glod.get_time_sel_init_key()])
    tiempo_entrenamiento = (times[glod.get_time_train_finish_key()] -
                            times[glod.get_time_train_init_key()])
    time_info = "<p><strong>Time elapsed</strong></br>"
    time_info += " " + "Parameters selection time: " + str(tiempo_seleccion_parametros) + "</br>"
    time_info += " " + "Training time: " + str(tiempo_entrenamiento) + "</br>"
    time_info += "</p>"

    # Train/test distributions and their null accuracies (share of the largest target).
    train_division = general_info[glod.get_training_division_key()]
    train_distribution_info = _render_target_counts(
        "<p></br><strong>Training Data Distribution </strong></br>", train_division)
    null_train_accuracy = _null_accuracy(train_division)

    test_division = general_info[glod.get_test_division_key()]
    test_distribution_info = _render_target_counts(
        "<p><strong>Test Data Distribution</strong></br>", test_division)
    null_test_accuracy = _null_accuracy(test_division)

    template_vars = {
        glod.get_title_key(): "Execution Report",
        glod.get_logo_key(): _encode_report_image(report_dict[glod.get_logo_key()]),
        glod.get_model_key(): modelo,
        glod.get_report_generic_target_key(): report_dict[glod.get_objective_target_key()],
        glod.get_event_key(): report_dict[glod.get_event_key()],
        glod.get_accuracy_parameter_name():
            str(accuracy) +
            "<br>  <strong>Null train acc: " + str(null_train_accuracy) + "</strong>" +
            "<br>   <strong>Null test acc: " + str(null_test_accuracy) + "</strong></p>",
        glod.get_models_ranking_key(): ranking,
        glod.get_observations_targets_key(): observations_targets,
        glod.get_variables_summary_key(): variables_summary,
        glod.get_models_info_key(): model_info,
        glod.get_time_info_key(): time_info,
        glod.get_train_distribution_info_key(): train_distribution_info,
        glod.get_test_distribution_info_key(): test_distribution_info,
    }

    micro_avg = model_report[glod.get_metrics_micro_avg_key()]
    metrics_info = glod.get_empty_string()
    for metric in micro_avg:
        # str() keeps the concatenation valid when the stored value is not already text.
        metrics_info += "<p>" + "<strong>" + metric + "</strong>: " + str(micro_avg[metric]) + "</br>"
    # NOTE(review): placement of this closing tag relative to the loop could not be
    # recovered from the flattened source; it follows the pattern of the other sections.
    metrics_info += "</p>"
    template_vars[glod.get_metrics_info_key()] = metrics_info

    # Optional images: each one is embedded only when the model entry stores a path for it.
    optional_images = (
        (glod.get_model_parameters_plot_name(), glod.get_image_parameters_accuracy_key()),
        (glod.get_confussion_matrix_train_path_key(), glod.get_conf_train_img_key()),
        (glod.get_confussion_matrix_test_path_key(), glod.get_conf_test_img_key()),
        (glod.get_learning_curve_key(), glod.get_learning_curve_key()),
    )
    for source_key, template_key in optional_images:
        if source_key in model_report:
            template_vars[template_key] = _encode_report_image(model_report[source_key])

    template_vars[glod.get_metrics_by_label_key()] = _render_metrics_table(
        model_report[glod.get_metrics_key()])
    return template_vars


def create_report_current_model(report_dict, lista_modelos, ruta_relativa_datos_auxiliares,
                                ruta_directorio_informes, enco):
    '''Generate the PDF report(s) of an execution from the collected report data.

    When ``lista_modelos`` is empty a single "incomplete execution" report is
    written as ``report_<event>_incomplete.pdf``. Otherwise one
    ``<model>_report_for_<event>.pdf`` is written for every model of
    ``lista_modelos`` that has an entry in ``report_dict``.

    Each report is rendered from a Jinja template to a temporary
    ``temp_html.html`` file and then converted with ``pisa.CreatePDF``. The
    temporary file is removed before returning, also when rendering fails.

    Parameters:
        report_dict: dictionary with the event, logo, objective target,
            general-info entry and one entry per trained model.
        lista_modelos: names of the models to report; empty for an
            incomplete execution.
        ruta_relativa_datos_auxiliares: directory that holds the templates
            and receives the temporary HTML file; templates are loaded
            relative to the current working directory.
        ruta_directorio_informes: directory where the PDF files are written.
        enco: encoding used to write and read the temporary HTML file.

    Returns:
        None.
    '''
    env = Environment(loader=FileSystemLoader('.'))
    ruta_plantilla_temporal = os.path.join(ruta_relativa_datos_auxiliares, 'temp_html.html')

    # Installed once and before any conversion, so handlers do not pile up per report.
    _install_pisa_null_handler()

    try:
        if not lista_modelos:  # process not completed
            template = env.get_template(ruta_relativa_datos_auxiliares + '/' +
                                        glod.get_incomplete_event_report_template_name())
            template_vars = {
                glod.get_title_key(): "Incomplete Execution Report",
                glod.get_logo_key(): _encode_report_image(report_dict[glod.get_logo_key()]),
                glod.get_report_generic_target_key(): report_dict[glod.get_objective_target_key()],
                glod.get_event_key(): report_dict[glod.get_event_key()],
                glod.get_info_key(): " " + report_dict[glod.get_warning_key()],
            }
            pdf_resultante = os.path.join(
                ruta_directorio_informes,
                "report_" + report_dict[glod.get_event_key()] + "_incomplete.pdf")
            _render_pdf(template, template_vars, ruta_plantilla_temporal, pdf_resultante, enco)
            return

        lista_pares_modelo_indice = auxf.order_models_by_score_and_time(report_dict, lista_modelos)
        template = env.get_template(ruta_relativa_datos_auxiliares + '/' +
                                    glod.get_report_template_name())
        for modelo in lista_modelos:
            if modelo not in report_dict:
                continue
            template_vars = _build_model_template_vars(report_dict, modelo,
                                                       lista_pares_modelo_indice)
            event = report_dict[glod.get_event_key()]
            pdf_resultante = os.path.join(ruta_directorio_informes,
                                          modelo + "_report_for_" + event + ".pdf")
            _render_pdf(template, template_vars, ruta_plantilla_temporal, pdf_resultante, enco)
    finally:
        # Always drop the intermediate HTML, even if rendering or conversion raised.
        if os.path.exists(ruta_plantilla_temporal):
            os.remove(ruta_plantilla_temporal)
def update_test_division(report_dict, key, valor):
    '''Record one entry of the test-set distribution in the report.

    ``valor`` is stored under ``key`` inside the test-division entry of the
    general-info entry of ``report_dict``. The same (mutated) ``report_dict``
    is returned.
    '''
    general_info = report_dict[glod.get_report_general_info_key()]
    test_division = general_info[glod.get_test_division_key()]
    test_division[key] = valor
    return report_dict
def update_report_user_discarded_features(report_dict, lista_descartadas):
    '''Register the list of user-discarded features in the report.

    The list is written under the "user discarded" key of the variables entry
    inside the general-info entry of ``report_dict``; the same (mutated)
    dictionary is returned.

    NOTE(review): an earlier definition with this same name appears higher up
    in this module; this later one replaces it at import time.
    '''
    seccion_variables = report_dict[glod.get_report_general_info_key()][glod.get_variables_key()]
    seccion_variables[glod.get_user_discarded_key()] = lista_descartadas
    return report_dict
def update_report_relevant_user_features(report_dict, lista_importantes):
    '''Register the list of user-requested features in the report.

    The list is written under the "user requested" key of the variables entry
    inside the general-info entry of ``report_dict``; the same (mutated)
    dictionary is returned.

    NOTE(review): an earlier definition with this same name appears higher up
    in this module; this later one replaces it at import time.
    '''
    seccion_variables = report_dict[glod.get_report_general_info_key()][glod.get_variables_key()]
    seccion_variables[glod.get_user_requested_key()] = lista_importantes
    return report_dict
def update_report_empty_constant_features(report_dict, lista_vacias_constantes):
    '''Register the list of empty or constant features in the report.

    The list is written under the "empty or constant" key of the variables
    entry inside the general-info entry of ``report_dict``; the same (mutated)
    dictionary is returned.

    NOTE(review): an earlier definition with this same name appears higher up
    in this module; this later one replaces it at import time.
    '''
    seccion_variables = report_dict[glod.get_report_general_info_key()][glod.get_variables_key()]
    seccion_variables[glod.get_empty_or_constant_key()] = lista_vacias_constantes
    return report_dict