def create_report_prediction(report_dict, event_target, ruta_relativa_datos_auxiliares, ruta_directorio_informes, enco):
    '''Generate the PDF prediction report for the current model after the prediction phase.

    Renders the prediction Jinja2 template into a temporary HTML file, converts
    it to PDF with xhtml2pdf (pisa) and removes the temporary file afterwards.

    Parameters:
        report_dict: dict with the report data; indexed by event name and holding
            the logo path under glod.get_logo_key().  # assumes per-target entries
            # carry 'Correct'/'Total'/'Predicted' counts — TODO confirm with caller
        event_target: two-element sequence (event name, target to predict).
        ruta_relativa_datos_auxiliares: directory with auxiliary data/templates.
        ruta_directorio_informes: output directory for the generated PDF.
        enco: text encoding used for reading/writing the temporary HTML.
    '''
    env = Environment(loader=FileSystemLoader('.'))
    ruta_plantilla_temporal = os.path.join(ruta_relativa_datos_auxiliares, 'temp_html.html')
    template = env.get_template(ruta_relativa_datos_auxiliares + '/' + glod.get_prediction_template_name())
    event = event_target[0]
    target_to_predict = event_target[1]
    dic_info_event = report_dict[event]
    summary_target_to_predict = glod.get_empty_string()
    # Quotes are stripped from the stored logo path before base64-encoding it.
    template_vars = {glod.get_title_key(): "Prediction report for " + event,
                     glod.get_logo_key():
                         encode_image(report_dict[glod.get_logo_key()].replace('\'', glod.get_empty_string())),
                    }
    if target_to_predict in dic_info_event:
        # Keep only the estimator class name, dropping its printed parameters
        # (everything after the first '(').
        model = str(dic_info_event[target_to_predict][glod.get_best_model_key()])
        model = model.split("(")
        model = model[0]
        summary_target_to_predict = "<p><strong>Target: <strong>" + ' ' +\
            target_to_predict + "</br></br>"
        summary_target_to_predict += "<p><strong>Model: <strong>" + ' ' + model + "</br>"
        summary_target_to_predict += "<p><strong>Accuracy: <strong>" + ' ' +\
            str(dic_info_event[target_to_predict][glod.get_accuracy_parameter_name()]) + "</br>"
        summary_target_to_predict += "<strong>Correct classifications: <strong>" + ' ' +\
            str(dic_info_event[target_to_predict]['Correct']) + "</br>"
        summary_target_to_predict += "<strong>Total number of observations: <strong>" + ' ' +\
            str(dic_info_event[target_to_predict]['Total']) + "</br>"
        summary_target_to_predict += "<strong>Total number of unknown observations classified: <strong>" + ' ' + str(dic_info_event[target_to_predict]['Predicted']) + "</br>"
        # Confusion-matrix image for the predicted target, embedded as base64.
        cm_target = encode_image(dic_info_event[target_to_predict]
                                 ['target_to_predict_cm'].replace('\'', glod.get_empty_string()))
        template_vars['target_to_predict_cm'] = cm_target
    # When the target is unknown the summary stays empty and the template
    # receives an empty 'target' section.
    template_vars['target'] = summary_target_to_predict
    # Render the HTML from the template...
    with codecs.open(ruta_plantilla_temporal, glod.get_write_mode(), encoding=enco) as output_file:
        output_file.write(template.render(template_vars))
    # ...then read it back and convert it to PDF.
    with codecs.open(ruta_plantilla_temporal, mode=glod.get_read_mode(), encoding=enco) as read_html:
        pdf_resultante = os.path.join(ruta_directorio_informes,
                                      "Prediction_report_for_"+ event +".pdf")
        with open(pdf_resultante, mode=glod.get_writebyte_mode()) as pdf_gen:
            pisa.CreatePDF(read_html.read(), pdf_gen)
            # Silence xhtml2pdf's default logging output.
            logging.getLogger("xhtml2pdf").addHandler(PisaNullHandler())
    # Clean up the temporary HTML file.
    if os.path.exists(ruta_plantilla_temporal):
        os.remove(ruta_plantilla_temporal)
def create_report_current_dictionary_models(dictionary_of_models, ruta_relativa_datos_auxiliares, ruta_directorio_resultados, list_of_parameters_models_events_dict, logo_path, enco):
    '''Generate the PDF report describing the current dictionary of models.

    NOTE(review): this definition is shadowed by a later redefinition of the
    same name (taking `basic_paths` instead of two path arguments) further
    down this file; at import time only the later one survives.

    Parameters:
        dictionary_of_models: nested dict event -> target -> model info.
        ruta_relativa_datos_auxiliares: directory with auxiliary data/templates.
        ruta_directorio_resultados: output directory for the generated PDF.
        list_of_parameters_models_events_dict: ordered key names used to index
            the model-info dicts.  # presumably ['learning', 'model_path',
            # 'reassignment', 'features', ...] — verify against caller
        logo_path: path of the logo image to embed.
        enco: text encoding used for the temporary HTML/PDF conversion.
    '''
    env = Environment(loader=FileSystemLoader('.'))
    ruta_plantilla_temporal = os.path.join(ruta_relativa_datos_auxiliares,'temp_html.html')
    template = env.get_template(ruta_relativa_datos_auxiliares + '/' + 'dictionary_models_template.html')
    # HTML indentation unit used when composing the body text.
    tabulacion = " "
    template_vars = {glod.get_title_key(): "Report of the information of the Dictionary of models",
                     glod.get_logo_key(): encode_image(logo_path.replace('\'',''))
                    }
    #['learning', 'features', 'original_features', 'model_path' list_of_parameters_models_events_dict
    list_elements = [list_of_parameters_models_events_dict[0],list_of_parameters_models_events_dict[3],list_of_parameters_models_events_dict[1]]
    #list_of_parameters_models_events_dict[4]
    informacion = ""
    for event in dictionary_of_models:
        informacion += "<strong><u>"+ event +"</u></strong></br></br>"
        for target in dictionary_of_models[event]:
            informacion += tabulacion + tabulacion + "<strong><i>Target:</i></strong>" + " " + target + "</br>"
            for key in list_elements:
                informacion += tabulacion + tabulacion + "<strong><i>" + key + ": </i></strong>"
                # List-valued entries (feature lists) are printed one per line, sorted.
                if(type(list()) == type(dictionary_of_models[event][target][key])):
                    informacion += "<br>"
                    contador = 0
                    ordered_list_features = sorted(dictionary_of_models[event][target][key])
                    while(contador < len(ordered_list_features)):
                        element = ordered_list_features[contador]
                        informacion += tabulacion + tabulacion + tabulacion +tabulacion + element + "</br>"
                        contador += 1
                else:
                    informacion += dictionary_of_models[event][target][key] + "</br>"
                    if(key == list_of_parameters_models_events_dict[0]):
                        # Derive the model name from the stored model path:
                        # last '_'-separated chunk, extension stripped.
                        informacion += tabulacion + tabulacion + "<strong><i>best model: </i></strong> " + dictionary_of_models[event][target][list_of_parameters_models_events_dict[1]].split('_')[-1].split('.')[0] + "</br>" #get model name
                        if(dictionary_of_models[event][target][key] ==
                                glod.get_unsupervised_name()):
                            # Unsupervised models also report their label reassignment map.
                            informacion += tabulacion + tabulacion + "<strong><i>dic_reassingment: </i></strong> " + str(dictionary_of_models[event][target][list_of_parameters_models_events_dict[2]]) + "</br>"
            informacion += "</br>"
    if(informacion == ""):
        informacion = "No models were created yet"
    template_vars[glod.get_info_key()] = informacion
    #html
    with codecs.open(ruta_plantilla_temporal,'w',encoding='utf-8') as output_file:
        renderizado = template.render(template_vars)
        output_file.write(renderizado)
    #pdf
    with codecs.open(ruta_plantilla_temporal, mode='r',encoding=enco) as read_html:
        pdf_resultante = os.path.join(ruta_directorio_resultados,"Current_status_dictionary_events_and_models.pdf")
        with open(pdf_resultante, mode='wb') as pdf_gen:
            # Round-trip through encode/decode drops characters not
            # representable in `enco` before handing the HTML to pisa.
            pisa.CreatePDF(read_html.read().encode(enco, 'ignore').decode(enco),pdf_gen)
    # Remove the temporary HTML file.
    if(os.path.exists(ruta_plantilla_temporal)):
        os.remove(ruta_plantilla_temporal)
def create_basic_report_data_dict(umbral, target, main_metric, feature_selection_method, penalize_falses, lista_variables_descartadas, ruta_logo):
    """Assemble the base report-data dictionary with the global execution settings.

    NOTE(review): shadowed by a later redefinition of the same name (taking a
    `basic_parameters` list) further down this file.
    """
    report_data = {}
    report_data[glod.get_title_key()] = "Overview With Execution Information"
    report_data[glod.get_logo_key()] = ruta_logo
    # Numeric / flag settings are stored as their string representation.
    report_data[glod.get_umbral_key()] = str(umbral)
    report_data[glod.get_main_metric_key()] = str(main_metric)
    report_data[glod.get_feature_selection_key()] = str(feature_selection_method)
    report_data[glod.get_penalization_name()] = str(penalize_falses)
    report_data[glod.get_objective_target_key()] = target
    # Variables section starts with only the user-discarded list.
    report_data[glod.get_variables_key()] = {
        glod.get_deleted_by_user_key(): lista_variables_descartadas,
    }
    report_data[glod.get_general_info_execution_key()] = ''
    return report_data
def create_report_data_dict(evento, umbral, target, lista_variables_descartadas, ruta_logo):
    """Assemble the per-event report-data dictionary skeleton.

    NOTE(review): shadowed by a later redefinition of the same name further
    down this file.
    """
    variables_section = {
        glod.get_deleted_by_user_key(): lista_variables_descartadas,
        glod.get_empty_or_constant_key(): [],
        glod.get_score_relevant_key(): [],
    }
    general_info = {
        glod.get_report_generic_target_key(): {},
        glod.get_variables_key(): variables_section,
        glod.get_training_division_key(): {},
        glod.get_test_division_key(): {},
    }
    return {
        glod.get_objective_target_key(): target,
        glod.get_event_key(): evento,
        glod.get_logo_key(): ruta_logo,
        glod.get_report_general_info_key(): general_info,
        glod.get_umbral_key(): str(umbral),
        glod.get_warning_key(): '',
    }
def create_report_data_dict(evento, umbral, target, lista_variables_descartadas, ruta_logo):
    '''Build the report-data dictionary skeleton for the current event.

    Returns a dict keyed by the project's glod accessor keys, holding the
    objective target, event name, logo path, an empty general-info section
    (generic-target counts, variables bookkeeping, train/test divisions),
    the threshold as a string, and an empty warning slot.
    '''
    variables_section = {
        glod.get_deleted_by_user_key(): lista_variables_descartadas,
        glod.get_empty_or_constant_key(): [],
        glod.get_score_relevant_key(): [],
    }
    general_info = {
        glod.get_report_generic_target_key(): {},
        glod.get_variables_key(): variables_section,
        glod.get_training_division_key(): {},
        glod.get_test_division_key(): {},
    }
    report_data = {
        glod.get_objective_target_key(): target,
        glod.get_event_key(): evento,
        glod.get_logo_key(): ruta_logo,
        glod.get_report_general_info_key(): general_info,
        glod.get_umbral_key(): str(umbral),
        glod.get_warning_key(): glod.get_empty_string(),
    }
    return report_data
def create_basic_report_data_dict(basic_parameters, lista_variables_descartadas, ruta_logo):
    '''Build the base report-data dictionary from packed execution parameters.

    Parameters:
        basic_parameters: sequence of (umbral, target, main_metric,
            feature_selection_method, penalize_falses), in that order.
        lista_variables_descartadas: variables discarded by the user.
        ruta_logo: path of the logo image.
    '''
    # Unpack the positional parameter bundle.
    umbral, target, main_metric, feature_selection_method, penalize_falses = basic_parameters[:5]
    report_data = {}
    report_data[glod.get_title_key()] = "Overview With Execution Information"
    report_data[glod.get_logo_key()] = ruta_logo
    report_data[glod.get_umbral_key()] = str(umbral)
    report_data[glod.get_main_metric_key()] = str(main_metric)
    report_data[glod.get_feature_selection_key()] = str(feature_selection_method)
    report_data[glod.get_penalization_name()] = str(penalize_falses)
    report_data[glod.get_objective_target_key()] = target
    report_data[glod.get_variables_key()] = {
        glod.get_deleted_by_user_key(): lista_variables_descartadas,
    }
    report_data[glod.get_general_info_execution_key()] = glod.get_empty_string()
    return report_data
def create_report_current_model(report_dict, lista_modelos, ruta_relativa_datos_auxiliares, ruta_directorio_informes, enco):
    '''Generate per-model PDF execution reports (or an "incomplete" report).

    When `lista_modelos` is empty an incomplete-execution report is produced;
    otherwise one full report per model is rendered, including variable
    summaries, timing, train/test distributions, metrics and embedded images.

    Parameters:
        report_dict: report data produced during training.  # assumes the
            # structure built by create_report_data_dict plus per-model
            # entries — TODO confirm
        lista_modelos: names of the models to report on.
        ruta_relativa_datos_auxiliares: directory with auxiliary data/templates.
        ruta_directorio_informes: output directory for the PDFs.
        enco: text encoding for the temporary HTML files.
    '''
    env = Environment(loader=FileSystemLoader('.'))
    ruta_plantilla_temporal = os.path.join(ruta_relativa_datos_auxiliares,'temp_html.html')
    if(lista_modelos == []): #if process not completed
        # Use the incomplete-report template and emit a single warning report.
        template = env.get_template(ruta_relativa_datos_auxiliares + '/' + 'incomplete_event_report_template.html') # template for incomplete reports
        template_vars = {glod.get_title_key(): "Incomplete Execution Report",
                         glod.get_logo_key(): encode_image(report_dict[glod.get_logo_key()].replace('\'','')),
                         glod.get_report_generic_target_key(): report_dict[glod.get_objective_target_key()],
                         glod.get_event_key(): report_dict[glod.get_event_key()],
                         glod.get_info_key(): " " + report_dict['Warning_info']
                        }
        #html
        with codecs.open(ruta_plantilla_temporal,'w',encoding=enco) as output_file:
            output_file.write(template.render(template_vars))
        #pdf
        with codecs.open(ruta_plantilla_temporal, 'r') as html_leido:
            pdf_resultante = os.path.join(ruta_directorio_informes,"report_"+report_dict[glod.get_event_key()]+"_incomplete.pdf")
            with open(pdf_resultante, "wb") as incomplete_rep:
                pisa.CreatePDF(html_leido.read(),incomplete_rep)
                logging.getLogger("xhtml2pdf").addHandler(PisaNullHandler())
    else:
        # Rank models once; the ranking string is reused in every report.
        lista_pares_modelo_indice = auxf.order_models_by_score_and_time(report_dict,lista_modelos)
        template = env.get_template(ruta_relativa_datos_auxiliares + '/' +'report_template.html') #using standard template
        for modelo in lista_modelos:
            if(modelo in report_dict):
                # --- Distribution of observations per target value ---
                observations_targets = "<p><strong>Target distribution of observations</strong></br>"
                for ob_target in auxf.natsorted(report_dict[glod.get_report_general_info_key()][glod.get_report_generic_target_key()].keys()):
                    observations_targets += " "+ "With target " + str(ob_target) + " :"+ str(report_dict[glod.get_report_general_info_key()][glod.get_report_generic_target_key()][ob_target]) + "</br>"
                observations_targets += "</p>"
                # --- Summary of variables (discarded / deleted / requested / used) ---
                variables_summary = "<p><strong>Summary of variables</strong></br>"
                discarded_for_event = report_dict[glod.get_report_general_info_key()][glod.get_variables_key()][glod.get_user_discarded_key()]
                variables_summary += "<br><i><u>Deleted by the user at the begining:</i></u></br>"
                for deleted_var in report_dict[glod.get_report_general_info_key()][glod.get_variables_key()][glod.get_deleted_by_user_key()]:
                    variable_dis = ''
                    # Event-specific discards are highlighted in bold.
                    if deleted_var in discarded_for_event:
                        variable_dis = "<strong>" + deleted_var + "</strong>"
                    else:
                        variable_dis = deleted_var
                    variables_summary += " "+ variable_dis + "</br>"
                variables_summary += " <i>*variables in bold were specified by the user to be discarded specifically for this event<i></br>"
                variables_summary += "</br>"
                variables_summary += "<br><i><u>Deleted in execution time(Empty or Constant):</i></u></br>"
                for emp_con_var in report_dict[glod.get_report_general_info_key()][glod.get_variables_key()][glod.get_empty_or_constant_key()]:
                    variables_summary += " "+ emp_con_var + "</br>"
                variables_summary += "</br>"
                variables_summary += "<br><i><u>Requested for the event by the user:</i></u></br>"
                for req_var in report_dict[glod.get_report_general_info_key()][glod.get_variables_key()][glod.get_user_requested_key()]:
                    variables_summary += " "+ req_var + "</br>"
                variables_summary += "</br>"
                variables_summary += "<br><i><u>Used during the process:</i></u></br>"
                # Score-relevant variables (name -> score), highest score first, in bold.
                diccionario_relevantes_mif = report_dict[glod.get_report_general_info_key()][glod.get_variables_key()][glod.get_score_relevant_key()]
                sorted_relevant_vars = sorted(diccionario_relevantes_mif.items(), key=operator.itemgetter(1), reverse=True)
                for relevant_var in sorted_relevant_vars:
                    rel_variable = relevant_var[0]
                    rel_variable = "<strong>" + rel_variable +' '+ str(diccionario_relevantes_mif[rel_variable]) +"</strong>"
                    variables_summary += " "+ rel_variable + "</br>"
                # Remaining in-process variables that were not score-relevant.
                for relevant_var in report_dict[glod.get_report_general_info_key()][glod.get_variables_key()][glod.get_used_in_process()]:
                    if (relevant_var not in diccionario_relevantes_mif):
                        variables_summary += " "+ relevant_var + "</br>"
                variables_summary += " <i>*variables in bold were used to train the models<i></br>"
                variables_summary += "</p>"
                #Information about the model
                accuracy = "</br></br> <strong>Accuracy: "+ str(float(round(report_dict[modelo][glod.get_accuracy_parameter_name()],5)))+"</strong>"
                ranking = get_string_with_ranking_of_models(lista_pares_modelo_indice,modelo)
                # --- Model configuration parameters ---
                model_info = "<p><strong>Parameters used to configure the model</strong></br>"
                for param in report_dict[modelo][glod.get_parameters_key()]:
                    model_info += " <i>"+ param + "</i>: " + str(report_dict[modelo][glod.get_parameters_key()][param]) + "</br>"
                model_info += "</p>"
                # --- Elapsed time (parameter selection and training) ---
                time_info = "<p><strong>Time elapsed</strong></br>"
                tiempo_seleccion_parametros = report_dict[modelo][glod.get_time_parameters_key()][glod.get_time_sel_finish_key()] - report_dict[modelo][glod.get_time_parameters_key()][glod.get_time_sel_init_key()]
                tiempo_entrenamiento = report_dict[modelo][glod.get_time_parameters_key()][glod.get_time_train_finish_key()] - report_dict[modelo][glod.get_time_parameters_key()][glod.get_time_train_init_key()]
                time_info += " "+ "Parameters selection time: "+ str(tiempo_seleccion_parametros) + "</br>"
                time_info += " "+ "Training time: "+ str(tiempo_entrenamiento) + "</br>"
                time_info += "</p>"
                # --- Training data distribution and null (majority-class) accuracy ---
                total_train = 0.0
                vector_of_targets = []
                vector_of_values_by_target = []
                vector_of_percentages_by_target = []
                train_distribution_info = "<p></br><strong>Training Data Distribution</strong></br>"
                for train_target in auxf.natsorted(report_dict[glod.get_report_general_info_key()][glod.get_training_division_key()].keys()):
                    train_distribution_info += " "+ "With target " + str(train_target) + " :"+ str(report_dict[glod.get_report_general_info_key()][glod.get_training_division_key()][train_target]) + "</br>"
                    vector_of_targets.append(train_target)
                    vector_of_values_by_target.append(float(report_dict[glod.get_report_general_info_key()][glod.get_training_division_key()][train_target]))
                    total_train += float(report_dict[glod.get_report_general_info_key()][glod.get_training_division_key()][train_target])
                train_distribution_info += "</p>"
                #getting null train accuracy
                null_train_accuracy = 0.0
                for indice_t in range(len(vector_of_values_by_target)):
                    vector_of_percentages_by_target.append(round(vector_of_values_by_target[indice_t]/total_train,4))
                null_train_accuracy = max(vector_of_percentages_by_target)
                # --- Test data distribution and null accuracy (same scheme) ---
                total_test = 0.0
                vector_of_targets = []
                vector_of_values_by_target = []
                vector_of_percentages_by_target = []
                test_distribution_info = "<p><strong>Test Data Distribution</strong></br>"
                for test_target in auxf.natsorted(report_dict[glod.get_report_general_info_key()][glod.get_test_division_key()].keys()):
                    test_distribution_info += " "+ "With target " + str(test_target) + " :"+ str(report_dict[glod.get_report_general_info_key()][glod.get_test_division_key()][test_target]) + "</br>"
                    vector_of_targets.append(test_target)
                    vector_of_values_by_target.append(float(report_dict[glod.get_report_general_info_key()][glod.get_test_division_key()][test_target]))
                    total_test += float(report_dict[glod.get_report_general_info_key()][glod.get_test_division_key()][test_target])
                test_distribution_info += "</p>"
                null_test_accuracy = 0.0
                for indice_t in range(len(vector_of_values_by_target)):
                    vector_of_percentages_by_target.append(round(vector_of_values_by_target[indice_t]/total_test,4))
                null_test_accuracy = max(vector_of_percentages_by_target)
                event = report_dict[glod.get_event_key()]
                # --- Assemble the template context for this model's report ---
                template_vars = {glod.get_title_key(): "Execution Report",
                                 glod.get_logo_key(): encode_image(report_dict[glod.get_logo_key()].replace('\'','')),
                                 glod.get_model_key(): modelo,
                                 glod.get_report_generic_target_key(): report_dict[glod.get_objective_target_key()],
                                 glod.get_event_key(): event,
                                 glod.get_accuracy_parameter_name(): str(accuracy)+"<br> <strong>Null train acc: "+ str(null_train_accuracy)+"</strong>"+"<br> <strong>Null test acc: "+ str(null_test_accuracy)+ "</strong></p>",
                                 glod.get_models_ranking_key(): ranking,
                                 glod.get_observations_targets_key(): observations_targets,
                                 glod.get_variables_summary_key(): variables_summary,
                                 glod.get_models_info_key(): model_info,
                                 glod.get_time_info_key(): time_info,
                                 glod.get_train_distribution_info_key(): train_distribution_info,
                                 glod.get_test_distribution_info_key(): test_distribution_info
                                }
                # Micro-averaged metrics section.
                template_vars[glod.get_metrics_info_key()] = ""
                for metric in report_dict[modelo][glod.get_metrics_micro_avg_key()]:
                    template_vars[glod.get_metrics_info_key()] += "<p>"+"<strong>"+metric+"</strong>: " + report_dict[modelo][glod.get_metrics_micro_avg_key()][metric] +"</br>"
                template_vars[glod.get_metrics_info_key()] += "</p>"
                # Optional embedded images: parameters plot, confusion matrices,
                # learning curve — only when the corresponding path was recorded.
                if glod.get_model_parameters_plot_name() in report_dict[modelo]:
                    template_vars[glod.get_image_parameters_accuracy_key()] = encode_image(report_dict[modelo][glod.get_model_parameters_plot_name()].replace('\'',''))
                if glod.get_confussion_matrix_train_path_key() in report_dict[modelo]:
                    template_vars[glod.get_conf_train_img_key()] = encode_image(report_dict[modelo][glod.get_confussion_matrix_train_path_key()].replace('\'',''))
                if glod.get_confussion_matrix_test_path_key() in report_dict[modelo]:
                    template_vars[glod.get_conf_test_img_key()] = encode_image(report_dict[modelo][glod.get_confussion_matrix_test_path_key()].replace('\'',''))
                if(glod.get_learning_curve_key() in report_dict[modelo]):
                    template_vars[glod.get_learning_curve_key()] = encode_image(report_dict[modelo][glod.get_learning_curve_key()].replace('\'',''))
                # --- Per-label metrics table; header row emitted once, on the
                # first element, from that element's metric keys ---
                metrics_by_label = "<table width='100%' border='1' cellspacing='0' cellpadding='5'>"
                keys = ''
                for elemento in auxf.natsorted(report_dict[modelo][glod.get_metrics_key()].keys()):
                    if(keys == ''):
                        keys = report_dict[modelo][glod.get_metrics_key()][elemento].keys()
                        metrics_by_label += "<tr><td align='center' class='black'>"+ glod.get_report_generic_target_key() +"</td>"
                        for cabecera in keys:
                            metrics_by_label += "<td align='center' class='black'>" + cabecera +"</td>"
                        metrics_by_label += "</tr>"
                    metrics_by_label += "<tr><td>" + elemento.replace('target_','') + "</td>"
                    for key in keys:
                        metrics_by_label += "<td>"+str(report_dict[modelo][glod.get_metrics_key()][elemento][key])+"</td>"
                    metrics_by_label += "</tr>"
                metrics_by_label += "</table>"
                template_vars[glod.get_metrics_by_label_key()] = metrics_by_label
                # render the html
                with codecs.open(ruta_plantilla_temporal,'w',encoding=enco) as output_file:
                    output_file.write(template.render(template_vars))
                # generate the pdf
                with codecs.open(ruta_plantilla_temporal, mode='r',encoding=enco) as read_html:
                    pdf_resultante = os.path.join(ruta_directorio_informes,modelo + "_report_for_"+ event +".pdf")
                    with open(pdf_resultante, mode='wb') as pdf_gen:
                        pisa.CreatePDF(read_html.read(),pdf_gen)
                        logging.getLogger("xhtml2pdf").addHandler(PisaNullHandler())
    # Clean up the shared temporary HTML file.
    if(os.path.exists(ruta_plantilla_temporal)):
        os.remove(ruta_plantilla_temporal)
def create_report_current_execution(report_dict, lista_eventos, lista_variables_usuario, lista_listas_variables_descartadas, lista_aprendizajes, lista_modelos, diccionario_aprendizajes, ruta_relativa_datos_auxiliares, ruta_directorio_resultados, enco):
    '''Generate the general execution PDF report (common parameters + per-event plan).

    Parameters:
        report_dict: dict built by create_basic_report_data_dict.
        lista_eventos: event names to process.
        lista_variables_usuario: per-event lists of user-relevant features.
        lista_listas_variables_descartadas: per-event lists of discarded variables.
        lista_aprendizajes: per-event learning mode names.
        lista_modelos: per-event model lists; when the learning mode is "all",
            a pair (supervised models, unsupervised models).
        diccionario_aprendizajes: index -> learning-mode display name.
            # assumes keys 1 (supervised) and 2 (unsupervised) — TODO confirm
        ruta_relativa_datos_auxiliares: directory with auxiliary data/templates.
        ruta_directorio_resultados: output directory for the PDF.
        enco: text encoding for the temporary HTML.
    '''
    env = Environment(loader=FileSystemLoader('.'))
    ruta_plantilla_temporal = os.path.join(ruta_relativa_datos_auxiliares,'temp_html.html')
    template = env.get_template(ruta_relativa_datos_auxiliares + '/' + 'general_execution_template.html')
    template_vars = {glod.get_title_key(): report_dict[glod.get_title_key()],
                     glod.get_logo_key(): encode_image(report_dict[glod.get_logo_key()].replace('\'','')),
                     glod.get_general_info_execution_key():''
                    }
    #General parameters (target,umbral,variables_descartadas)
    target = report_dict[glod.get_objective_target_key()]
    umbral = report_dict[glod.get_umbral_key()]
    main_metric = report_dict[glod.get_main_metric_key()]
    feature_selection_method = report_dict[glod.get_feature_selection_key()]
    penalize_falses = report_dict[glod.get_penalization_name()]
    lista_variables_descartadas = report_dict[glod.get_variables_key()][glod.get_deleted_by_user_key()]
    # HTML indentation unit used when composing the body text.
    tabulacion = " "
    informacion = "<h3>Common Parameters </h3></p>"
    informacion += tabulacion+tabulacion + "<i>Objective Target: </i>" + target + "</br></br>"
    informacion += tabulacion+tabulacion + "<i>Percentil for Scoring Function: </i>" + umbral + "</br></br>"
    informacion += tabulacion+tabulacion + "<i>Main metric: </i>" + main_metric + "</br></br>"
    informacion += tabulacion+tabulacion + "<i>Feature selection method: </i>" + feature_selection_method + "</br></br>"
    informacion += tabulacion+tabulacion + "<i>Penalize falses: </i>" + penalize_falses + "</br></br>"
    informacion += tabulacion+tabulacion + "<i>Common Discarded Variables:</i></br>"
    for variable_descartada in lista_variables_descartadas:
        informacion += tabulacion+tabulacion+tabulacion + variable_descartada + "</br>"
    if(lista_variables_descartadas == []):
        informacion += tabulacion+"No variables were selected to be discarded</br>"
    informacion += "</p>"
    # Per-event section: features, discards and the learning/model plan.
    informacion += "<h3>Events to be processed: </h3><p>"
    for indice in range(len(lista_eventos)):
        informacion += tabulacion+"<strong>"+ lista_eventos[indice] + "</strong></br>"
        informacion += tabulacion+tabulacion+"<i>Important features for the user:</i> </br>"
        if(lista_variables_usuario[indice]):
            for variable in lista_variables_usuario[indice]:
                informacion += tabulacion+tabulacion+tabulacion+variable + "</br>"
        else:
            informacion += tabulacion+tabulacion+tabulacion+"No important features were specified</br>"
        informacion += "</br>"
        informacion += tabulacion+tabulacion+"<i>Discarded variables by the user:</i> </br>"
        if(lista_listas_variables_descartadas[indice]):
            for variable in lista_listas_variables_descartadas[indice]:
                informacion += tabulacion+tabulacion+tabulacion+variable + "</br>"
        else:
            informacion += tabulacion+tabulacion+tabulacion+"No variables were discarded</br>"
        informacion += "</br>"
        informacion += tabulacion+tabulacion+"<i>Learnings to be applied: </i></br>"
        aprendizaje = lista_aprendizajes[indice]
        modelos = lista_modelos[indice]
        if aprendizaje == glod.get_all_learning_modes_name(): #looping supervised models
            # "All" mode: modelos is a pair — supervised list then unsupervised list.
            informacion += tabulacion+tabulacion+tabulacion+"<u>" +\
                str(diccionario_aprendizajes[1]) + "</u>:</br>"
            modelos_sup = modelos[0]
            for modelo_act in modelos_sup:
                informacion += tabulacion+tabulacion+tabulacion+tabulacion + modelo_act + "</br>"
            informacion += "</br>"
            informacion += tabulacion+tabulacion+tabulacion+"<u>" +\
                str(diccionario_aprendizajes[2]) + "</u>:</br>"
            modelos_unsup = modelos[1]
            for modelo_act in modelos_unsup:
                informacion += tabulacion+tabulacion+tabulacion+tabulacion + modelo_act + "</br>"
            informacion += "</br>"
        else:
            # Single learning mode: modelos is a flat list of model names.
            informacion += tabulacion+tabulacion+tabulacion+"<u>"+aprendizaje + "</u>:</br>"
            for modelo_act in modelos:
                informacion += tabulacion+tabulacion+tabulacion+tabulacion + modelo_act + "</br>"
    informacion += "</p>"
    template_vars[glod.get_general_info_execution_key()] = informacion
    # Render the HTML, then convert it to PDF.
    with codecs.open(ruta_plantilla_temporal,'w',encoding=enco) as output_file:
        output_file.write(template.render(template_vars))
    with codecs.open(ruta_plantilla_temporal, 'r') as html_leido:
        pdf_resultante = os.path.join(ruta_directorio_resultados,"General_execution_report_"+ target +".pdf")
        with open(pdf_resultante, "wb") as gen_report:
            pisa.CreatePDF(html_leido.read(),gen_report)
            logging.getLogger("xhtml2pdf").addHandler(PisaNullHandler())
    # Clean up the temporary HTML file.
    if(os.path.exists(ruta_plantilla_temporal)):
        os.remove(ruta_plantilla_temporal)
def create_report_current_dictionary_models(dictionary_of_models, basic_paths, list_of_parameters_models_events_dict, logo_path, enco):
    '''This funcion allows to get the pdf file with the current status of the models,
    relevant features and the events to which are applied.

    Parameters:
        dictionary_of_models: nested dict event -> target -> model info.
        basic_paths: pair (auxiliary data directory, results directory).
        list_of_parameters_models_events_dict: ordered key names used to index
            the model-info dicts.  # presumably learning / model path /
            # reassignment / features — verify against caller
        logo_path: path of the logo image to embed.
        enco: text encoding used for the temporary HTML/PDF conversion.
    '''
    ruta_relativa_datos_auxiliares = basic_paths[0]
    ruta_directorio_resultados = basic_paths[1]
    env = Environment(loader=FileSystemLoader('.'))
    ruta_plantilla_temporal = os.path.join(ruta_relativa_datos_auxiliares, 'temp_html.html')
    template = env.get_template(ruta_relativa_datos_auxiliares + '/' +\
        glod.get_dictionary_models_template_name())
    # HTML indentation unit used when composing the body text.
    tabulacion = " "
    template_vars = {glod.get_title_key(): "Report of the information of the Dictionary of models",
                     glod.get_logo_key(): encode_image(logo_path.replace('\'', glod.get_empty_string()))
                    }
    list_elements = [list_of_parameters_models_events_dict[0],
                     list_of_parameters_models_events_dict[3],
                     list_of_parameters_models_events_dict[1]]
    informacion = glod.get_empty_string()
    for event in dictionary_of_models:
        informacion += "<strong><u>"+ event +"</u></strong></br></br>"
        for target in dictionary_of_models[event]:
            informacion += tabulacion + tabulacion + "<strong><i>Target:</i></strong>" + " " + target + "</br>"
            for key in list_elements:
                informacion += tabulacion + tabulacion + "<strong><i>" + key + ": </i></strong>"
                # List-valued entries (feature lists) are printed one per line, sorted.
                if type(list()) == type(dictionary_of_models[event][target][key]):
                    informacion += "<br>"
                    contador = 0
                    ordered_list_features = sorted(dictionary_of_models[event][target][key])
                    while contador < len(ordered_list_features):
                        element = ordered_list_features[contador]
                        informacion += tabulacion + tabulacion + tabulacion +tabulacion + element + "</br>"
                        contador += 1
                else:
                    informacion += dictionary_of_models[event][target][key] + "</br>"
                    if key == list_of_parameters_models_events_dict[0]:
                        # Derive the model name from the stored model path:
                        # last '_'-separated chunk, extension stripped.
                        informacion += tabulacion + tabulacion + "<strong><i>best model: </i></strong> " + dictionary_of_models[event][target][list_of_parameters_models_events_dict[1]].split('_')[-1].split('.')[0] + "</br>" #get model name
                        if dictionary_of_models[event][target][key] == glod.get_unsupervised_name():
                            # Unsupervised models also report their label reassignment map.
                            informacion += tabulacion + tabulacion + "<strong><i>dic_reassingment: </i></strong> " + str(dictionary_of_models[event][target][list_of_parameters_models_events_dict[2]]) + "</br>"
            informacion += "</br>"
    if informacion == glod.get_empty_string():
        informacion = "No models were created yet"
    template_vars[glod.get_info_key()] = informacion
    #html
    with codecs.open(ruta_plantilla_temporal, glod.get_write_mode(), encoding=enco) as output_file:
        renderizado = template.render(template_vars)
        output_file.write(renderizado)
    #pdf
    with codecs.open(ruta_plantilla_temporal, mode=glod.get_read_mode(), encoding=enco) as read_html:
        pdf_resultante = os.path.join(ruta_directorio_resultados,
                                      "Current_status_dictionary_events_and_models.pdf")
        with open(pdf_resultante, mode=glod.get_writebyte_mode()) as pdf_gen:
            # Round-trip through encode/decode drops characters not
            # representable in `enco` before handing the HTML to pisa.
            pisa.CreatePDF(read_html.read().encode(enco, 'ignore').decode(enco), pdf_gen)
    # Clean up the temporary HTML file.
    if os.path.exists(ruta_plantilla_temporal):
        os.remove(ruta_plantilla_temporal)
def prediction_function(BASE_PATH):
    ''' Step 0: Reading configuration parameters and creating log files'''
    # Full prediction pipeline: reads conf.ini, loads the CSVs to catalogue,
    # preloads the best pkl model per event/target, predicts the target for every
    # observation, reports accuracy/confusion matrices and writes the predicted
    # dataset to CSV. BASE_PATH is the project root containing conf.ini.
    '''Creating variable that parses the configuration file. If the file is not found,
    an exception is thrown and finishes the execution'''
    path_to_configuration_file = os.path.join(BASE_PATH, glod.get_config_parser_name())
    config_parser = conp.ConfigParser()
    config_parser.optionxform = str  # keep option names case-sensitive
    enco = glod.get_encoding()
    if (os.path.exists(path_to_configuration_file)):
        config_parser_file = open(path_to_configuration_file, encoding=enco)
        # NOTE(review): readfp is deprecated (read_file) and the handle is never
        # closed — consider a with-block; left as-is here.
        config_parser.readfp(config_parser_file)
    else:
        raise Exception('Configuration file (conf.ini) was not found')
    # conf.ini section names used below.
    logs_section = glod.get_log_section_name()
    auxiliary_data_section = glod.get_auxiliary_section_name()
    input_data_section = glod.get_input_section_name()
    prediction_section = glod.get_prediction_section_name()

    '''Creating log files'''
    log_path = os.path.join(
        BASE_PATH, config_parser.get(logs_section, glod.get_log_directory_name()))
    execution_log_path = os.path.join(
        log_path,
        config_parser.get(logs_section, glod.get_prediction_log_execution_name()) + '.' +
        glod.get_log_files_extension())
    time_log_path = os.path.join(
        log_path,
        config_parser.get(logs_section, glod.get_prediction_log_time_execution_name()) + '.'
        + glod.get_log_files_extension())
    ruta_modelos_prediccion = config_parser.get(
        prediction_section, glod.get_path_to_prediction_models_name())
    auxf.create_directory(log_path)
    step_init_time = datetime.datetime.fromtimestamp(time.time())
    repg.register_log([execution_log_path], '>>>>>>Prediction Phase <<<<<<<< \n' +
                      step_init_time.strftime('%Y-%m-%d %H:%M:%S') + "\n", 0, enco)
    repg.register_log([execution_log_path],
                      '>>>> Step 0 - Reading parameters from conf.ini \n' +
                      step_init_time.strftime('%Y-%m-%d %H:%M:%S') + "\n", '', enco)
    repg.register_log([time_log_path], '>>>>Step 0 starts:\n', 0, enco)

    '''Reading from conf.ini necessary variables for the prediction phase'''
    extension = glod.get_input_files_extension()
    # Column holding the event name and the observation-number column.
    name = config_parser.get(input_data_section, glod.get_event_name_feature_name())
    observation_number = config_parser.get(input_data_section,
                                           glod.get_obsnumber_parameter_name())
    input_files_delimiter_not_catalogued_data = config_parser.get(
        prediction_section, glod.get_delimiter_non_catalogued_data_name())
    # Strip literal quotes around the delimiter as written in conf.ini.
    input_files_delimiter_not_catalogued_data = input_files_delimiter_not_catalogued_data.replace(
        '\'', '')
    # Label that marks observations whose target is unknown (to be predicted).
    label_non_catalogued_data = int(
        config_parser.get(input_data_section, glod.get_non_catalogued_label_name()))
    maximum_number_of_files_to_catalogue = int(
        config_parser.get(prediction_section, glod.get_number_of_files_parameter_name()))
    path_to_directory_with_input_files_to_catalogue = os.path.join(
        BASE_PATH,
        config_parser.get(prediction_section,
                          glod.get_name_directory_to_input_files_catalogue()))
    step_finish_time = datetime.datetime.fromtimestamp(time.time())
    repg.register_log([execution_log_path], '>>>> Step 0 ends \n' +
                      step_init_time.strftime('%Y-%m-%d %H:%M:%S') + "\n", '', enco)
    repg.register_log(
        [time_log_path],
        '>>>>Step 0 - Reading parameters from conf.ini total elapsed time :' +
        str(step_finish_time - step_init_time) + '\n', '', enco)

    ''' Step 1: Reading observations from files and concatenating them '''
    step_init_time = datetime.datetime.fromtimestamp(time.time())
    repg.register_log(
        [execution_log_path],
        '>>>>Step 1 Loading observations from files into dataframes \n' +
        step_init_time.strftime('%Y-%m-%d %H:%M:%S') + "\n", '', enco)
    repg.register_log([time_log_path], '>>>>Step 1 starts:\n', '', enco)
    vector_fullpaths_to_input_files_with_observations_to_catalogue = auxf.get_all_files_in_dir_with_extension(
        path_to_directory_with_input_files_to_catalogue,
        maximum_number_of_files_to_catalogue, extension)

    ''' Path to file with relevant variables '''
    auxiliary_directory_filename = config_parser.get(
        auxiliary_data_section, glod.get_auxiliary_directory_parameter_name())
    path_to_directory_auxiliary_files = os.path.join(
        BASE_PATH,
        config_parser.get(auxiliary_data_section,
                          glod.get_auxiliary_directory_parameter_name()))
    # Report metadata accumulated along the run; the logo path is quoted on
    # purpose — create_report_prediction strips the quotes before encoding.
    report_dict = {
        glod.get_logo_key():
        "'" + os.path.join(path_to_directory_auxiliary_files, glod.get_logo_name()) + "'"
    }

    ''' Substep 1.1 - Reading input files '''
    substep_init_time = datetime.datetime.fromtimestamp(time.time())
    list_registers_to_catalogue = []
    repg.register_log([execution_log_path], '>>>>Step 1.1 \n', '', enco)
    for i in range(
            len(vector_fullpaths_to_input_files_with_observations_to_catalogue)):
        repg.register_log(
            [execution_log_path], '>>Reading Csv to predict number ' + str(i) + ': ' +
            vector_fullpaths_to_input_files_with_observations_to_catalogue[i] + '\n',
            '', enco)
        print("To catalogue : ",
              vector_fullpaths_to_input_files_with_observations_to_catalogue[i])
        print("\n")
        original_data = pd.read_csv(
            vector_fullpaths_to_input_files_with_observations_to_catalogue[i],
            sep=input_files_delimiter_not_catalogued_data)
        list_registers_to_catalogue.append(original_data)
    substep_finish_time = datetime.datetime.fromtimestamp(time.time())
    repg.register_log([time_log_path],
                      '>>>>Subtep 1.1 - reading csv total elapsed time: ' +
                      str(substep_finish_time - substep_init_time) + '\n', '', enco)
    repg.register_log([execution_log_path],
                      '>>>>Subtep 1.1 - reading csv total elapsed time: ' +
                      str(substep_finish_time - substep_init_time) + '\n', '', enco)
    if (list_registers_to_catalogue == list()):
        # NOTE(review): this only logs — execution continues and pd.concat([])
        # below will raise; presumably intended to stop here. TODO confirm.
        repg.register_log(
            [time_log_path],
            '>>>> Prediction process finished: Observations were not found ' +
            str(substep_finish_time - substep_init_time) + '\n', '', enco)
        repg.register_log(
            [execution_log_path],
            '>>>> Prediction process finished: Obervations were not found ' +
            str(substep_finish_time - substep_init_time) + '\n', '', enco)
        print('>>>> Prediction process finished: Obervations were not found ')

    ''' Substep 1.2 - Concatenating read csv'''
    substep_init_time = datetime.datetime.fromtimestamp(time.time())
    df_data_to_catalogue = pd.concat(list_registers_to_catalogue)
    # Rebuild the columns list (effectively a copy of the existing names).
    reco_pandas_features = []
    for feature in df_data_to_catalogue.columns:
        reco_pandas_features.append(feature)
    df_data_to_catalogue.columns = reco_pandas_features
    try:
        # Probe access: fails fast if the configured event-name column is absent.
        df_data_to_catalogue[name]
    except Exception as e:
        repg.register_log([
            execution_log_path
        ], '>> An Eception has happened: Incorrect name of feature with events ' +
                          str(e) + ' ' + datetime.datetime.fromtimestamp(
                              time.time()).strftime('%Y-%m-%d %H:%M:%S') + '\n', '',
                          enco)
        print(
            '>> An Exception has happened, check configuration file: Incorrect name of feature with events "'
            + str(e) + '"')
        error_trace = "Full trace:\n" + str(traceback.format_exc())
        repg.register_log([execution_log_path], error_trace, '', enco)
        raise Exception(e)

    ''' Erasing indexes introduced by pandas, if any '''
    if 'index' in df_data_to_catalogue.columns:
        df_data_to_catalogue = df_data_to_catalogue.drop('index', axis=1)
    if 'Unnamed: 0' in df_data_to_catalogue.columns:
        df_data_to_catalogue = df_data_to_catalogue.drop('Unnamed: 0', axis=1)
    substep_finish_time = datetime.datetime.fromtimestamp(time.time())
    step_finish_time = datetime.datetime.fromtimestamp(time.time())
    total_time_step_1 = step_finish_time - step_init_time
    repg.register_log([
        time_log_path
    ], '>>>> Substep 1.2 - Loading observations from files into dataframes total elapsed time: '
                      + str(substep_finish_time - substep_init_time) + "\n", '', enco)
    repg.register_log([execution_log_path], '>>>>Substep 1.2 ends ' +
                      step_init_time.strftime('%Y-%m-%d %H:%M:%S') + "\n", '', enco)
    repg.register_log([
        time_log_path
    ], '>>>> Step 1 - Reading and concatenating csv into dataframe total elapsed time: '
                      + str(total_time_step_1) + "\n", '', enco)
    repg.register_log([execution_log_path], '>>>>Step 1 ends ' +
                      step_init_time.strftime('%Y-%m-%d %H:%M:%S') + "\n", '', enco)

    ''' Step 2: Reading prediction models dictionary and preloading best pkl models'''
    step_init_time = datetime.datetime.fromtimestamp(time.time())
    repg.register_log(
        [execution_log_path],
        '>>>>Step 2 Reading models dict and preload best pkl models \n' +
        step_init_time.strftime('%Y-%m-%d %H:%M:%S') + "\n", '', enco)
    repg.register_log([time_log_path], '>>>>Step 2 starts:\n', '', enco)

    '''Getting dictionary features in order to recodify and the events to catalogue'''
    substep_init_time = datetime.datetime.fromtimestamp(time.time())
    # dic_event_model: {event: {target: {model metadata}}} loaded from pickle.
    dic_event_model, handler = auxf.open_dictionary_pickle_format_for_reading(
        ruta_modelos_prediccion)
    substep_finish_time = datetime.datetime.fromtimestamp(time.time())
    repg.register_log([
        time_log_path
    ], '>>>> Substep 2.1 - Reading dictionary with models total elapsed time: ' +
                      str(substep_finish_time - substep_init_time) + "\n", '', enco)
    repg.register_log([execution_log_path],
                      '>>>>Substep 2.1 Reading dictionary with models ends ' +
                      substep_finish_time.strftime('%Y-%m-%d %H:%M:%S') + "\n", '',
                      enco)

    '''Preloading models in memory'''
    substep_init_time = datetime.datetime.fromtimestamp(time.time())
    repg.register_log([execution_log_path],
                      '>>>>Substep 2.2 - Preloading best pkl models \n' +
                      substep_init_time.strftime('%Y-%m-%d %H:%M:%S') + "\n", '', enco)
    list_features_to_catalogue = []
    dictionary_of_events_preloaded_models = {}
    print(''' Events, target and predictions models
 ##############################################################################################
 ##
 ##''')
    for event in dic_event_model.keys():
        dictionary_of_events_preloaded_models[event] = {}
        try:
            for target_trained in dic_event_model[event]:
                dictionary_of_events_preloaded_models[event][target_trained] = {}
                # Deserialize the best trained model for this event/target.
                best_model = joblib.load(dic_event_model[event][target_trained]
                                         [glod.get_model_path_key()])
                print("\t\t\t\t",
                      dic_event_model[event][target_trained][glod.get_model_path_key()])
                dictionary_of_events_preloaded_models[event][target_trained][
                    glod.get_best_model_key()] = best_model
                list_features_to_catalogue += dic_event_model[event][target_trained][
                    glod.get_current_features_key()]
        except Exception as e:
            print(''' ##
 ##
 ##############################################################################################''')
            print('The pkl neccesary for the prediction of the observations of ' +
                  event + ' was not found ')
            repg.register_log(
                [execution_log_path],
                'The pkl neccesary for the prediction of the observations of ' + event +
                ' was not found ' + substep_finish_time.strftime('%Y-%m-%d %H:%M:%S') +
                "\n", '', enco)
            raise Exception(e)
    print(''' ##
 ##
 ##############################################################################################''')
    substep_finish_time = datetime.datetime.fromtimestamp(time.time())
    repg.register_log(
        [time_log_path],
        '>>>>Substep 2.2 - Preloading best pkl models total elapsed time: ' +
        str(substep_finish_time - substep_init_time) + '\n', '', enco)
    repg.register_log([execution_log_path],
                      '>>>>Substep 2.2 - Preloading best pkl models ends \n' +
                      substep_finish_time.strftime('%Y-%m-%d %H:%M:%S') + "\n", '',
                      enco)
    step_finish_time = datetime.datetime.fromtimestamp(time.time())
    total_time_step_2 = step_finish_time - step_init_time
    repg.register_log([
        time_log_path
    ], '>>>> Step 2 - Reading models dict and preload best pkl models total elapsed time: '
                      + str(total_time_step_2) + "\n", '', enco)
    repg.register_log([execution_log_path], '>>>>Step 2 ends \n' +
                      step_finish_time.strftime('%Y-%m-%d %H:%M:%S') + "\n", '', enco)

    ''' Step 3: Classifying observations usin preloaded models '''
    maximum_number_of_observations_to_catalogue = len(df_data_to_catalogue)
    step_init_time = datetime.datetime.fromtimestamp(time.time())
    substep_init_time = datetime.datetime.fromtimestamp(time.time())
    repg.register_log([time_log_path], '>>>> Step 3 starts \n', '', enco)
    repg.register_log([execution_log_path],
                      '>>>>Step 3 - Predicting targets using best models \n' +
                      substep_init_time.strftime('%Y-%m-%d %H:%M:%S') + "\n", '', enco)
    repg.register_log(
        [execution_log_path],
        '>>>>Substep 3.1 - Preparing global dataframe of results \n' +
        substep_init_time.strftime('%Y-%m-%d %H:%M:%S') + "\n", '', enco)
    observations = df_data_to_catalogue.iloc[
        0:maximum_number_of_observations_to_catalogue]
    events_to_predict = list(set(observations[name].values))
    #target to predict
    target_to_predict = config_parser.get(prediction_section,
                                          glod.get_target_parameter_name())
    #column for predictions
    prediction_column = target_to_predict + '_pred'
    df_global_predictions = pd.DataFrame(
        data=[], columns=[observation_number, prediction_column])
    substep_finish_time = datetime.datetime.fromtimestamp(time.time())
    repg.register_log([
        time_log_path
    ], '>>>>Subtep 3.1 - Preparing global dataframe of results total elapsed time: ' +
                      str(substep_finish_time - substep_init_time) + "\n", '', enco)
    repg.register_log([execution_log_path], '>>>>Substep 3.1 ends \n' +
                      substep_finish_time.strftime('%Y-%m-%d %H:%M:%S') + "\n", '',
                      enco)
    total_number_predicted_observations = 0
    final_list_events_to_predict = []
    # Predict event by event, accumulating per-observation predictions into
    # df_global_predictions.
    for event in events_to_predict:
        substep_init_time = datetime.datetime.fromtimestamp(time.time())
        repg.register_log([time_log_path],
                          '>>>>Subtep 3.2 - Predicting targets for event ' + event +
                          ' \n', '', enco)
        repg.register_log(
            [execution_log_path], '>>>>Substep 3.2 - Predicting targets for event ' +
            event + substep_init_time.strftime('%Y-%m-%d %H:%M:%S') + "\n", '', enco)
        df_event = observations[observations[name] == event]
        df_event_obsnumber = pd.DataFrame(
            data=df_event[observation_number].values, columns=[observation_number])
        try:
            dic_event = dictionary_of_events_preloaded_models[event]
            total_number_predicted_observations += len(df_event)
            if target_to_predict not in df_event.columns:
                repg.register_log(
                    [execution_log_path], '>> ###Error: The target ' +
                    target_to_predict + ' does not exist in the dataset of the event ' +
                    event + '\n\n', '', enco)
                raise Exception('>> ###Error: The target ' + target_to_predict +
                                ' does not exist in the dataset of the event ' + event)
            if target_to_predict in dic_event:
                repg.register_log(
                    [execution_log_path], '>> The event ' + event + ' (with ' +
                    str(len(df_event)) +
                    ' observations), has a model for predict target labels \n', '',
                    enco)
                features_event = dic_event_model[event][target_to_predict][
                    glod.get_current_features_key()]
                model_event = dictionary_of_events_preloaded_models[event][
                    target_to_predict][
                        glod.get_best_model_key()]  #se referencia al modelo
                predictions = model_event.predict(df_event[features_event])
                df_event_obsnumber[prediction_column] = predictions
                recatalogued_predictions = []
                # NOTE(review): target_trained is the leftover loop variable from
                # Step 2, not target_to_predict — looks unintended; TODO confirm.
                if dic_event_model[event][target_trained][
                        glod.get_learning_key()] == glod.get_unsupervised_name():
                    # Unsupervised models: map raw cluster ids back to labels via
                    # the reassignment dictionary.
                    for pred in predictions:
                        recatalogued_predictions.append(
                            dic_event_model[event][target_trained][
                                glod.get_reasignment_dict_key()][pred])
                    predictions = recatalogued_predictions
                    df_event_obsnumber[prediction_column] = predictions
                df_event_obsnumber[name] = event
            else:
                # No model for this target: carry the original labels through.
                repg.register_log(
                    [execution_log_path], '>> The event ' + event + ' (with ' +
                    str(len(df_event)) +
                    ' observations), has not models for predicting target (' +
                    target_to_predict + '). Taking original prediction \n', '', enco)
                total_number_predicted_observations += len(df_event)
                df_event_obsnumber[prediction_column] = df_event[
                    target_to_predict].values
            final_list_events_to_predict.append(event)
        except Exception as excep:  #no predictions models
            repg.register_log([execution_log_path],
                              '>> The prediction process has been aborted ' +
                              str(excep) + '\n', '', enco)
            #raise Exception(e)
        df_global_predictions = pd.concat(
            [df_global_predictions, df_event_obsnumber])
        df_global_predictions[observation_number] = df_global_predictions[
            observation_number].apply(int)
        substep_finish_time = datetime.datetime.fromtimestamp(time.time())
        repg.register_log([time_log_path],
                          '>>>>Substep 3.2 - Predicting targets for event ' + event +
                          ' total elapsed time: ' +
                          str(substep_finish_time - substep_init_time) + "\n", '',
                          enco)
        repg.register_log([
            time_log_path
        ], '>>>>Substep 3.2 - Estimated elapsed time predicting one observation for event '
                          + event + ': ' + str(
                              float((substep_finish_time -
                                     substep_init_time).total_seconds()) /
                              float(len(df_event))) + "\n", '', enco)
        repg.register_log(
            [execution_log_path], '>>>>Substep 3.2 - Predicting targets for event ' +
            event + ' ends ' + substep_init_time.strftime('%Y-%m-%d %H:%M:%S') + "\n",
            '', enco)
    # Align dtypes before merging predictions back onto the observations.
    type_observation_number = df_global_predictions[observation_number].dtypes
    observations[observation_number] = observations[observation_number].astype(
        type_observation_number)
    observations = pd.merge(observations,
                            df_global_predictions,
                            on=[observation_number, name])
    # Warn about events that had data but no usable model.
    for not_proc_event in set.difference(set(events_to_predict),
                                         set(final_list_events_to_predict)):
        repg.register_log(
            [execution_log_path], '>> WARNING: Event ' + not_proc_event +
            ' has not models, but validation/unkown samples dataset was provided\n',
            '', enco)
        print("**WARNING**: Event " + not_proc_event +
              " has not models, but validation/unkown samples dataset was provided")
    # Per-event reporting: accuracy on known targets, counts on unknown ones.
    for event in events_to_predict:
        print('\n-> Event: ', event)
        df_event = observations[observations[name] == event]
        path_to_predicted_data_root = 'Prediction_models'
        path_to_predicted_data = config_parser.get(
            prediction_section, glod.get_path_predicted_data_key())
        #Accuracy print
        if (event in dic_event_model
                and target_to_predict in dic_event_model[event]):
            report_dict[event] = {target_to_predict: {}}
            print(
                '\t\tObservations with known target ',
                len(df_event[
                    df_event[target_to_predict] != label_non_catalogued_data]))
            print(
                '\t\tObservations with unknown target ',
                len(df_event[df_event[target_to_predict] ==
                             label_non_catalogued_data]))
            df_observaciones = df_event[
                df_event[target_to_predict] != label_non_catalogued_data]
            total_obs = len(df_observaciones)
            #computing confusion matrix
            pred_labels = list(df_observaciones[prediction_column].values)
            true_labels = list(df_observaciones[target_to_predict].values)
            if (pred_labels != [] and true_labels != []):
                df_observaciones_temp = df_observaciones[
                    df_observaciones[target_to_predict] ==
                    df_observaciones[prediction_column]]
                total_aciertos = len(df_observaciones_temp)
                confusion_matrix = metr.get_confusion_matrix(
                    true_labels, pred_labels,
                    sorted(list(set(df_observaciones[target_to_predict].values))))
                confusion_matrix_name = 'confusion_matrix_' + event + '_' + target_to_predict
                metr.save_confusion_matrix(
                    confusion_matrix,
                    sorted(list(set(df_observaciones[target_to_predict].values))),
                    os.path.join(path_to_predicted_data_root,
                                 confusion_matrix_name), 'png')
                # Collect per-target stats for the PDF report.
                report_dict[event][target_to_predict][
                    glod.get_best_model_key()] = str(
                        dictionary_of_events_preloaded_models[event]
                        [target_to_predict][glod.get_best_model_key()])
                report_dict[event][target_to_predict]['Correct'] = total_aciertos
                report_dict[event][target_to_predict]['Total'] = len(df_event[
                    df_event[target_to_predict] != label_non_catalogued_data])
                report_dict[event][target_to_predict][
                    glod.get_accuracy_parameter_name()] = float(
                        float(total_aciertos) / float(
                            len(df_event[df_event[target_to_predict] !=
                                         label_non_catalogued_data])))
                report_dict[event][target_to_predict][
                    'target_to_predict_cm'] = os.path.join(
                        path_to_predicted_data_root, confusion_matrix_name) + '.png'
                report_dict[event][target_to_predict]['Predicted'] = len(
                    df_event[df_event[target_to_predict] ==
                             label_non_catalogued_data])
            else:
                total_obs = 0
            if (total_obs != 0):
                repg.register_log(
                    [time_log_path],
                    '>>>>Substep 3.2 Extra - Accuracy of the model for event ' +
                    event + ' and target ' + target_to_predict + '(' +
                    str(float(total_aciertos)) + '/' + str(float(total_obs)) + '): ' +
                    str(float(total_aciertos) / float(total_obs)) + "\n", '', enco)
                repg.register_log(
                    [execution_log_path],
                    '>>>>Substep 3.2 Extra - Accuracy of the model for event ' +
                    event + ' and target ' + target_to_predict + '(' +
                    str(float(total_aciertos)) + '/' + str(float(total_obs)) + '): ' +
                    str(float(total_aciertos) / float(total_obs)) + "\n", '', enco)
            else:
                repg.register_log(
                    [time_log_path],
                    '>>>>Substep 3.2 Extra - Accuracy of the model for event ' +
                    event + ' and target ' + target_to_predict +
                    ': not calculated (no observations found) \n', '', enco)
                repg.register_log(
                    [execution_log_path],
                    '>>>>Substep 3.2 Extra - Accuracy of the model for event ' +
                    event + ' and target ' + target_to_predict +
                    ': not calculated (no observations found) \n', '', enco)
            print(''' Clasification for known targets results
 ##############################################################################################'
 ##
 ##''')
            if (total_obs != 0):
                print('\t\t\t\tCorrect predictions performed for ' +
                      target_to_predict + ' of event ' + event + ': ' +
                      str(total_aciertos) + '/' + str(total_obs))
            else:
                print('\t\t\t\tNo predictions performed for severity of event ' +
                      event)
            print('\t\t\t\tCheck output data at: ', path_to_predicted_data)
            print(
                '\t\t\t\tCheck predictions log for accuracy summary and more information'
            )
            print('\n\t\t\t\t\t\tThanks for using RADSSo')
            print(''' ##
 ##
 ##############################################################################################''')
        #Determinamos cuantas predicciones sobre datos desconocidos se han realizado
        # (counts how many predictions were made on unknown data)
        if (event in dic_event_model
                and target_to_predict in dic_event_model[event]):
            df_observaciones = df_event[df_event[target_to_predict] ==
                                        label_non_catalogued_data]
            total_obs = len(df_observaciones)
            print(''' Prediction of unknown target results
 ##############################################################################################'
 ##
 ##''')
            print(
                '\t\t\t\tTotal of predictions performed for ' + target_to_predict +
                ': ', str(total_obs))
            print('\t\t\t\tCheck output data at: ', path_to_predicted_data)
            print(
                '\t\t\t\tCheck predictions log for accuracy summary and more information'
            )
            print('\n\t\t\t\t\t\tThanks for using RADSSo')
            print(''' ##
 ##
 ##############################################################################################\n''')
        try:
            # NOTE(review): pred_labels/true_labels may be unbound here if no
            # event entered the accuracy branch above — the except hides it.
            if (event in report_dict):
                if (pred_labels != [] and true_labels != []):
                    repg.create_report_prediction(report_dict,
                                                  [event, target_to_predict],
                                                  auxiliary_directory_filename,
                                                  'Prediction_models', enco)
        except Exception as e:
            print(''''**********************
****Critical Exception****
**************************''')
            print(e)
    step_finish_time = datetime.datetime.fromtimestamp(time.time())
    total_time_step_3 = step_finish_time - step_init_time
    print('\n\n--- Total Elapsed time --- ' + str(total_time_step_3))
    repg.register_log(
        [time_log_path],
        '>>>>Step 3 - Predicting using best models total elapsed time: ' +
        str(total_time_step_3) + "\n", '', enco)
    repg.register_log([time_log_path],
                      '>>>> Number of observations processed by second ' + str(
                          float(total_number_predicted_observations) /
                          float(total_time_step_3.total_seconds())) + "\n", '', enco)
    repg.register_log([time_log_path],
                      '>>>> Number of seconds by prediction ' + str(
                          float(total_time_step_3.total_seconds()) /
                          float(total_number_predicted_observations)) + "\n", '',
                      enco)
    repg.register_log(
        [time_log_path],
        '>>>> Recodification and Prediction Phase - total elapsed time: ' +
        str(total_time_step_1 + total_time_step_2 + total_time_step_3) + "\n", '',
        enco)
    # Persist the merged observations (with the new prediction column) to CSV.
    observations.to_csv(path_to_predicted_data,
                        sep=input_files_delimiter_not_catalogued_data,
                        encoding=enco)
    return ()