Exemplo n.º 1
0
def create_basic_report_data_dict(umbral,target,main_metric,feature_selection_method,penalize_falses,lista_variables_descartadas,ruta_logo):
    '''Assemble the data dictionary used for the general execution overview.

    The threshold, main metric, feature selection method and penalization
    flag are stored as strings; the target, the logo path and the list of
    variables discarded by the user are stored as received. The general
    execution information entry starts as an empty string.
    '''
    # Nested section holding the variables the user asked to discard
    seccion_variables = {glod.get_deleted_by_user_key(): lista_variables_descartadas}

    # Entries are inserted one by one, in the order the report expects them
    report_data = {}
    report_data[glod.get_title_key()] = "Overview With Execution Information"
    report_data[glod.get_logo_key()] = ruta_logo
    report_data[glod.get_umbral_key()] = str(umbral)
    report_data[glod.get_main_metric_key()] = str(main_metric)
    report_data[glod.get_feature_selection_key()] = str(feature_selection_method)
    report_data[glod.get_penalization_name()] = str(penalize_falses)
    report_data[glod.get_objective_target_key()] = target
    report_data[glod.get_variables_key()] = seccion_variables
    report_data[glod.get_general_info_execution_key()] = ''
    return report_data
Exemplo n.º 2
0
def create_report_data_dict(evento,umbral,target,lista_variables_descartadas,ruta_logo):
    '''Create the skeleton of the report data dictionary for one event.

    The general information section starts with empty containers for the
    target distribution, the training division and the test division, plus a
    variables section that already holds the variables discarded by the user.
    The threshold is stored as a string and the warning entry starts empty.
    '''
    # Variables section: only the user-discarded list is known at this point
    seccion_variables = {}
    seccion_variables[glod.get_deleted_by_user_key()] = lista_variables_descartadas
    seccion_variables[glod.get_empty_or_constant_key()] = []
    seccion_variables[glod.get_score_relevant_key()] = []

    # General information section, filled in later by the execution
    informacion_general = {}
    informacion_general[glod.get_report_generic_target_key()] = {}
    informacion_general[glod.get_variables_key()] = seccion_variables
    informacion_general[glod.get_training_division_key()] = {}
    informacion_general[glod.get_test_division_key()] = {}

    report_data = {}
    report_data[glod.get_objective_target_key()] = target
    report_data[glod.get_event_key()] = evento
    report_data[glod.get_logo_key()] = ruta_logo
    report_data[glod.get_report_general_info_key()] = informacion_general
    report_data[glod.get_umbral_key()] = str(umbral)
    report_data[glod.get_warning_key()] = ''
    return report_data
Exemplo n.º 3
0
def create_report_data_dict(evento, umbral, target, lista_variables_descartadas, ruta_logo):
    '''Build the initial structure of the report data dictionary for the
    current event.

    The returned dictionary holds the target, the event, the logo path, the
    threshold (as a string), a warning entry initialised with
    glod.get_empty_string() and a general information section whose
    containers are still empty, except for the list of variables discarded
    by the user.
    '''

    # Innermost level: variables grouped by the reason they are listed
    variables_info = {}
    variables_info[glod.get_deleted_by_user_key()] = lista_variables_descartadas
    variables_info[glod.get_empty_or_constant_key()] = []
    variables_info[glod.get_score_relevant_key()] = []

    # Middle level: general information shared by every model of the event
    general_info = {}
    general_info[glod.get_report_generic_target_key()] = {}
    general_info[glod.get_variables_key()] = variables_info
    general_info[glod.get_training_division_key()] = {}
    general_info[glod.get_test_division_key()] = {}

    # Top level
    report_data = {}
    report_data[glod.get_objective_target_key()] = target
    report_data[glod.get_event_key()] = evento
    report_data[glod.get_logo_key()] = ruta_logo
    report_data[glod.get_report_general_info_key()] = general_info
    report_data[glod.get_umbral_key()] = str(umbral)
    report_data[glod.get_warning_key()] = glod.get_empty_string()
    return report_data
Exemplo n.º 4
0
def create_basic_report_data_dict(basic_parameters, lista_variables_descartadas, ruta_logo):
    '''Build the structure of the data dictionary for the general report.

    basic_parameters is read by position: threshold, target, main metric,
    feature selection method and penalization flag (indexes 0 to 4). Every
    one of them except the target is stored as a string.
    '''

    # Same positional reads as always, done in a single unpacking statement
    umbral, target, main_metric, feature_selection_method, penalize_falses = [
        basic_parameters[posicion] for posicion in range(5)
    ]

    variables_info = {glod.get_deleted_by_user_key(): lista_variables_descartadas}

    # Ordered (key, value) pairs; dict() keeps the insertion order of the pairs
    entradas = [
        (glod.get_title_key(), "Overview With Execution Information"),
        (glod.get_logo_key(), ruta_logo),
        (glod.get_umbral_key(), str(umbral)),
        (glod.get_main_metric_key(), str(main_metric)),
        (glod.get_feature_selection_key(), str(feature_selection_method)),
        (glod.get_penalization_name(), str(penalize_falses)),
        (glod.get_objective_target_key(), target),
        (glod.get_variables_key(), variables_info),
        (glod.get_general_info_execution_key(), glod.get_empty_string()),
    ]
    report_data = dict(entradas)
    return report_data
Exemplo n.º 5
0
def _model_report_write_pdf(template, template_vars, ruta_plantilla_temporal, pdf_resultante, enco):
    '''Render template to the temporary HTML file and convert that file to a PDF.'''
    with codecs.open(ruta_plantilla_temporal, 'w', encoding=enco) as output_file:
        output_file.write(template.render(template_vars))

    # The HTML is read back with the same encoding it was written with;
    # relying on the platform default could fail for non-ASCII content.
    with codecs.open(ruta_plantilla_temporal, 'r', encoding=enco) as html_leido:
        with open(pdf_resultante, 'wb') as pdf_file:
            pisa.CreatePDF(html_leido.read(), pdf_file)

    # Attach the null handler only once: adding a new instance after every
    # PDF would make the logger's handler list grow without bound.
    pisa_logger = logging.getLogger("xhtml2pdf")
    if not any(isinstance(handler, PisaNullHandler) for handler in pisa_logger.handlers):
        pisa_logger.addHandler(PisaNullHandler())


def _model_report_distribution(division, heading):
    '''Build the HTML paragraph of a per-target distribution and its null accuracy.

    division maps each target to a value convertible to float and heading is
    the opening HTML of the paragraph. Returns (html, null_accuracy), where
    null_accuracy is the largest fraction of the total held by a single
    target, rounded to 4 decimals, or 0.0 when the division is empty or its
    values add up to zero.
    '''
    html = heading
    counts = []
    for target_value in auxf.natsorted(division.keys()):
        html += "&nbsp;&nbsp;&nbsp;&nbsp;" + "With target " + str(target_value) + " :" + str(division[target_value]) + "</br>"
        counts.append(float(division[target_value]))
    html += "</p>"

    total = sum(counts)
    if not counts or total == 0:
        # Nothing to divide by: report 0.0 instead of failing on max()/division
        return html, 0.0
    return html, max(round(count / total, 4) for count in counts)


def _model_report_variables_summary(variables_info):
    '''Build the HTML summary of discarded, requested and used variables.'''
    tab = "&nbsp;&nbsp;&nbsp;&nbsp;"
    discarded_for_event = variables_info[glod.get_user_discarded_key()]

    # NOTE(review): several tags below are not well formed ("</i></u>" is
    # closed in the wrong order and the footnotes end in "<i>" instead of
    # "</i>"). They are left untouched so the rendered reports do not change.
    parts = ["<p><strong>Summary of variables</strong></br>"]

    parts.append("<br><i><u>Deleted by the user at the begining:</i></u></br>")
    for deleted_var in variables_info[glod.get_deleted_by_user_key()]:
        if deleted_var in discarded_for_event:
            shown = "<strong>" + deleted_var + "</strong>"
        else:
            shown = deleted_var
        parts.append(tab + shown + "</br>")
    parts.append("&nbsp;&nbsp;&nbsp;&nbsp;<i>*variables in bold were specified by the user to be discarded specifically for this event<i></br>")
    parts.append("</br>")

    parts.append("<br><i><u>Deleted in execution time(Empty or Constant):</i></u></br>")
    for emp_con_var in variables_info[glod.get_empty_or_constant_key()]:
        parts.append(tab + emp_con_var + "</br>")
    parts.append("</br>")

    parts.append("<br><i><u>Requested for the event by the user:</i></u></br>")
    for req_var in variables_info[glod.get_user_requested_key()]:
        parts.append(tab + req_var + "</br>")
    parts.append("</br>")

    parts.append("<br><i><u>Used during the process:</i></u></br>")
    # Scored variables first, highest score on top
    relevant_scores = variables_info[glod.get_score_relevant_key()]
    for name, score in sorted(relevant_scores.items(), key=operator.itemgetter(1), reverse=True):
        scored = "<strong>" + name + '&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;' + str(score) + "</strong>"
        parts.append(tab + scored + "</br>")
    # Then the variables that were used but have no score
    for used_var in variables_info[glod.get_used_in_process()]:
        if used_var not in relevant_scores:
            parts.append(tab + used_var + "</br>")
    parts.append("&nbsp;&nbsp;&nbsp;&nbsp;<i>*variables in bold were used to train the models<i></br>")
    parts.append("</p>")
    return "".join(parts)


def _model_report_metrics_table(metrics):
    '''Build the HTML table with one row per label and one column per metric.'''
    table = "<table width='100%' border='1' cellspacing='0' cellpadding='5'>"
    columns = None
    for label in auxf.natsorted(metrics.keys()):
        if columns is None:
            # The first label decides the columns and triggers the header row
            columns = metrics[label].keys()
            table += "<tr><td align='center' class='black'>" + glod.get_report_generic_target_key() + "</td>"
            for column in columns:
                table += "<td align='center' class='black'>" + column + "</td>"
            table += "</tr>"
        table += "<tr><td>" + label.replace('target_', '') + "</td>"
        for column in columns:
            table += "<td>" + str(metrics[label][column]) + "</td>"
        table += "</tr>"
    table += "</table>"
    return table


def create_report_current_model(report_dict,lista_modelos,ruta_relativa_datos_auxiliares,ruta_directorio_informes,enco):
    '''Generate the PDF report(s) of the current event.

    When lista_modelos is an empty list, a single "incomplete execution"
    report is written. Otherwise one report is written for every model of
    lista_modelos that has an entry in report_dict. Each report is rendered
    to a temporary HTML file inside ruta_relativa_datos_auxiliares, converted
    to PDF inside ruta_directorio_informes and the temporary file is removed
    afterwards, also when an error interrupts the generation.

    Parameters:
        report_dict: report data of the event (general information plus one
            entry per model).
        lista_modelos: names of the models trained for the event.
        ruta_relativa_datos_auxiliares: directory with the HTML templates.
        ruta_directorio_informes: directory where the PDF files are written.
        enco: encoding used to write and read the temporary HTML file.
    '''
    env = Environment(loader=FileSystemLoader('.'))
    ruta_plantilla_temporal = os.path.join(ruta_relativa_datos_auxiliares, 'temp_html.html')
    tab = "&nbsp;&nbsp;&nbsp;&nbsp;"

    try:
        if lista_modelos == []:  # process not completed
            # Template names are joined with '/' on purpose (not os.path.join);
            # presumably because the template loader expects forward slashes.
            template = env.get_template(ruta_relativa_datos_auxiliares + '/' + 'incomplete_event_report_template.html')

            template_vars = {glod.get_title_key(): "Incomplete Execution Report",
                             glod.get_logo_key(): encode_image(report_dict[glod.get_logo_key()].replace('\'', '')),
                             glod.get_report_generic_target_key(): report_dict[glod.get_objective_target_key()],
                             glod.get_event_key(): report_dict[glod.get_event_key()],
                             glod.get_info_key(): "        " + report_dict['Warning_info']
                             }

            pdf_resultante = os.path.join(ruta_directorio_informes, "report_" + report_dict[glod.get_event_key()] + "_incomplete.pdf")
            _model_report_write_pdf(template, template_vars, ruta_plantilla_temporal, pdf_resultante, enco)

        else:
            lista_pares_modelo_indice = auxf.order_models_by_score_and_time(report_dict, lista_modelos)
            template = env.get_template(ruta_relativa_datos_auxiliares + '/' + 'report_template.html')  # standard template

            modelos_presentes = [modelo for modelo in lista_modelos if modelo in report_dict]
            if modelos_presentes:
                # Sections that do not depend on the model are built only once
                general_info = report_dict[glod.get_report_general_info_key()]

                target_counts = general_info[glod.get_report_generic_target_key()]
                observations_targets = "<p><strong>Target distribution of observations</strong></br>"
                for ob_target in auxf.natsorted(target_counts.keys()):
                    observations_targets += tab + "With target " + str(ob_target) + " :" + str(target_counts[ob_target]) + "</br>"
                observations_targets += "</p>"

                variables_summary = _model_report_variables_summary(general_info[glod.get_variables_key()])

                train_distribution_info, null_train_accuracy = _model_report_distribution(
                    general_info[glod.get_training_division_key()],
                    "<p></br><strong>Training Data Distribution</strong></br>")
                test_distribution_info, null_test_accuracy = _model_report_distribution(
                    general_info[glod.get_test_division_key()],
                    "<p><strong>Test Data Distribution</strong></br>")

                event = report_dict[glod.get_event_key()]

            for modelo in modelos_presentes:
                model_data = report_dict[modelo]

                # Information about the model
                accuracy = "</br></br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<strong>Accuracy: " + str(float(round(model_data[glod.get_accuracy_parameter_name()], 5))) + "</strong>"

                ranking = get_string_with_ranking_of_models(lista_pares_modelo_indice, modelo)

                model_info = "<p><strong>Parameters used to configure the model</strong></br>"
                parameters = model_data[glod.get_parameters_key()]
                for param in parameters:
                    model_info += "&nbsp;&nbsp;&nbsp;&nbsp;<i>" + param + "</i>: " + str(parameters[param]) + "</br>"
                model_info += "</p>"

                times = model_data[glod.get_time_parameters_key()]
                tiempo_seleccion_parametros = times[glod.get_time_sel_finish_key()] - times[glod.get_time_sel_init_key()]
                tiempo_entrenamiento = times[glod.get_time_train_finish_key()] - times[glod.get_time_train_init_key()]
                time_info = "<p><strong>Time elapsed</strong></br>"
                time_info += tab + "Parameters selection time: " + str(tiempo_seleccion_parametros) + "</br>"
                time_info += tab + "Training time: " + str(tiempo_entrenamiento) + "</br>"
                time_info += "</p>"

                template_vars = {glod.get_title_key(): "Execution Report",
                                 glod.get_logo_key(): encode_image(report_dict[glod.get_logo_key()].replace('\'', '')),
                                 glod.get_model_key(): modelo,
                                 glod.get_report_generic_target_key(): report_dict[glod.get_objective_target_key()],
                                 glod.get_event_key(): event,
                                 glod.get_accuracy_parameter_name(): str(accuracy)+"<br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<strong>Null train acc: "+ str(null_train_accuracy)+"</strong>"+"<br>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<strong>Null test acc: "+ str(null_test_accuracy)+ "</strong></p>",
                                 glod.get_models_ranking_key(): ranking,
                                 glod.get_observations_targets_key(): observations_targets,
                                 glod.get_variables_summary_key(): variables_summary,
                                 glod.get_models_info_key(): model_info,
                                 glod.get_time_info_key(): time_info,
                                 glod.get_train_distribution_info_key(): train_distribution_info,
                                 glod.get_test_distribution_info_key(): test_distribution_info
                                 }

                # NOTE(review): a "<p>" is opened per metric but only one
                # "</p>" is emitted; kept as is to preserve the output.
                metrics_info = ""
                micro_avg = model_data[glod.get_metrics_micro_avg_key()]
                for metric in micro_avg:
                    # str() so that numeric metric values do not raise TypeError
                    metrics_info += "<p>" + "<strong>" + metric + "</strong>: " + str(micro_avg[metric]) + "</br>"
                metrics_info += "</p>"
                template_vars[glod.get_metrics_info_key()] = metrics_info

                # Optional plots: (key inside the model data, key inside the template)
                optional_images = (
                    (glod.get_model_parameters_plot_name(), glod.get_image_parameters_accuracy_key()),
                    (glod.get_confussion_matrix_train_path_key(), glod.get_conf_train_img_key()),
                    (glod.get_confussion_matrix_test_path_key(), glod.get_conf_test_img_key()),
                    (glod.get_learning_curve_key(), glod.get_learning_curve_key()),
                )
                for source_key, template_key in optional_images:
                    if source_key in model_data:
                        template_vars[template_key] = encode_image(model_data[source_key].replace('\'', ''))

                template_vars[glod.get_metrics_by_label_key()] = _model_report_metrics_table(model_data[glod.get_metrics_key()])

                pdf_resultante = os.path.join(ruta_directorio_informes, modelo + "_report_for_" + event + ".pdf")
                _model_report_write_pdf(template, template_vars, ruta_plantilla_temporal, pdf_resultante, enco)
    finally:
        # The temporary HTML must not survive, not even after a failure
        if os.path.exists(ruta_plantilla_temporal):
            os.remove(ruta_plantilla_temporal)
Exemplo n.º 6
0
def create_report_current_execution(report_dict,lista_eventos,lista_variables_usuario,lista_listas_variables_descartadas,lista_aprendizajes,lista_modelos, diccionario_aprendizajes, ruta_relativa_datos_auxiliares, ruta_directorio_resultados,enco):
    '''Generate the PDF with the general information of the execution.

    The report lists the common parameters stored in report_dict and, for
    every event, the features the user marked as important, the variables
    the user discarded and the learnings/models to be applied. It is
    rendered to a temporary HTML file inside ruta_relativa_datos_auxiliares,
    converted to "General_execution_report_<target>.pdf" inside
    ruta_directorio_resultados, and the temporary file is removed afterwards,
    also when the generation fails.

    Parameters:
        report_dict: data of the general report (title, logo, common
            parameters and variables discarded by the user).
        lista_eventos: names of the events to be processed.
        lista_variables_usuario: per event, the features important for the user.
        lista_listas_variables_descartadas: per event, the discarded variables.
        lista_aprendizajes: per event, the learning mode to apply.
        lista_modelos: per event, the models to apply; when the learning mode
            is glod.get_all_learning_modes_name() the entry is indexed as
            [0] and [1], one list of models per learning mode.
        diccionario_aprendizajes: its entries 1 and 2 provide the headings of
            those two lists of models.
        ruta_relativa_datos_auxiliares: directory with the HTML templates.
        ruta_directorio_resultados: directory where the PDF is written.
        enco: encoding used to write and read the temporary HTML file.
    '''
    env = Environment(loader=FileSystemLoader('.'))
    ruta_plantilla_temporal = os.path.join(ruta_relativa_datos_auxiliares,'temp_html.html')
    template = env.get_template(ruta_relativa_datos_auxiliares + '/' + 'general_execution_template.html')

    template_vars = {glod.get_title_key(): report_dict[glod.get_title_key()],
                     glod.get_logo_key():encode_image(report_dict[glod.get_logo_key()].replace('\'','')),
                     glod.get_general_info_execution_key():''
                     }

    # General parameters (target, threshold, discarded variables, ...)
    target = report_dict[glod.get_objective_target_key()]
    umbral = report_dict[glod.get_umbral_key()]
    main_metric = report_dict[glod.get_main_metric_key()]
    feature_selection_method = report_dict[glod.get_feature_selection_key()]
    penalize_falses = report_dict[glod.get_penalization_name()]
    lista_variables_descartadas = report_dict[glod.get_variables_key()][glod.get_deleted_by_user_key()]

    # Indentation prefixes, from one to four levels
    tabulacion = "&nbsp;&nbsp;&nbsp;&nbsp;"
    nivel_2 = tabulacion * 2
    nivel_3 = tabulacion * 3
    nivel_4 = tabulacion * 4

    # NOTE(review): the heading below ends with "</p>" where an opening "<p>"
    # seems intended; it is left as is to keep the rendered report unchanged.
    partes = ["<h3>Common Parameters </h3></p>"]
    partes.append(nivel_2 + "<i>Objective Target: </i>" + target + "</br></br>")
    partes.append(nivel_2 + "<i>Percentil for Scoring Function: </i>" + umbral + "</br></br>")
    partes.append(nivel_2 + "<i>Main metric: </i>" + main_metric + "</br></br>")
    partes.append(nivel_2 + "<i>Feature selection method: </i>" + feature_selection_method + "</br></br>")
    partes.append(nivel_2 + "<i>Penalize falses: </i>" + penalize_falses + "</br></br>")
    partes.append(nivel_2 + "<i>Common Discarded Variables:</i></br>")
    for variable_descartada in lista_variables_descartadas:
        partes.append(nivel_3 + variable_descartada + "</br>")
    if lista_variables_descartadas == []:
        partes.append(tabulacion + "No variables were selected to be discarded</br>")
    partes.append("</p>")

    partes.append("<h3>Events to be processed: </h3><p>")
    for indice, evento in enumerate(lista_eventos):
        partes.append(tabulacion + "<strong>" + evento + "</strong></br>")

        partes.append(nivel_2 + "<i>Important features for the user:</i> </br>")
        if lista_variables_usuario[indice]:
            for variable in lista_variables_usuario[indice]:
                partes.append(nivel_3 + variable + "</br>")
        else:
            partes.append(nivel_3 + "No important features were specified</br>")
        partes.append("</br>")

        partes.append(nivel_2 + "<i>Discarded variables by the user:</i> </br>")
        if lista_listas_variables_descartadas[indice]:
            for variable in lista_listas_variables_descartadas[indice]:
                partes.append(nivel_3 + variable + "</br>")
        else:
            partes.append(nivel_3 + "No variables were discarded</br>")
        partes.append("</br>")

        partes.append(nivel_2 + "<i>Learnings to be applied: </i></br>")
        aprendizaje = lista_aprendizajes[indice]
        modelos = lista_modelos[indice]
        if aprendizaje == glod.get_all_learning_modes_name():
            # Both learning modes requested: entry 1 heads modelos[0] and
            # entry 2 heads modelos[1]
            for clave_aprendizaje, modelos_modo in ((1, modelos[0]), (2, modelos[1])):
                partes.append(nivel_3 + "<u>" + str(diccionario_aprendizajes[clave_aprendizaje]) + "</u>:</br>")
                for modelo_act in modelos_modo:
                    partes.append(nivel_4 + modelo_act + "</br>")
                partes.append("</br>")
        else:
            partes.append(nivel_3 + "<u>" + aprendizaje + "</u>:</br>")
            for modelo_act in modelos:
                partes.append(nivel_4 + modelo_act + "</br>")

        # NOTE(review): "</p>" is emitted once per event although "<p>" is
        # opened only once before the loop; kept to preserve the output.
        partes.append("</p>")

    # Stored after the loop so the common parameters also reach the report
    # when there are no events to process.
    template_vars[glod.get_general_info_execution_key()] = "".join(partes)

    try:
        with codecs.open(ruta_plantilla_temporal,'w',encoding=enco) as output_file:
            output_file.write(template.render(template_vars))

        # Read the HTML back with the same encoding it was written with
        with codecs.open(ruta_plantilla_temporal, 'r', encoding=enco) as html_leido:
            pdf_resultante = os.path.join(ruta_directorio_resultados, "General_execution_report_" + target + ".pdf")
            with open(pdf_resultante, "wb") as gen_report:
                pisa.CreatePDF(html_leido.read(), gen_report)

        # Attach the null handler only if the logger does not have one yet,
        # so repeated calls do not pile up handlers.
        pisa_logger = logging.getLogger("xhtml2pdf")
        if not any(isinstance(handler, PisaNullHandler) for handler in pisa_logger.handlers):
            pisa_logger.addHandler(PisaNullHandler())
    finally:
        # The temporary HTML must not survive, not even after a failure
        if os.path.exists(ruta_plantilla_temporal):
            os.remove(ruta_plantilla_temporal)