### Add new age column ###
# `age` is produced outside this fragment; it is stored on the frame as 'AGE'.
df_vektis['AGE'] = age

### For getting some basic info ###
# Every step in this section is switched on by its own key in `input`.
# The explicit `== True` comparison is kept on purpose: a merely truthy value
# (for example a non-empty string) does not enable the step.
# NOTE(review): these two helpers receive `df`, while the plots below work on
# `df_vektis` -- confirm that this is intended.
if input['check_missing'] == True:
    func.check_missing(df, col, year)
if input['data_description'] == True:
    func.data_describe(df, col, year)

### For three plots ###
# func.groupAgeRange is called once per entry of input['age_range']; each
# enabled plot for that entry is drawn from the same resulting frame.
loop = input['age_range']
for i in loop:
    df_avg = func.groupAgeRange(df_vektis, i, 0)
    if input['correlation_matrix'] == True:
        func.corr_Matrix(df_avg, i, year)
    if input['pie_chart'] == True:
        func.pie_Chart(df_avg, i, year)
    if input['distribution_plot'] == True:
        func.dist_Plot(df_avg, 'SUM', i, year)

### Only for the Stack plot ###
if input['stacked_area'] == True:
    # Keys 0..89: one column of df_stack is filled per iteration.
    loop = list(range(90))
    df_stack = pd.DataFrame()
    for i in loop:
        # NOTE(review): the third argument here is df_stack, whereas the call
        # in the three-plots loop passes 0 -- confirm against
        # func.groupAgeRange.
        df_avg = func.groupAgeRange(df_vektis, i, df_stack)
        # Column-wise mean (axis=0), ignoring missing values.
        df_stack[i] = df_avg.mean(axis=0, skipna=True)
    # Transpose so the 0..89 keys move from the column axis to the row axis.
    df_stack_trans = df_stack.transpose()
for i in range(0, len(input['taskName'])): file = input['taskName'][i] ############################### # 1.Overview on combined data # ############################### ### For getting some basic info ### checkMissing = input['check_missing'][i] if checkMissing == True: func.check_missing(combined_df, col, file) ### Function for correlation matrix ### CorrMatrix = input['correlation_matrix'][i] if CorrMatrix == True: func.corr_Matrix(combined_df[col], file) ### Function for Cat-Num plot ### CN_plot = input["Cat_Num_plot"][i] if CN_plot == True: CN_feature = input["Cat_Num_feature"][i] if len(CN_feature) > 0: for f in CN_feature: print(f) func.plot_catNum(combined_df, f, file) ### Function for Box plot ### BoxPlot = input["Box_plot"] if BoxPlot == True: BoxPlot_feature = input["Box_plot_feature"] if len(BoxPlot_feature) > 0:
except: logger.error("Some of your selected_features and excluded_features are not in the dataset") else: ### Check missing values in the dataset ### if inputYAML['check_missing'] == True: func.check_missing(df, col, file_name) ### Get the basic description about the dataset ### if inputYAML['data_description'] == True: func.data_describe(df, col, file_name) ### Function for correlation matrix ### if inputYAML['correlation_matrix'] == True: func.corr_Matrix(df[col], file_name) ### Separate features to numerical and categorical ### numFea = [] catFea = [] for c in col: if len(Counter(df[c].dropna())) > 20: numFea.append(c) else: catFea.append(c) ### Function for distribution plot ### if inputYAML['distribution_plot'] == True: if inputYAML['distribution_feature'] == 'ALL': for f in numFea: try: