'age', 'workclass', 'education_num', 'marital_status', 'post',
'relationship', 'nation', 'gender', 'capital', 'hours_per_week',
'country', 'wealth'
]
# NOTE(review): the lines above are the tail of a list literal opened before
# this chunk -- presumably the `columns_name` list used below; confirm.
# NOTE(review): the original line breaks/indentation of this fragment were
# lost; loop bodies below are laid out by which statements use the loop
# variable -- confirm against the original file.

# Label-encode every column and collect the per-column results, in
# `columns_name` order.
for column in columns_name:
    encode_labels = label_encode(column)
    label_encode_features.append(encode_labels)

# Merge the per-column encodings and load them into a DataFrame whose column
# labels are `columns_name`.
# (presumably combine_features_tuples yields one row tuple per record --
# TODO confirm against its definition)
data = combine_features_tuples(label_encode_features)
df = DataFrame(data, columns=columns_name)

# Plot the cumulative sum of every column and save the figure.
# NOTE(review): the output paths use hard-coded Windows separators, and the
# code here does not create `outs\` or `outs\boxes\` -- presumably they must
# already exist; verify.
df.cumsum().plot()
plt.savefig("outs\\cumsum.png")
plt.close()

# Histogram of the row-to-row differences of every column.
df.diff().hist()
plt.savefig("outs\\diff_hist.png")
plt.close()

# One box plot per column, each saved under the column's own name.
for col in columns_name:
    df[col].plot.box()
    plt.savefig("outs\\boxes\\{}.png".format(col))
    # Close the current figure before the next column is plotted.
    plt.close()

# For each column, query the `information` table for each value of the
# column together with how many times it occurs.
# NOTE(review): `sql_manager.crs` looks like a DB-API cursor -- confirm.
# NOTE(review): `distinct` appears redundant next to `group by` on the same
# column; left untouched here.
for col in columns_name:
    result = sql_manager.crs.execute((
        "select distinct {},count({}) from information group by {}".format(
            col, col, col))).fetchall()
    # Each result row is (value, count), matching the select-list order.
    counts = [x[1] for x in result]
    attr = [x[0] for x in result]
    # Fresh figure/axes for this column.
    # NOTE(review): the rest of this loop body is outside the visible chunk.
    fig1, ax1 = plt.subplots()