__date__ = '9/25/2019 8:49 AM'

from ay_hw_3.util_data import load_data_and_label, is_bending
from ay_hw_3.util_generate import gen_train_data_file_paths
from ay_hw_3.util_statistic import gen_statistic_result

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

if __name__ == "__main__":
    # get all training data file paths
    allTrainFilePaths = gen_train_data_file_paths()
    trainStaticResult = pd.DataFrame()

    # compute the time-domain statistics for every training file and
    # attach the bending / non-bending label
    for index, path in enumerate(allTrainFilePaths):
        fileItem, fileLabel = load_data_and_label(path)
        staticResultItem = gen_statistic_result(fileItem, index + 1)
        staticResultItem["label"] = is_bending(fileLabel)
        trainStaticResult = trainStaticResult.append(staticResultItem)

    # ---------------- same as main_c_ii.py ------------------
    # keep only the min / max / mean features of sensors 1, 2, and 6
    features = ['min(1)', 'max(1)', 'mean(1)',
                'min(2)', 'max(2)', 'mean(2)',
                'min(6)', 'max(6)', 'mean(6)', 'label']
    subStatisticResult = trainStaticResult[features]
    # print(subStatisticResult.to_string())

    # scatter-plot matrix colored by the bending / non-bending label
    sns.pairplot(subStatisticResult, hue="label", markers=["o", "+"])
    plt.show()
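# ---------------------------------------------------------------------------
# For reference only: a minimal sketch of what `gen_statistic_result` is
# assumed to compute, inferred from the feature names used above
# ('min(1)', 'max(1)', 'mean(1)', ...). The real helper lives in
# ay_hw_3.util_statistic and may compute additional statistics
# (median, std, quartiles, ...); this sketch is not the project's code.
# ---------------------------------------------------------------------------
def gen_statistic_result_sketch(df, instance_index):
    """Return a one-row DataFrame with per-column min/max/mean statistics."""
    row = {}
    for col_pos, col_name in enumerate(df.columns, start=1):
        row['min({})'.format(col_pos)] = df[col_name].min()
        row['max({})'.format(col_pos)] = df[col_name].max()
        row['mean({})'.format(col_pos)] = df[col_name].mean()
    # index the row by the instance number so rows from different files stack
    return pd.DataFrame(row, index=[instance_index])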
import sys
import warnings

import pandas as pd
import statsmodels.api as sm

# NOTE: the module paths of the project helpers below are taken from the
# sibling script above; `split_DF_in_parts` and `gen_multiple_label` are
# assumed to live in the same utility modules.
from ay_hw_3.util_data import load_data_and_label, is_bending, split_DF_in_parts
from ay_hw_3.util_generate import gen_train_data_file_paths, gen_multiple_label
from ay_hw_3.util_statistic import gen_statistic_result

if __name__ == "__main__":
    # silence warnings so the per-split output stays readable
    if not sys.warnoptions:
        warnings.simplefilter("ignore")

    # based on what the pdf said, we need to use all training data
    allTrainFilePaths = gen_train_data_file_paths()
    trainStaticResult = pd.DataFrame()

    for parts in range(1, 21):
        for index, path in enumerate(allTrainFilePaths):
            fileItem, fileLabel = load_data_and_label(path, hasTime=False)
            splitedDFs = split_DF_in_parts(fileItem, parts=parts, needConcat=False)

            # one statistics row per split segment of this file
            statisticResultTemp = pd.DataFrame()
            for DFItem in splitedDFs:
                staticResultTempItem = gen_statistic_result(DFItem, index + 1, hasTime=False)
                statisticResultTemp = statisticResultTemp.append(staticResultTempItem, sort=False)

            statisticResultTemp["label"] = is_bending(fileLabel)
            trainStaticResult = trainStaticResult.append(statisticResultTemp, sort=False)

        # fit a logistic regression of the bending label on the statistical features
        logitModel = sm.Logit(trainStaticResult['label'],
                              trainStaticResult[gen_multiple_label(parts=1)])
        logitModelResults = logitModel.fit(method="bfgs", disp=0)

        # ['median(1)'] ['max(5)']
        significantVars = [key for key, p_value in logitModelResults.pvalues.items()
                           if p_value <= 0.05]
        if len(significantVars) > 0:
            print("When splitting all training data sets into {} parts, "
                  "I got significant variables:".format(parts), end=" ")
            print(' '.join(significantVars))

        significantVars = []
        # allocate a new space for statisticResult
        trainStaticResult = pd.DataFrame()
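# ---------------------------------------------------------------------------
# For reference only: a minimal sketch of what `split_DF_in_parts` is assumed
# to do when called with needConcat=False, inferred from its usage above: cut
# a time-series DataFrame row-wise into `parts` consecutive chunks of roughly
# equal length and return them as a list. The real helper in the ay_hw_3
# utilities may differ in details; this sketch is not the project's code.
# ---------------------------------------------------------------------------
import numpy as np

def split_DF_in_parts_sketch(df, parts, needConcat=False):
    """Split `df` row-wise into `parts` consecutive, roughly equal chunks."""
    chunks = np.array_split(df, parts)
    if needConcat:
        # glue the chunks back together when a single DataFrame is wanted
        return pd.concat(chunks)
    return chunks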