Example #1
__date__ = '9/25/2019 8:49 AM'

from ay_hw_3.util_data import load_data_and_label, is_bending
from ay_hw_3.util_generate import gen_train_data_file_paths
from ay_hw_3.util_statistic import gen_statistic_result

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

if __name__ == "__main__":
    # get all training data file paths
    allTrainFilePaths = gen_train_data_file_paths()

    trainStaticResult = pd.DataFrame()
    # collect min/max/mean statistics for every training file and attach its bending label
    for index, path in enumerate(allTrainFilePaths):
        fileItem, fileLabel = load_data_and_label(path)
        staticResultItem = gen_statistic_result(fileItem, index + 1)
        staticResultItem["label"] = is_bending(fileLabel)
        trainStaticResult = pd.concat([trainStaticResult, staticResultItem])

    # ----------------same as in main_c_ii.py------------------
    features = [
        'min(1)', 'max(1)', 'mean(1)', 'min(2)', 'max(2)', 'mean(2)', 'min(6)',
        'max(6)', 'mean(6)', 'label'
    ]
    subStatisticResult = trainStaticResult[features]
    # print(subStatisticResult.to_string())
    sns.pairplot(subStatisticResult, hue="label", markers=["o", "+"])
    plt.show()
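
The ay_hw_3 helper functions are not shown in these examples. As a rough, hypothetical sketch only (not the actual ay_hw_3 implementation), gen_statistic_result is assumed to return a one-row DataFrame of per-column min/max/mean statistics named min(1), max(1), mean(1), and so on, which is what the feature list above relies on:

import pandas as pd

def sketch_statistic_result(df, row_index):
    # hypothetical stand-in for ay_hw_3.util_statistic.gen_statistic_result:
    # one row of min/max/mean per sensor column, named min(1), max(1), mean(1), ...
    stats = {}
    for i, col in enumerate(df.columns, start=1):
        stats['min({})'.format(i)] = df[col].min()
        stats['max({})'.format(i)] = df[col].max()
        stats['mean({})'.format(i)] = df[col].mean()
    return pd.DataFrame(stats, index=[row_index])
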
Example #2
import sys
import warnings

import pandas as pd
import statsmodels.api as sm

# the module paths for split_DF_in_parts and gen_multiple_label are assumed here
from ay_hw_3.util_data import load_data_and_label, is_bending, split_DF_in_parts
from ay_hw_3.util_generate import gen_train_data_file_paths, gen_multiple_label
from ay_hw_3.util_statistic import gen_statistic_result

if __name__ == "__main__":
	# suppress warnings (e.g. convergence warnings from the repeated Logit fits)
	if not sys.warnoptions:
		warnings.simplefilter("ignore")

	allTrainFilePaths = gen_train_data_file_paths()
	# per the assignment PDF, all of the training data must be used
	trainStaticResult = pd.DataFrame()
	# split each training time series into 1 to 20 equal-length parts
	for parts in range(1, 21):
		for index, path in enumerate(allTrainFilePaths):
			fileItem, fileLabel = load_data_and_label(path, hasTime=False)
			splitedDFs = split_DF_in_parts(fileItem, parts=parts, needConcat=False)
			statisticResultTemp = pd.DataFrame()
			for DFItem in splitedDFs:
				staticResultTempItem = gen_statistic_result(DFItem, index + 1, hasTime=False)
				statisticResultTemp = pd.concat([statisticResultTemp, staticResultTempItem], sort=False)

			statisticResultTemp["label"] = is_bending(fileLabel)
			trainStaticResult = pd.concat([trainStaticResult, statisticResultTemp], sort=False)

		logitModel = sm.Logit(trainStaticResult['label'],
							  trainStaticResult[gen_multiple_label(parts=1)])
		logitModelResults = logitModel.fit(method="bfgs", disp=0)
		# ['median(1)'] ['max(5)']
		significantVars = \
			[key for key, p_value in logitModelResults.pvalues.items() if p_value <= 0.05]
		if len(significantVars) > 0:
			print("When split all training data sets in {} times, "
				  "I got significant variables : ".format(parts), end=" ")
			print(' '.join(significantVars))
		# reset the accumulators before trying the next number of parts
		significantVars = []
		trainStaticResult = pd.DataFrame()
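
For reference, the p-value filter used in Example #2 can be exercised on synthetic data. The sketch below uses made-up column names and random data (not the homework features); it fits a statsmodels Logit model the same way and keeps the variables whose p-values are at or below 0.05:

import numpy as np
import pandas as pd
import statsmodels.api as sm

# synthetic data: y depends on x1, while x2 is pure noise
rng = np.random.default_rng(0)
X = pd.DataFrame({'x1': rng.normal(size=200), 'x2': rng.normal(size=200)})
y = (X['x1'] + 0.5 * rng.normal(size=200) > 0).astype(int)

logitModel = sm.Logit(y, sm.add_constant(X))
logitModelResults = logitModel.fit(method="bfgs", disp=0)

# keep the variables whose p-values pass the 0.05 threshold, as in the loop above
significantVars = [key for key, p_value in logitModelResults.pvalues.items() if p_value <= 0.05]
print(significantVars)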