# Instantiate the project helper objects used by this script.
# NOTE(review): both classes are defined outside this excerpt; their
# constructors may have side effects that are not visible here.
processing = Processing_DB_Files()
project = Project()
# Hidden-layer layout handed to MLPClassifier(hidden_layer_sizes=...):
# a 500-element tuple whose every entry is 500.
# Built with sequence repetition instead of an append loop; `t_aux` is kept
# because it is a module-level name other code may still read.
t_aux = [500] * 500
t = tuple(t_aux)
####
# Candidate models keyed by the display name used in the loop below.
# Insertion order is the evaluation order, so it is kept as-is.
classifiers = {
    "MPL": MLPClassifier(
        hidden_layer_sizes=t,
        activation="relu",
        solver="adam",
        alpha=1e-5,
        max_iter=100000,
        random_state=1,
    ),
    "Extratrees": ExtraTreesClassifier(random_state=1, n_estimators=1000),
    "Knn": KNeighborsClassifier(n_neighbors=5),
    "Naive Bayes": GaussianNB(),
    "RandomForest": RandomForestClassifier(random_state=1, n_estimators=1000),
    "Decision Tree": tree.DecisionTreeClassifier(random_state=1),
    "SVM": svm.SVC(random_state=1, probability=True),
}

# Person ids 1..15.
persons = list(range(1, 16))
get_accuracy = Get_Accuracy()
balance_data = BalanceData()
threshold_balance_data = 40

# Select the best classifier.
accuracy_mean = pd.DataFrame(columns=["Classifier", "Accuracy"])
project.log(
    "=====================ARCMA_SELECT_BEST_ALGORITHM=====================",
    file="arcma_best_algorithm.log",
)
# For every classifier name, reset the metric accumulators and load each
# person's stored features and labels.
for c in classifiers:
    print(c)
    # Fresh accumulators for this classifier (filled by code that is not
    # part of this excerpt).
    person_accuracies = []
    person_f_score = []
    person_precision = []
    person_recall = []
    times_to_predict = []
    for p in persons:
        s = save()
        try:
            relevant_features = s.load_var("arcma_relevant_features_best_window{}relevant_features_{}.pkl".format(slash, p))
            y = s.load_var("arcma_relevant_features_best_window{}y_{}.pkl".format(slash, p))
            y = pd.DataFrame(y, columns=[arcma.label_tag])
        except Exception:
            # Best-effort load: report and move on. `Exception` (instead of a
            # bare `except:`) keeps Ctrl-C / SystemExit able to stop the run.
            # NOTE(review): `relevant_features` / `y` keep the previous
            # person's values after a failure - confirm that later code
            # does not reuse them.
            print("file from person {} not found!".format(p))
示例#2
0
extra_trees = ExtraTreesClassifier(random_state=0, n_estimators=1000)
get_accuracy = Get_Accuracy()
balance_data = BalanceData()
threshold_balance_data = 40

# === Load features ===

# Iterate over the HMP subjects with a fixed threshold.
persons = "f1 m1 m2 f2 m3 f3 m4 m5 m6 m7 f4 m8 m9 f5 m10 m11".split()
accuracy_by_person = pd.DataFrame()
threshold = 0.65
project.log(
    "=========== HMP StratifiedKFold Accuracy, Thresold = {}===========".format(threshold),
    file="hmp_log_final_accuracy.log",
)
for p in persons:
    s = save()

    # Stored per-person feature matrix and label vector.
    features_file = "hmp_relevant_features_best_window{}relevant_features_{}.pkl".format(slash, p)
    labels_file = "hmp_relevant_features_best_window{}y_{}.pkl".format(slash, p)
    relevant_features = s.load_var(features_file)
    y = s.load_var(labels_file)
    y = pd.DataFrame(y, columns=[hmp.label_tag])

    # 10 folds, no shuffling (hence no random state).
    skf = StratifiedKFold(n_splits=10, shuffle=False, random_state=None)

    accuracy = {}
    accuracies = []
    balanced_data = balance_data.balance_data(
        relevant_features, y, threshold_balance_data)
# === Initialization ===
hmp = HMP_Model()
processing = Processing_DB_Files()
project = Project()
extra_trees = ExtraTreesClassifier(n_estimators=10000, random_state=0)
base_classification = Base_Classification(hmp, extra_trees)

# === Load features ===

# Run the outlier accuracy evaluation for a fixed threshold on a few persons.
s = save()
person_list = ["f1", "m1", "m2"]
accuracy_threshould_list = []
data = {}
threshold = 0.35
# All messages of this run are written to the same file as the results, so
# the header and per-person markers sit next to the numbers they describe
# (this mirrors the sibling UMAFALL / ARCMA outlier scripts).
project.log(
    "=========== HMP Outlier Accuracy, Thresold = {}===========".format(
        threshold),
    file="hmp_log.log")
for p in person_list:
    project.log("===========Person {}===========".format(p),
                file="hmp_log.log")
    # Stored feature matrix and labels for this person.
    data = s.load_var("hmp_relevant_features{}relevant_features_{}.pkl".format(
        slash, p))
    y = s.load_var("hmp_relevant_features{}y_{}.pkl".format(slash, p))
    y = pd.DataFrame(y, columns=[hmp.label_tag])

    print("------------------------------------")
    print("Person: {}".format(p))
    print("------------------------------------")

    return_accuracy = base_classification.get_accuracy.stratified_kfold_accuracy_outlier(
        data, y, extra_trees, threshold, p)
    project.log(str(return_accuracy), file="hmp_log.log")
# === Initialization ===
Debug.DEBUG = 0
arcma = ARCMA_Model()
processing = Processing_DB_Files()
project = Project()
persons = list(range(1, 16))
get_accuracy = Get_Accuracy()
balance_data = BalanceData()
threshold_balance_data = 40

# Select the best window length.
t = time.time()
best_model = ExtraTreesClassifier(random_state=0, n_estimators=1000)
w_accuracies = pd.DataFrame(columns=["window", "accurary"])
p = 15  # person with the most records
project.log(
    "=====================ARCMA_SELECT_BEST_WINDOWS=====================",
    file="arcma_log_best_window.log",
)
for w in range(20, 110, 10):

    print("Load data with window len = {}".format(w))
    data = arcma.load_training_data_by_people(p)
    print("Slicing Window....")
    data_tsfresh, y = arcma.slice_by_window_tsfresh(data, w)
    # Shift the label index by one.
    # NOTE(review): presumably to line up with 1-based window ids - confirm.
    y.index += 1
    del data_tsfresh["activity"]

    # Feature selection only runs when more than one class is present.
    classes_counts = y.value_counts()
    if len(classes_counts) > 1:
        relevant_features = extract_relevant_features(
            data_tsfresh, y, column_id='id', column_sort='time')
        y = pd.DataFrame(y, columns=[arcma.label_tag])

        balanced_data = balance_data.balance_data(
            relevant_features, y, threshold_balance_data)
                                              column_id='id',
                                              column_sort='time')
# Accuracy using every relevant ts_feature: hold out 20% of the rows, fit an
# ExtraTrees model and log the accuracy and the mean prediction time per sample.
X_train, X_test, y_train, y_test = train_test_split(relevant_features,
                                                    y2,
                                                    test_size=0.2,
                                                    random_state=42)
extra_trees = ExtraTreesClassifier(n_estimators=10000,
                                   max_depth=1000,
                                   random_state=0)
extra_trees.fit(X_train, y_train)
start_time = time.time()
pred = extra_trees.predict(X_test)
end_time = time.time()
accuracy = accuracy_score(y_test, pred)
project.log("Accuracy to all ts_features ({}): {} - Time: {} seconds.".format(
    len(relevant_features.columns), accuracy,
    (end_time - start_time) / len(y_test)))

# Rank the features by importance BEFORE the model and the training frame are
# deleted below; doing it afterwards raised NameError on `extra_trees` and
# `X_train`.
ts_extratree_features_importance = pd.DataFrame(
    extra_trees.feature_importances_,
    index=X_train.columns,
    columns=['importance']).sort_values('importance', ascending=False)

time.sleep(10)
del X_train, X_test, y_train, y_test, extra_trees, start_time, pred, accuracy
time.sleep(10)

# 2nd - accuracy with the 10% most relevant ts_features
len_features = len(ts_extratree_features_importance)
# NOTE(review): the "- 1" keeps one feature fewer than a tenth of the total
# and yields an empty selection when there are fewer than 20 features -
# confirm this is intended.
best_features = ts_extratree_features_importance.index[0:int((len_features /
                                                              10) - 1)]
ts_final_features = relevant_features[best_features]
示例#6
0
Debug.DEBUG = 0
arcma = ARCMA_Model()
processing = Processing_DB_Files()
project = Project()
extra_trees = ExtraTreesClassifier(random_state=0, n_estimators=1000)
base_classification = Base_Classification(arcma, extra_trees)
balance_data = BalanceData()
threshold_balance_data = 40

# Iterate over thresholds to find the best value.

s = save()
person_list = list(range(1, 16))
accuracy_threshould_temp_aux = pd.DataFrame(
    columns=["accuracy", "discarted", "len_activity", "threshold"])
accuracy_mean = pd.DataFrame(
    columns=["accuracy", "discarted", "len_activity", "threshold"])
project.log(
    "====================ARCMA BEST THRESHOLD============================",
    file="arcma_log_best_threshold.log",
)
for t in np.arange(0.05, 1, 0.05):
    # Per-threshold scratch frame (note: no "threshold" column here).
    accuracy_threshould_temp_aux = pd.DataFrame(
        columns=["accuracy", "discarted", "len_activity"])
    for p in person_list:
        relevant_features = s.load_var(
            "arcma_relevant_features_best_window{}relevant_features_{}.pkl".format(slash, p))
        y = s.load_var(
            "arcma_relevant_features_best_window{}y_{}.pkl".format(slash, p))
        y = pd.DataFrame(y, columns=[arcma.label_tag])

        balanced_data = balance_data.balance_data(
            relevant_features, y, threshold_balance_data)
        # Only a tuple result is split and used; anything else is ignored here.
        if isinstance(balanced_data, tuple):
            x_train, x_test, y_train, y_test = train_test_split(
                balanced_data[0], balanced_data[1],
                test_size=0.2, random_state=42)
            # Nested container holding this person's training split.
            data = {
                p: {
                    "training": {
                        "training_features": x_train,
                        "training_labels": y_train,
                    },
                },
            }
示例#7
0
# === Initialization ===
Debug.DEBUG = 0
umafall = UMAFALL_Model()
processing = Processing_DB_Files()
project = Project()
extra_trees = ExtraTreesClassifier(random_state=0, n_estimators=10000)
base_classification = Base_Classification(umafall, extra_trees)

# === Load features ===

# Run the outlier accuracy evaluation for a fixed threshold on selected persons.
s = save()
person_list = [14, 15, 16, 17]
accuracy_threshould_list = []
data = {}
threshold = 0.65
project.log(
    "=========== UMAFALL Outlier Accuracy, Thresold = {}===========".format(threshold),
    file="umafall_log.log",
)
for p in person_list:
    project.log("===========Person {}===========".format(p),
                file="umafall_log.log")
    data = s.load_var(
        "umafall_relevant_features{}relevant_features_{}.pkl".format(slash, p))
    y = s.load_var("umafall_relevant_features{}y_{}.pkl".format(slash, p))
    y = pd.DataFrame(y, columns=[umafall.label_tag])

    print("------------------------------------")
    print("Person: {}".format(p))
    print("------------------------------------")

    # The first feature column is passed as `column_test`.
    return_accuracy = base_classification.get_accuracy.stratified_kfold_accuracy_outlier(
        data,
        y,
        extra_trees,
        threshold,
        p,
        column_test=list(data.columns.values)[0],
    )
    project.log(str(return_accuracy), file="umafall_log.log")

示例#8
0
project = Project()
extra_trees = ExtraTreesClassifier(random_state=0, n_estimators=10000)
base_classification = Base_Classification(arcma, extra_trees)
balance_data = BalanceData()
threshold_balance_data = 40

# === Load features ===

# Load every person's stored features; the threshold is reported in the log header.
s = save()
person_list = list(range(1, 16))
accuracy_threshould_list = []
data = {}
threshold = 0.60
project.log(
    "=========== ARCMA Outlier Accuracy, Thresold = {}===========".format(threshold),
    file="arcma_log_outlier_accuracy.log",
)
for p in person_list:
    project.log(
        "===========Person {}===========".format(p),
        file="arcma_log_outlier_accuracy.log",
    )
    data = s.load_var(
        "arcma_relevant_features_best_window{}relevant_features_{}.pkl".format(slash, p))
    y = s.load_var(
        "arcma_relevant_features_best_window{}y_{}.pkl".format(slash, p))
    y = pd.DataFrame(y, columns=[arcma.label_tag])

    print("------------------------------------")
    print("Person: {}".format(p))
    print("------------------------------------")
#===INITIALIZATION===#
Debug.DEBUG = 0
umafall = UMAFALL_Model()
processing = Processing_DB_Files()
project = Project()
persons = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
get_accuracy = Get_Accuracy()
balance_data = BalanceData()
threshold_balance_data = 40
# Select the best window length
t = time.time()
best_model = ExtraTreesClassifier(n_estimators=1000, random_state=0)
# NOTE(review): the column name "accurary" is misspelled but is a runtime key;
# left untouched because other code may look it up by this exact name.
w_accuracies = pd.DataFrame(columns=["window", "accurary"])
p = 1  # person with the most records
project.log(
    "=====================UMAFALL_SELECT_BEST_WINDOWS=====================",
    file="umafall_log_best_window.log")
# Try window lengths 10, 20, ..., 100.
for w in range(10, 110, 10):

    print("Load data with window len = {}".format(w))
    # Same person and filter on every iteration; only the window length changes.
    data = umafall.load_training_data_by_people(
        p, additional_where="and sensor=2 and body=2")
    print("Slicing Window....")
    data_tsfresh, y = umafall.slice_by_window_tsfresh(data, w)
    # Shift the label index by one.
    # NOTE(review): presumably to match 1-based window ids - confirm.
    y.index += 1
    del data_tsfresh["activity"]

    # Feature extraction is only attempted when more than one class is present.
    classes_counts = y.value_counts()
    if len(classes_counts) > 1:
        relevant_features = extract_relevant_features(data_tsfresh,
                                                      y,
示例#10
0
                        
                        person_accuracies.append(accuracy)
                        times_to_predict.append(spent_time)
                        person_precision.append(precision)
                        person_recall.append(recall)
                        person_f_score.append(f_score)
                        person_discarteds.append(discarteds)
                        person_len_activities.append(len_activity)
                    except Exception as e:
                        print(e)
                     
            out_aux = pd.DataFrame({"Classifier":[type(classifiers[c]).__name__], "Threshold": t, "Accuracy":[st.mean(person_accuracies)], "Precision":[st.mean(person_precision)], "Recall":[st.mean(person_recall)], "F-Score":[st.mean(person_f_score)], "Time":[st.mean(times_to_predict)], "Discarteds":[st.mean(person_discarteds)], "Len Activities":[st.mean(person_len_activities)]})
            accuracy_mean = pd.concat([accuracy_mean, out_aux])
    accuracy_mean.to_csv(s.path+"new_results{}{}_best_threshold_balanced_data_window{}.csv".format(slash, model['model_name'], model['window']), sep='\t', encoding='utf-8')
              
'''
#Interate threshold to find de best value#

s = save()
person_list = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
accuracy_threshould_temp_aux = pd.DataFrame(columns=["accuracy","discarted", "len_activity", "threshold"])
accuracy_mean = pd.DataFrame(columns=["accuracy","discarted", "len_activity", "threshold"])
project.log("====================ARCMA BEST THRESHOLD============================", file="arcma_log_best_threshold.log")
for t in np.arange(0.05, 1, 0.05):
    accuracy_threshould_temp_aux = pd.DataFrame(columns=["accuracy","discarted", "len_activity"])
    for p in person_list:
        relevant_features = s.load_var("arcma_relevant_features_best_window{}relevant_features_{}.pkl".format(slash, p))
        y = s.load_var("arcma_relevant_features_best_window{}y_{}.pkl".format(slash, p))
        y = pd.DataFrame(y, columns=[arcma.label_tag])
        
        balanced_data = balance_data.balance_data(relevant_features, y, threshold_balance_data)