import pandas as pd
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import StratifiedKFold
from classifiers.base_classification import Base_Classification


def plot_confusion_matrix(model, relevant_features_new, y_new,
                          threshold_classification):

    extra_trees = ExtraTreesClassifier(n_estimators=1000, random_state=0)
    base_classification = Base_Classification(model, extra_trees)

    #sss = StratifiedShuffleSplit(n_splits=1, test_size=0.3, random_state=0)
    # random_state is only honored when shuffle=True; shuffle=False with a
    # random_state raises a ValueError in recent scikit-learn.
    sss = StratifiedKFold(n_splits=3, shuffle=True, random_state=10)
    for train_index, test_index in sss.split(relevant_features_new, y_new):
        x_train, x_test = relevant_features_new.iloc[
            train_index, :], relevant_features_new.iloc[test_index, :]
        y_train, y_test = y_new.iloc[train_index, :], y_new.iloc[test_index, :]
        break  # Use only the first fold as a single train/test split.

    #x_train, x_test, y_train, y_test = train_test_split(relevant_features_new, y_new, test_size=0.3, random_state=42)
    extra_trees.fit(x_train, y_train.values.ravel())  # ravel: sklearn expects 1-D labels
    pred = extra_trees.predict_proba(x_test)
    pred = pd.DataFrame(pred, columns=extra_trees.classes_)
    valid_indexes = base_classification.get_accuracy.get_indexes_with_valid_predictions(
        pred, threshold_classification)

    x_test_valid = x_test.iloc[valid_indexes, :]
    y_test_valid = y_test.iloc[valid_indexes, :]

    base_classification.get_accuracy.plot_confusion_matrix(
        x_test_valid, y_test_valid, extra_trees)
    print("Accuracy => {}".format(extra_trees.score(x_test_valid,
                                                    y_test_valid)))
    base_classification.get_accuracy.plot_confusion_matrix(
        x_test, y_test, extra_trees)
    print("Accuracy => {}".format(extra_trees.score(x_test, y_test)))
Example #2

from utils.debug import Debug
from models.hmp_model import HMP_Model
from sklearn.ensemble import ExtraTreesClassifier
from classifiers.base_classification import Base_Classification
import pandas as pd
from sklearn.model_selection import train_test_split
from pre_processing.processing_db_files import Processing_DB_Files
from utils.project import Project, slash
from scripts.save_workspace import save
from sklearn.model_selection import StratifiedKFold
import numpy as np

#===INITIALIZATION===#
Debug.DEBUG = 0
hmp = HMP_Model()
processing = Processing_DB_Files()
project = Project()
extra_trees = ExtraTreesClassifier(n_estimators=10000, random_state=0)
base_classification = Base_Classification(hmp, extra_trees)

#===LOAD FEATURES===#

# Iterate over the threshold to find the best value
s = save()
person_list = ["f1", "m1", "m2"]
accuracy_threshould_list = []
data = {}
threshold = 0.35
project.log(
    "=========== HMP Outlier Accuracy, Thresold = {}===========".format(
        threshold))
for p in person_list:
    project.log("===========Person {}===========".format(p))
    data = s.load_var("hmp_relevant_features{}relevant_features_{}.pkl".format(
Example #3
from utils.debug import Debug
from models.arcma_model import ARCMA_Model  # module path assumed, mirroring models.hmp_model
from sklearn.ensemble import ExtraTreesClassifier
from classifiers.base_classification import Base_Classification
import pandas as pd
from pre_processing.processing_db_files import Processing_DB_Files
from utils.project import Project, slash
from scripts.save_workspace import save
from sklearn.model_selection import StratifiedKFold
import numpy as np
from sklearn.model_selection import train_test_split

#===INITIALIZATION===#
Debug.DEBUG = 0
arcma = ARCMA_Model()
processing = Processing_DB_Files()
project = Project()
extra_trees = ExtraTreesClassifier(n_estimators=1000, random_state=0)
base_classification = Base_Classification(arcma, extra_trees)
s = save()


#person = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] #UMAFALL
#person = ["f1", "m1", "m2", "f2", "m3", "f3", "m4", "m5", "m6", "m7", "f4", "m8", "m9", "f5", "m10", "m11"]#HMP
person = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]#ARCMA
valid_persons = list()
for p in person:
    relevant_features = s.load_var("arcma_relevant_features{}relevant_features_{}.pkl".format(slash, p))
    y = s.load_var("arcma_relevant_features{}y_{}.pkl".format(slash, p))
    y = pd.DataFrame(y, columns=[arcma.label_tag])
    #def balanced_dataset(y, model,min_samples):
    classes_counts = y["activity"].value_counts()
    first_counts = classes_counts.copy()  # snapshot before any balancing
    classes_counts.plot(kind='bar', title='Imbalanced ARCMA dataset.')
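
    # The commented-out balanced_dataset stub above hints at class balancing.
    # A minimal sketch of one way to do it (downsampling every class to the
    # size of the smallest one); this helper is hypothetical, not part of the
    # project:
    def balanced_dataset(relevant_features, y, label_tag):
        counts = y[label_tag].value_counts()
        min_samples = counts.min()
        balanced_indexes = []
        for label in counts.index:
            # Keep at most min_samples rows per class.
            label_indexes = y.index[y[label_tag] == label][:min_samples]
            balanced_indexes.extend(label_indexes)
        return relevant_features.loc[balanced_indexes], y.loc[balanced_indexes]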
Example #4

from utils.debug import Debug
from models.hmp_model import HMP_Model
from sklearn.ensemble import ExtraTreesClassifier
from classifiers.base_classification import Base_Classification
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from pre_processing.processing_db_files import Processing_DB_Files
import itertools
from sklearn.metrics import accuracy_score
from utils.project import Project
import time

#===INITIALIZATION===#
Debug.DEBUG = 0
hmp = HMP_Model()
processing = Processing_DB_Files()
project = Project()
extra_trees = ExtraTreesClassifier(n_estimators=1000,
                                   max_depth=1000,
                                   random_state=0)  #Good performer
base_classification = Base_Classification(hmp, extra_trees)
_, _, _ = base_classification.predict_outliers_for_list_people_with_proba(
    36, ["f1", "m1", "m2"], "eat_soup", 0.55, remove_outliers=0.05)

#===Extract TsFresh Features===#
dataframe_1 = hmp.data_with_window["f1"]["training"]
dataframe_2 = pd.DataFrame()
labels = []
id = 1
for d in dataframe_1:
    if len(np.unique(d[hmp.label_tag])) < 2:
        d["id"] = pd.Series(np.full((1, d.shape[0]), id)[0], index=d.index)
        d["time"] = pd.Series(range((id - 1) * d.shape[0], id * d.shape[0]),
                              index=d.index)
        labels.append(d["activity"].iloc[0])
        # DataFrame.append was removed in pandas 2.0; concat is the supported way.
        dataframe_2 = pd.concat([dataframe_2, d], ignore_index=True)
        id += 1  # next window gets the next id (implied by the "time" formula above)
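
# dataframe_2 now holds the windows in the long format tsfresh expects (an
# "id" column per window and a "time" column for ordering). A minimal sketch
# of the extraction step that presumably follows; the label column is dropped
# because tsfresh only featurizes numeric columns:
from tsfresh import extract_features

tsfresh_features = extract_features(dataframe_2.drop(columns=[hmp.label_tag]),
                                    column_id="id",
                                    column_sort="time")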
Example #5
# -*- coding: utf-8 -*-
# IMPORTS #
from utils.debug import Debug
from sklearn.ensemble import ExtraTreesClassifier  # Extra Trees
from models.hmp_model import HMP_Model
from classifiers.base_classification import Base_Classification

#===INITIALIZATION===#
Debug.DEBUG = 0
hmp = HMP_Model()
#extra_trees = ExtraTreesClassifier(n_estimators = 10000, max_depth=1000, random_state=0) #Good performer
extra_trees = ExtraTreesClassifier(n_estimators=100,
                                   max_depth=100,
                                   random_state=0)  #To test
base_classification = Base_Classification(hmp, extra_trees)

#TEST CLASSIFICATION
#accuracies, accuracies_proba = base_classification.predict_for_list_people_with_proba(50, ["f1", "m1", "m2"] ,0.55)
#extra_trees_features_importance = extra_trees.feature_importances_

#TEST OUTLIER DETECTION

return_dataframe, return_accuracy, data_from_each_person = base_classification.predict_outliers_for_list_people_with_proba(
    50, ["f1", "m1", "m2"], "eat_soup", 0.55, remove_outliers=0.05)

#===>> HISTORIC
#1st: Outlier filter implemented, but it did not improve results.
#2nd: Use MinMaxScaler to normalize the data.
#3rd: (Paper -> https://scialert.net/fulltextmobile/?doi=jas.2010.950.958) Include the ARIMA model parameters in the features - investigate how to use the output of seasonal_decompose as input to a classification algorithm.
''' NOTES
Example #6
from utils.debug import Debug
from models.umafall_model import UMAFALL_Model  # module path assumed, mirroring models.hmp_model
from sklearn.ensemble import ExtraTreesClassifier
from classifiers.base_classification import Base_Classification
import pandas as pd
from sklearn.model_selection import train_test_split
from pre_processing.processing_db_files import Processing_DB_Files
from utils.project import Project, slash
from scripts.save_workspace import save
from sklearn.model_selection import StratifiedKFold
import numpy as np


#===INITIALIZATION===#
Debug.DEBUG = 0
umafall = UMAFALL_Model()
processing = Processing_DB_Files()
project = Project()
extra_trees = ExtraTreesClassifier(n_estimators=10000, random_state=0)
base_classification = Base_Classification(umafall, extra_trees)

#===LOAD FEATURES===#

# Iterate over the threshold to find the best value
s = save()
person_list = [14, 15, 16, 17]
accuracy_threshould_list = []
data = {}
threshold = 0.65
project.log("=========== UMAFALL Outlier Accuracy, Thresold = {}===========".format(threshold), file="umafall_log.log")
for p in person_list:
    project.log("===========Person {}===========".format(p), file="umafall_log.log")
    data = s.load_var("umafall_relevant_features{}relevant_features_{}.pkl".format(slash, p))
    y = s.load_var("umafall_relevant_features{}y_{}.pkl".format(slash, p))
    y = pd.DataFrame(y, columns=[umafall.label_tag])
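
    # A minimal sketch of the per-person evaluation that presumably follows,
    # mirroring Example #1: split, fit, keep only predictions whose confidence
    # reaches the threshold, then score. get_indexes_with_valid_predictions is
    # the project API used in Example #1; the rest is an assumption.
    sss = StratifiedKFold(n_splits=3, shuffle=True, random_state=10)
    train_index, test_index = next(sss.split(data, y))
    x_train, x_test = data.iloc[train_index, :], data.iloc[test_index, :]
    y_train, y_test = y.iloc[train_index, :], y.iloc[test_index, :]
    extra_trees.fit(x_train, y_train.values.ravel())
    pred = pd.DataFrame(extra_trees.predict_proba(x_test),
                        columns=extra_trees.classes_)
    valid_indexes = base_classification.get_accuracy.get_indexes_with_valid_predictions(
        pred, threshold)
    accuracy = extra_trees.score(x_test.iloc[valid_indexes, :],
                                 y_test.iloc[valid_indexes, :])
    accuracy_threshould_list.append(accuracy)
    project.log("Accuracy => {}".format(accuracy), file="umafall_log.log")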