示例#1
0
    def generate(self,train_data,test_data,subsampling,subsamples):
        """Train one ID3 tree per training subsample and return the best one.

        Every generated tree is scored on the full ``train_data``; the tree
        with the highest score is kept, its accuracy on ``test_data`` is
        printed, and the number/rate of trees that could not be scored is
        reported.

        Side effect: ``self.tree_acc_set`` is replaced by the list of
        ``(accuracy, tree)`` pairs of all trees that could be evaluated.

        :param train_data: training data; handed to ``generate_training_set``
            and used to score every generated tree
        :param test_data: testing data; only used to report the accuracy of
            the selected tree
        :param subsampling: divisor applied to ``len(train_data)``; the
            integer result is passed to ``generate_training_set``
            (presumably the size of each subsample -- confirm there)
        :param subsamples: passed to ``generate_training_set`` (presumably
            the number of subsamples to draw -- confirm there)
        :return: the tree with the highest accuracy on ``train_data``
        :raises ValueError: if no generated tree could be evaluated
        """

        print("Generating " + str(subsamples) + " subsamples with " + str(subsampling) + " times subsampling")
        train_set = self.generate_training_set(train_data,int(len(train_data)/subsampling),subsamples)

        print("Generating trees ...")
        id3 = ID3()
        binTest = BinTestEnv()
        tree_set = [id3.construit_arbre(item) for item in train_set]

        print("Evaluating trees ...")
        self.tree_acc_set = []
        for tree in tree_set:
            # Some generated trees cannot handle every sample: a training
            # subset does not necessarily contain every value of every
            # attribute. Such trees are deliberately ignored (best effort).
            # Only ``Exception`` is caught so that Ctrl-C / SystemExit still
            # interrupt a long evaluation.
            try:
                accuracy = binTest.tree_test(tree, train_data, False)
            except Exception:
                continue
            self.tree_acc_set.append((accuracy, tree))

        if not self.tree_acc_set:
            raise ValueError("no generated tree could be evaluated on the training data")

        # Invalid trees are the ones that raised during evaluation; the rate
        # is their share of all generated trees (not the share of valid ones).
        inv_numb = len(tree_set) - len(self.tree_acc_set)
        inv_rate = inv_numb/len(tree_set)

        bestTree = max(self.tree_acc_set, key = itemgetter(0))
        print("Accuracy = " + str(binTest.tree_test(bestTree[1],test_data,False)*100) + "%")
        print(str(inv_numb) + " invalid trees, invalid rate is : " + str(inv_rate*100) + "%")
        return bestTree[1]
    def __init__(self):
        """Build the binary ID3 tree, its rules and initial facts, then the continuous tree.

        The two training CSV files are read through ``csv_reader``; the two
        test file paths are only stored on the instance.
        """
        # Task 1: binary tree trained on the pre-binned training file.
        self.file_train = 'data/train_bin.csv'
        binary_rows = csv_reader(self.file_train)
        self.id3 = ID3()
        self.arbre = self.id3.construit_arbre(binary_rows)

        # Task 2: only the location of the binary test file is recorded.
        self.file_test = 'data/test_public_bin.csv'

        # Task 3: rules come from the tree, initial facts from the same
        # training rows the tree was built on.
        self.regles = generateur_de_regles(self.arbre)
        self.faits_initiaux = derive_faits_depuis_fichier(binary_rows)

        # Task 4: nothing is computed here.

        # Task 5: tree trained on the continuous training file.
        self.file_continuous_train = 'data/train_continuous.csv'
        continuous_rows = csv_reader(self.file_continuous_train)
        self.id3_continuous = ID3Continuous()
        self.arbre_advance = self.id3_continuous.construit_arbre(continuous_rows)
        self.file_continuous_test = 'data/test_public_continuous.csv'
示例#3
0
    def __init__(self):
        """Load the datasets, build both trees and print the evaluation reports.

        Prints ``self.tree_properties`` (defined outside this method), the
        test environments of the basic and advanced trees, and triggers the
        rule explanations for the test and train sets.
        """
        separator = "-----------------------------------------------------"

        # Datasets shared by tasks 1-4.
        self.train_set = load_train_set()
        self.test_set = load_test_set()

        # Task 1: basic ID3 tree.
        tree_builder = ID3()
        self.arbre = tree_builder.construit_arbre(self.train_set)
        print(self.tree_properties)

        # Task 2: evaluate the tree on the test set.
        print(separator)
        self.test_env = TestEnv(self.arbre, self.test_set)
        print(self.test_env)
        print(self.tree_properties)

        # Task 3/4: rules and explanations.
        print(separator)
        rule_generator = GenereRegles(self.arbre, self.train_set)
        self.faits_initiaux = self.train_set
        self.regles = rule_generator.regles
        # TODO: should the test set or the train set be explained? When the
        # test set is explained, about 40% of it is misclassified, so those
        # explanations make no sense.
        # TODO: print the number of people saved with 1 or 2 changes.
        for dataset in (self.test_set, self.train_set):
            rule_generator.explique_all(dataset, rule_generator.regles)

        # Task 5: advanced tree on the continuous datasets.
        advanced_builder = ID3_advance()
        continuous_train = load_continous_train_set()
        continuous_test = load_continous_test_set()
        self.arbre_advance = advanced_builder.construit_arbre(continuous_train)
        self.test_env_advanced = TestEnv(self.arbre_advance, continuous_test)
        print(separator)
        print(self.test_env_advanced)
示例#4
0
    def __init__(self):
        """Import the datasets and build the tree, rules, statistics, diagnostic and advanced tree."""
        # Four datasets are imported; only the two test sets are kept on the
        # instance, the training sets stay local.
        datasets = self.importer_donnees()
        train_rows, self.donnees_test, train_rows_adv, self.donnees_test_adv = datasets

        tree_builder = ID3()

        # Task 1: basic tree.
        self.arbre = tree_builder.construit_arbre(train_rows)

        # Statistics holder for task 1.
        self.stat = StatistiquesID3()

        # Task 3: initial facts are the raw training rows
        # (original author's open question: is that really what is wanted?).
        self.faits_initiaux = train_rows
        self.regles = self.generer_regles(self.arbre)
        self.stat.calculer_statistiques(self.regles)

        # Task 4: diagnostic built from the rules and the tree.
        self.diagnostic = Diagnostic(self.regles, self.arbre)

        # Task 5: advanced tree on the advanced training set.
        advanced_builder = ID3_ADV()
        self.arbre_advance = advanced_builder.construit_arbre(train_rows_adv)
示例#5
0
    def __init__(self):
        """Train an ID3 tree on the divorce dataset, test it and print one explained case.

        The first ``thresh`` rows of ``divorce.csv`` train the tree and the
        remaining rows are used to test it. One randomly chosen training case
        is then classified and its classification text is printed after
        attribute identifiers and answer codes have been replaced by readable
        text.
        """

        def to_sample(line):
            # [label, attributes] pair: the shape passed to construit_arbre
            # and tree_test below.
            return [
                line['Class'],
                {key: val for key, val in line.items() if key != "Class"},
            ]

        # Parse the data from the CSV file.
        raw_csv = self.parseCSV("divorce.csv")

        thresh = 100
        train_data = [to_sample(line) for line in raw_csv[:thresh]]
        test_data = [to_sample(line) for line in raw_csv[thresh:]]

        attributs = self.parseAttribute('divorce_attributes.csv')
        # (text found in the classification, replacement text) pairs.
        qualificateurs = [("0", "very false"), ("1", "false"),
                          ("2", "neutral"), ("3", " true"), ("4", "very true"),
                          ("Alors 0", "Then no divorce"),
                          ("Alors 1", "Then divorce"), ("=", "is"),
                          ("Si", "If")]

        id3 = ID3()
        self.arbre = Arbre(id3.construit_arbre(train_data))

        # The returned accuracy is not used here; the call is kept for
        # whatever reporting tree_test does with its last argument set to True.
        binTest = BinTestEnv()
        binTest.tree_test(self.arbre.racine, test_data, True)

        def predict(i):
            # Classify training case ``i`` and rewrite the result as text.
            classification = self.arbre.racine.classifie(train_data[i][1])
            # Replacements are applied in reverse list order -- presumably so
            # that longer identifiers are substituted before their prefixes;
            # verify against the attribute file.
            for val in reversed(attributs):
                classification = classification.replace(val[0], val[1])
            for val in reversed(qualificateurs):
                classification = classification.replace(val[0], val[1])
            return classification

        random.seed()
        # randrange excludes its upper bound, so the index is always valid for
        # train_data; randint(0, 100) could return 100 and overflow a list of
        # at most 100 cases.
        i = random.randrange(len(train_data))
        print("Case #" + str(i))
        print(predict(i))
        """
示例#6
0
    def __init__(self, donnees=None, donnees_modif=None):
        """Build the basic tree, its rules and initial facts, and the modified-ID3 tree.

        :param donnees: data handed to ``ID3.construit_arbre`` and to
            ``Faits_initiaux``
        :param donnees_modif: data handed to ``ID3_modif.construit_arbre``
        """
        # Keep both datasets on the instance.
        self.donnees, self.donnees_modif = donnees, donnees_modif

        # Task 1
        self.arbre = ID3().construit_arbre(donnees)

        # Task 3: the rule builder exposes both the rules and the helper that
        # derives the initial facts from the data.
        rule_builder = ReglesConstructeur(self.arbre)
        self.regles = rule_builder.regles
        self.faits_initiaux = rule_builder.Faits_initiaux(self.donnees)

        # Task 5
        self.arbre_advance = ID3_modif().construit_arbre(donnees_modif)
示例#7
0
 def __init__(self):
     """Build the binary and continuous trees and record their precision rates."""
     tree_builder = ID3()
     advanced_builder = ID3Advance()
     # Presumably the file read by get_datas() when it is called without an
     # argument -- confirm in get_datas. It used to be prompted for:
     # str(input("nom du fichier a ouvrir"))
     self.filename = "train_bin.csv"

     # Task 1: tree built from the default data.
     self.arbre = tree_builder.construit_arbre(self.get_datas())

     # Task 2: binary datasets and precision of the basic tree.
     self.donnees_train = self.get_datas("train_bin.csv")
     self.donnees_test = self.get_datas("test_public_bin.csv")
     self.precision_rate = self.test_precision(self.arbre)

     # Task 3: facts, rules and attributes.
     self.faits_initiaux = self.define_faits_initiaux()
     self.regles = self.define_regles(self.arbre)
     # NOTE(review): second call with an explicit empty list; its return
     # value is discarded -- confirm whether it is needed.
     self.define_regles(self.arbre, [])
     self.attributs = self.get_attributs()

     # Task 5: continuous datasets, advanced tree and its precision.
     self.donnees_train_continuous = self.get_datas("train_continuous.csv")
     self.donnees_test_continuous = self.get_datas("test_public_continuous.csv")
     self.arbre_advance = advanced_builder.construit_arbre(
         self.donnees_train_continuous)
     self.precision_rate_advance = self.test_precision(
         self.arbre_advance, self.donnees_test_continuous)
    def __init__(self):
        """Build an ID3 tree from ``train_bin.csv``, print its statistics and collect attribute values.

        Tasks 2 (evaluation) and 3 (rules / diagnostics) are mostly disabled:
        their code is wrapped in bare string literals below and never runs.
        """

        # Do computations here
        
        # Parse the data from the CSV file.
        print("Parsing pre-binned training data...")
        train_bin_csv = self.parseCSV("train_bin.csv")
        # Each sample becomes [target, {attribute: value}] with the "target" column removed from the attributes.
        train_bin = [ [line["target"], {key:val for key, val in line.items() if key != "target"}] for line in train_bin_csv] # (author's note: really fond of one-liners :)

        # Task 1
        
        print("Generating ID3 tree from " + str(len(train_bin)) + " samples...", end = "")
        id3 = ID3()
        self.arbre = Arbre(id3.construit_arbre(train_bin))
        nb_noeuds = len(self.arbre.noeuds)
        nb_feuilles = len(self.arbre.noeuds_terminaux_profondeur)
        profondeur = self.arbre.profondeur()
        # Mean of longueur_branche over the first element of every entry of noeuds_terminaux_profondeur.
        moyenne_longueur_branche = sum([self.arbre.longueur_branche(feuille_longueur[0]) for feuille_longueur in self.arbre.noeuds_terminaux_profondeur])/len(self.arbre.noeuds_terminaux_profondeur)
        # Total number of children divided by the number of non-terminal nodes.
        # NOTE(review): raises ZeroDivisionError if every node is terminal -- confirm this cannot happen.
        moyenne_enfants_noeud = sum([len(noeud.enfants) for noeud in self.arbre.noeuds if noeud.enfants != None])/len([noeud for noeud in self.arbre.noeuds if not noeud.terminal()])
        print(" Done!")

        print("L'arbre a un {} noeuds dont {} feuilles".format(nb_noeuds,nb_feuilles))
        print("L'arbre a une profondeur de " + str(profondeur))
        print("La moyenne du nombre d'enfants par noeud est " +str(moyenne_enfants_noeud))
        print("La moyenne de la longueur d'une branche est " +str(moyenne_longueur_branche)) 
        

        #Task 2
        
        print("Parsing pre-binned testing data...")
        test_public_bin_csv = self.parseCSV("test_public_bin.csv")
        # Same [target, {attribute: value}] shape as train_bin.
        test_public_bin = [ [line["target"], {key:val for key, val in line.items() if key != "target"}] for line in test_public_bin_csv]
        # The string literal below is disabled code (tree test and random forest); it is never executed.
        """

        print("Setting up testing environnement...")
        binTest = BinTestEnv()

        binTest.tree_test(self.arbre.racine,train_bin)

        # print("Testing training with a random forest :")
        rForest = RandomForest()
        rf_tree = rForest.generate(train_bin,test_public_bin,2,500)

        #binTest.test_forest(rForest,test_public_bin,True)
        # print()
        """
        # Task 3
        # The string literal below is disabled code (rules and diagnostics); it is never executed,
        # so self.faits_initiaux and self.regles are not set by this part of the method.
        """
        self.faits_initiaux = None
        self.regles = None

        Titou = FirstYearMedSchool()
        Titou.apprend(self.arbre.racine)
        
        diagnostics = Titou.diagnostique_hopital(test_public_bin)
        Titou.affiche_diagnostics_hopital(diagnostics)
        #binTest.rule_test(rGen, test_public_bin,True)
        """
        
        
        #Task 4
        
        # Collect, per attribute, a set built from the values seen in the training samples.
        # NOTE(review): set(value) iterates over the value, so a multi-character string
        # contributes its individual characters rather than the whole value -- confirm
        # that values are single characters or that this is intended.
        attributs_et_valeurs = {}
        first = True
        for donnee in train_bin:
            if first:
                # The first sample creates one set per attribute.
                for attribut in donnee[1]:
                    attributs_et_valeurs[attribut] = set(donnee[1][attribut])
                first = False
            else:
                # Later samples extend the existing sets.
                for attribut in donnee[1]:
                    attributs_et_valeurs[attribut].update(set(donnee[1][attribut]))
示例#9
0
    def __init__(self):
        """Train and test a binned ID3 tree and a continuous ID3 tree on the breast-cancer data.

        For both trees the first ``thresh`` rows of
        ``breast-cancer-wisconsin.csv`` are used for training and the
        remaining rows for testing. The continuous variant first drops every
        row containing an unknown value (``'?'``).
        """

        def to_sample(line):
            # [label, attributes] pair; the "Class" label and the "id" column
            # are excluded from the attributes.
            return [
                line['Class'],
                {
                    key: val
                    for key, val in line.items() if key not in ("Class", "id")
                },
            ]

        # Parse the data from the CSV file.
        print("Parsing pre-binned training data...")
        raw_csv = self.parseCSV("breast-cancer-wisconsin.csv")

        # Number of leading rows used for training; shared by both splits.
        thresh = 300
        train_data = [to_sample(line) for line in raw_csv[:thresh]]
        test_data = [to_sample(line) for line in raw_csv[thresh:]]

        print("Generating ID3 tree from " + str(len(train_data)) +
              " samples...")
        id3 = ID3()
        self.arbre = Arbre(id3.construit_arbre(train_data))

        # The returned accuracy is not used here; the call is kept for
        # whatever reporting tree_test does with its last argument set to True.
        binTest = BinTestEnv()
        binTest.tree_test(self.arbre.racine, test_data, True)

        # Task 5

        print(
            "Parsing pre-binned training data for continuous algorith test (removing cases with unknown values)..."
        )
        # The file is parsed a second time, as before, then rows holding an
        # unknown value are dropped.
        raw_cont_csv = [
            row for row in self.parseCSV("breast-cancer-wisconsin.csv")
            if '?' not in row.values()
        ]

        train_cont_data = [to_sample(line) for line in raw_cont_csv[:thresh]]
        test_cont_data = [to_sample(line) for line in raw_cont_csv[thresh:]]

        # Report the size of the set actually used to train the continuous
        # tree (the filtered one), not the size of the binned training set.
        print("Generating continuous ID3 tree from " +
              str(len(train_cont_data)) + " samples...")
        id3_continuous = ID3_continu()
        self.arbre_advance = id3_continuous.construit_arbre(train_cont_data)

        continuousTest = ContinuousTestEnv()

        continuousTest.test(self.arbre_advance, test_cont_data, True)
示例#10
0
    ['rubéole', {
        'fièvre': 'non',
        'amygdales': 'normales',
        'ganglions': 'oui',
        'gêne-à-avaler': 'non',
        'mal-au-ventre': 'non',
        'toux': 'non',
        'rhume': 'non',
        'respiration': 'normale',
        'joues': 'normales',
        'yeux': 'normaux'}
    ],
]


# Build a decision tree from the `donnees` list defined above and print it.
id3 = ID3()
arbre = id3.construit_arbre(donnees)
print('Arbre de décision :')
print(arbre)
print()

# Header for the example classification printed next.
print('Exemplification :')
print(arbre.classifie({
        'fièvre': 'non',
        'amygdales': 'normales',
        'ganglions': 'oui',
        'gêne-à-avaler': 'non',
        'mal-au-ventre': 'non',
        'toux': 'non',
        'rhume': 'non',
        'respiration': 'normale',