def c45(X_train, y_train, X_test, y_test):
    """Train a C4.5 tree with pyC45 and predict the test observations.

    The tree is trained on ``X_train``/``y_train`` and stored under the
    relative path ``DecisionTree.xml``; that same file is then handed to
    ``pyC45.predict`` together with ``X_test``.

    Args:
        X_train: Training observations, passed unchanged to ``pyC45.train``.
        y_train: Training categories, passed unchanged to ``pyC45.train``.
        X_test: Test observations, passed unchanged to ``pyC45.predict``.
        y_test: Object exposing ``items()`` (or the legacy ``iteritems()``)
            that yields ``(key, value)`` pairs -- presumably a pandas
            Series; confirm against the caller.

    Returns:
        A tuple ``(answer, prediction)``: ``answer`` is the list of
        ``str(value)`` for every value in ``y_test``; ``prediction`` is
        whatever ``pyC45.predict`` returns.
    """
    pyC45.train(X_train, y_train, "DecisionTree.xml")

    # Prefer items(): iteritems() no longer exists on current pandas Series
    # or on Python 3 dicts. Fall back to iteritems() for objects that only
    # offer the legacy spelling.
    iter_pairs = getattr(y_test, "items", None) or y_test.iteritems
    # Only the values are needed; they are stringified to form the answers.
    answer = [str(label) for _, label in iter_pairs()]

    prediction = pyC45.predict("DecisionTree.xml", X_test)
    return answer, prediction
def trainandpredict(self, trainX, trainY, testX, testY):
    """Train a C4.5 tree with pyC45, predict ``testX`` and report the time.

    The tree is trained on ``trainX``/``trainY`` and stored under the
    relative path ``DecisionTree.xml``; that file is then handed to
    ``pyC45.predict`` together with ``testX``. The elapsed time is printed
    in milliseconds.

    Args:
        trainX: Training observations, passed unchanged to ``pyC45.train``.
        trainY: Training categories, passed unchanged to ``pyC45.train``.
        testX: Test observations, passed unchanged to ``pyC45.predict``.
        testY: Object exposing ``items()`` (or the legacy ``iteritems()``)
            that yields ``(key, value)`` pairs -- presumably a pandas
            Series; confirm against the caller.

    Returns:
        A tuple ``(answer, prediction)``: ``answer`` is the list of
        ``str(value)`` for every value in ``testY``; ``prediction`` is
        whatever ``pyC45.predict`` returns.
    """
    pyC45.train(trainX, trainY, "DecisionTree.xml")

    # The clock starts only after training has finished, so the reported
    # figure covers building the answer list plus prediction.
    # NOTE(review): confirm whether training time was meant to be included.
    start_time = datetime.now()

    # Prefer items(): iteritems() no longer exists on current pandas Series
    # or on Python 3 dicts. Fall back to iteritems() for objects that only
    # offer the legacy spelling.
    iter_pairs = getattr(testY, "items", None) or testY.iteritems
    answer = [str(label) for _, label in iter_pairs()]

    prediction = pyC45.predict("DecisionTree.xml", testX)

    dt = datetime.now() - start_time
    # Convert the timedelta to (fractional) milliseconds.
    ms = (dt.days * 24 * 60 * 60 + dt.seconds) * 1000 + dt.microseconds / 1000.0
    print('Time taken by this algo in millisec:' + str(ms))
    return answer, prediction
import csv

import pyC45

if __name__ == "__main__":
    # Train a C4.5 decision tree and save the tree as an XML file.
    # Every CSV row is split into its observation (all columns but the
    # last) and its category (the last column).
    training_obs = []
    training_cat = []
    # open() replaces the Python-2-only file() builtin, and the with-block
    # closes the handle, which was previously left open.
    with open('./data/training_set.csv') as training_file:
        reader = csv.reader(training_file)
        for line in reader:
            training_obs.append(line[:-1])
            training_cat.append(line[-1])
    pyC45.train(training_obs, training_cat, "DecisionTree.xml")

    # Test the C4.5 decision tree. The evaluation reads the same file that
    # was used for training.
    answer = []
    testing_obs = []
    with open('./data/training_set.csv') as testing_file:
        reader = csv.reader(testing_file)
        for line in reader:
            testing_obs.append(line[:-1])
            answer.append(line[-1])
    # Only the first label is dropped; the first observation row is still
    # sent to predict(). Presumably that row is a header for which
    # pyC45.predict returns no prediction -- confirm against pyC45.
    answer.pop(0)
    prediction = pyC45.predict("DecisionTree.xml", testing_obs)

    # Count mismatches. Indexing into prediction keeps the original failure
    # mode if predict() returns fewer results than there are labels.
    err = sum(
        1 for i, expected in enumerate(answer)
        if not expected == prediction[i]
    )
    # A single pre-formatted argument prints the same text under both the
    # Python 2 print statement and the Python 3 print function.
    print("error rate= %s %%" % round(float(err) / len(prediction) * 100, 2))
# Split every row of the training CSV into its observation (all columns but
# the last) and its category (the last column). `reader` is created before
# this fragment begins.
training_obs = []
training_cat = []
for line in reader:
    training_obs.append(line[:-1])
    training_cat.append(line[-1])

# The tree is written to and read back from the same XML file, so the path
# is bound once instead of being repeated as a literal.
tree_path = "C:/Users/Luiz Felipe/Documents/TCC/assistenteX/plugin/dataDecisionTree.xml"
pyC45.train(training_obs, training_cat, tree_path)

# Test the C4.5 decision tree on the rows of training_set.csv.
answer = []
testing_obs = []
# The with-block closes the CSV handle, which was previously left open.
with open(
        'C:/Users/Luiz Felipe/Documents/TCC/assistenteX/plugin/data/training_set.csv'
) as testing_file:
    reader = csv.reader(testing_file)
    for line in reader:
        testing_obs.append(line[:-1])
        answer.append(line[-1])
# Only the first label is dropped; the first observation row is still sent to
# predict(). Presumably that row is a header for which pyC45.predict returns
# no prediction -- confirm against pyC45.
answer.pop(0)
prediction = pyC45.predict(tree_path, testing_obs)

# Count mismatches. Indexing into prediction keeps the original failure mode
# if predict() returns fewer results than there are labels.
err = sum(
    1 for i, expected in enumerate(answer)
    if not expected == prediction[i]
)
print("error rate=", round(float(err) / len(prediction) * 100, 2), "%")
import csv

import pyC45


if __name__ == "__main__":
    data_path = './data/training_set.csv'
    tree_path = "DecisionTree.xml"

    # Train a C4.5 decision tree and save the tree as an XML file.
    # The last column of each CSV row is the category; the columns before
    # it form the observation.
    training_obs = []
    training_cat = []
    # file() exists only on Python 2 and the handle was never closed;
    # open() inside a with-block works on both major versions and releases
    # the file once the rows are read.
    with open(data_path) as csv_file:
        reader = csv.reader(csv_file)
        for line in reader:
            training_obs.append(line[:-1])
            training_cat.append(line[-1])
    pyC45.train(training_obs, training_cat, tree_path)

    # Test the C4.5 decision tree against the very file it was trained on.
    answer = []
    testing_obs = []
    with open(data_path) as csv_file:
        reader = csv.reader(csv_file)
        for line in reader:
            testing_obs.append(line[:-1])
            answer.append(line[-1])
    # The first label is removed while the first observation row stays in
    # testing_obs. Presumably pyC45.predict treats that row as a header and
    # returns no prediction for it -- confirm against pyC45.
    answer.pop(0)
    prediction = pyC45.predict(tree_path, testing_obs)

    # Tally the labels that differ from the prediction at the same index.
    # prediction is indexed (not zipped) so a too-short result still raises.
    err = 0
    for position, expected in enumerate(answer):
        if not expected == prediction[position]:
            err += 1
    rate = round(float(err) / len(prediction) * 100, 2)
    # One pre-formatted string gives the same text under both the Python 2
    # print statement and the Python 3 print function.
    print("error rate= %s %%" % rate)