def initPredict(self):
    """Load the train and predict CSV files and build the prediction inputs.

    The feature columns (``self.featureColumns``) and the class column
    (``self.classifyColumn``) are taken from ``self.trainFilename``; the
    class column is stored in ``self.classes``. The same feature columns
    are then taken from ``self.predictFilename`` and handed, together with
    the train features, to ``self.normalizePredictFile``. A leading
    ``theta0`` column filled with 1.0 is inserted into the result, which is
    stored in ``self.predictFeatures``.

    Returns:
        True when ``self.predictFeatures`` is ready (``self.initPredictDone``
        is set to True as well); False when a file cannot be read, a
        requested column is missing, or the normalisation returns None.
    """
    datas = csv.readCSVFile(self.trainFilename, ',')
    if datas is None:
        return False

    # Try both train-file lookups before giving up so that every missing
    # column is reported in a single run.
    columnsFound = True
    trainFeatures = None
    try:
        trainFeatures = datas[self.featureColumns]
    except Exception:
        print("One or multiple columns beetween: ",
              ", ".join(self.featureColumns), " doesn't exits")
        columnsFound = False
    try:
        self.classes = datas[self.classifyColumn]
    except Exception:
        print("the classify column ", self.classifyColumn, "doesn't exits")
        columnsFound = False
    if not columnsFound:
        # Continuing would use an unset trainFeatures or leave self.classes
        # without the requested column.
        return False

    predictFeature = csv.readCSVFile(self.predictFilename, ',')
    if predictFeature is None:
        return False
    try:
        predictFeature = predictFeature[self.featureColumns]
    except Exception:
        print("One or multiple columns beetween: ",
              ", ".join(self.featureColumns), " doesn't exits")
        # Do not normalise the unfiltered frame.
        return False

    self.predictFeatures = self.normalizePredictFile(
        trainFeatures, predictFeature)
    if self.predictFeatures is None:
        # Check before insert(): calling it on None would raise.
        return False
    self.predictFeatures.insert(
        0, 'theta0', [1.0] * self.predictFeatures.shape[0])
    self.initPredictDone = True
    return self.initPredictDone
def predictAll(self):
    """Predict a class for every row of ``self.predictFeatures``.

    The thetas are read from ``thetas.csv`` (columns ``theta0`` followed by
    ``self.featureColumns``). Each feature row is scored with
    ``self.predict`` against every theta row, and the label kept is the one
    found in ``self.classes.unique()`` at the position of the first
    highest-scoring theta row.

    Returns:
        A DataFrame with a single column named ``self.classifyColumn``.
        None when ``self.initPredictDone`` is False, when
        ``self.predictFilename`` is None, or when the expected columns
        cannot be taken from the thetas file. False when ``thetas.csv``
        cannot be read.
    """
    if self.initPredictDone is False:
        print(
            "The function init must return True before call the function train"
        )
        return None
    if self.predictFilename is None:
        print("Add the predict filename in constructor")
        return None

    thetaFile = csv.readCSVFile("thetas.csv", ',')
    if thetaFile is None:
        return False
    try:
        thetas = thetaFile[['theta0'] + self.featureColumns]
    except Exception:
        print("Error in thetas File")
        return None

    labels = self.classes.unique()
    chosen = []
    for row in self.predictFeatures.values:
        scores = []
        for thetaLine in thetas.values:
            scores.append(self.predict(thetaLine, row.tolist()))
        # index(max(...)) keeps the first best score when several tie.
        chosen.append(labels[scores.index(max(scores))])
    return pd.DataFrame({self.classifyColumn: chosen})
def main():
    """Show per-house histograms of the subject values found in a CSV file.

    Usage: ``python histogram.py [-all] file``. With ``-all`` every subject
    is drawn in its own subplot on a square grid; without it only the
    subject at index 10 is drawn. Exits with status 1 when the file cannot
    be loaded or the column drop fails; prints a usage message when the
    arguments do not match.
    """
    argCount = len(sys.argv)
    drawAll = argCount == 3 and sys.argv[1] == "-all"
    if argCount != 2 and not drawAll:
        print('Error script : python histogram.py [-all] file.')
        return

    # The file name is always the last argument.
    datas = csv.readCSVFile(sys.argv[-1], ',')
    if datas is None:
        sys.exit(1)
    if csv.dropColumns(datas, csv.notArithmetiqueSubjects) is None:
        sys.exit(1)

    subjectNames, subjectsByHouse = csv.getSubjectValueByHouse(datas)
    fig = plt.figure()
    subjectCount = len(subjectNames)
    # Smallest square grid able to hold one subplot per subject.
    gridSide = math.ceil(math.sqrt(subjectCount))

    if drawAll:
        layout = [(index, (gridSide, gridSide, index + 1))
                  for index in range(subjectCount)]
    else:
        layout = [(10, (1, 1, 1))]

    for subjectIndex, position in layout:
        axes = fig.add_subplot(*position)
        axes.hist(subjectsByHouse[subjectIndex],
                  label=csv.houseNames,
                  color=csv.colors)
        axes.set_title(subjectNames[subjectIndex])
        axes.legend()
    plt.show()
def main():
    """Show a scatter plot of two subjects from a CSV file, coloured by house.

    Usage: ``python scatter_.py file``. Plots "Astronomy" against
    "Defense Against the Dark Arts" with one hue per "Hogwarts House".
    Exits with status 1 when the file cannot be loaded or the column drop
    fails; prints a usage message when the argument count is wrong.
    """
    if len(sys.argv) != 2:
        print('Error script : python scatter_.py file.')
        return

    datas = csv.readCSVFile(sys.argv[1], ',')
    if datas is None:
        sys.exit(1)
    subjectDatas = csv.dropColumns(datas, csv.notArithmetiqueSubjects)
    if subjectDatas is None:
        sys.exit(1)

    plotOptions = {
        "x": "Astronomy",
        "y": "Defense Against the Dark Arts",
        "hue": "Hogwarts House",
        "hue_order": csv.houseNames,
        "palette": csv.colors,
        "data": subjectDatas,
    }
    sns.relplot(**plotOptions)
    plt.show()
def main():
    """Print the ``describe`` summary of a CSV file.

    Usage: ``python describe.py [-all] file``. The summary of the whole
    data set is always printed; with ``-all`` one additional summary is
    printed for each house in ``csv.houseNames``, restricted to the rows
    whose "Hogwarts House" matches. Exits with status 1 when the file
    cannot be loaded or the column drop fails; prints a usage message when
    the arguments do not match.
    """
    argCount = len(sys.argv)
    perHouse = argCount == 3 and sys.argv[1] == "-all"
    if not (argCount == 2 or perHouse):
        print('Error script : python describe.py [-all] file.')
        return

    # The file name is always the last argument.
    datas = csv.readCSVFile(sys.argv[-1], ',')
    if datas is None:
        sys.exit(1)
    subjectDatas = csv.dropColumns(datas, csv.notArithmetiqueSubjects)
    if subjectDatas is None:
        sys.exit(1)

    print("Describe general:\n{}\n".format(describe(subjectDatas)))
    if not perHouse:
        return

    for house in csv.houseNames:
        houseRows = subjectDatas.loc[
            subjectDatas['Hogwarts House'].isin({house})]
        print("Describe {}:\n{}\n".format(house, describe(houseRows)))
def initTrain(self):
    """Load the train CSV file and prepare the training inputs.

    The columns ``self.featureColumns`` are passed through
    ``self.normalize`` and a leading ``theta0`` column filled with 1.0 is
    inserted; the result is stored in ``self.features``. The column
    ``self.classifyColumn`` is stored in ``self.classes``.

    Returns:
        True when both the features and the classes were loaded
        (``self.initTrainDone`` is set to True as well); False when the
        file cannot be read or either step fails.
    """
    datas = csv.readCSVFile(self.trainFilename, ',')
    if datas is None:
        return False

    # Track success explicitly rather than inspecting self.features and
    # self.classes afterwards: those may still hold values from an earlier
    # call and would make a failed run look successful.
    loaded = True
    try:
        features = self.normalize(datas[self.featureColumns])
        features.insert(0, 'theta0', [1.0] * features.shape[0])
        # Publish only once fully built, so a failure never leaves a
        # half-prepared frame on the instance.
        self.features = features
    except Exception:
        print("One or multiple columns beetween: ",
              ", ".join(self.featureColumns), " doesn't exits")
        loaded = False
    try:
        self.classes = datas[self.classifyColumn]
    except Exception:
        print("the classify column ", self.classifyColumn, "doesn't exits")
        loaded = False

    if not loaded:
        return False
    self.initTrainDone = True
    return self.initTrainDone