def main():
    """Predict a house for every dataset row and write the result to houses.csv.

    Expects two paths from the command line (as returned by
    ``IOHelper().checkArg``): the dataset to classify, then the weights file
    read through ``csvToArray``.  Prints "Missing file" and exits with
    status 1 when fewer than two paths are supplied.
    """
    args = IOHelper().checkArg(sys.argv)
    if len(args) < 2:
        print("Missing file")
        # sys.exit rather than the site-provided exit(), which is meant for
        # the interactive shell and is not guaranteed to exist in scripts.
        sys.exit(1)

    d = Dataset()
    d.loadFile(args[0])

    # Columns 7..18 of the dataset are used as input features.
    featuresId = range(7, 19)
    X = generateDataset(d, featuresId)
    X, nbInput = d.featureExpand(d, X)
    X = d.featureRescale(d, X)

    allWeight, AllOutput = csvToArray(args[1])
    allclassifier = MultiClassifier(nbInput, AllOutput)
    allclassifier.initWeight(allWeight)

    # Distinct names for the output handle and the row keep the argument
    # list and the Dataset instance from being rebound.
    with open('houses.csv', 'w') as out:
        out.write("Index,Hogwarts House\n")
        for i, row in enumerate(X):
            name = allclassifier.predict(row)
            out.write(str(i) + "," + name + "\n")
def main():
    """Draw a 13-row grid through ``drawOneSub`` and save it as an image.

    Expects the dataset path from the command line (as returned by
    ``IOHelper().checkArg``).  Prints "Missing file" and exits with status 1
    when no path is supplied.  The figure is written to
    ``scatter_plot.png`` and then displayed.
    """
    args = IOHelper().checkArg(sys.argv)
    if len(args) < 1:
        print("Missing file")
        # sys.exit rather than the site-provided exit(), which is meant for
        # the interactive shell and is not guaranteed to exist in scripts.
        sys.exit(1)

    d = Dataset()
    d.loadFile(args[0])

    fig, _ = plt.subplots(figsize=(18, 10))
    fig.tight_layout()

    first_feature = 6   # first feature column handed to drawOneSub
    width = 13          # number of rows, and of slots per row

    for row in range(width):
        # Each row receives the next contiguous run of `width` slot indices.
        slots = range(row * width, (row + 1) * width)
        drawOneSub(d, first_feature + row, first_feature, slots)
        # NOTE(review): assumed to run once per row; the collapsed source
        # does not show whether this print sat inside the loop - confirm.
        print("")

    plt.savefig('scatter_plot.png')
    plt.show()
def main():
    """Plot feature columns 6..18, grouped onto four subplots by magnitude.

    For every column the mean and standard deviation are computed; the
    column is sent to subplot 1, 2 or 3 when both values lie within
    +/-10, +/-500 or +/-2000 respectively, and to subplot 4 otherwise.
    The figure is written to ``scatter_plot.png`` and then displayed.

    Expects the dataset path from the command line (as returned by
    ``IOHelper().checkArg``).  Prints "Missing file" and exits with status 1
    when no path is supplied.
    """
    args = IOHelper().checkArg(sys.argv)
    if len(args) < 1:
        print("Missing file")
        # sys.exit rather than the site-provided exit(), which is meant for
        # the interactive shell and is not guaranteed to exist in scripts.
        sys.exit(1)

    d = Dataset()
    d.loadFile(args[0])

    fig, _ = plt.subplots(figsize=(18, 10))
    fig.tight_layout()

    # (inclusive bound on |mean| and |std|, subplot number), tightest first.
    scale_limits = ((10, 1), (500, 2), (2000, 3))

    for index in range(6, 19):
        # Fetch the column once and reuse it for the statistics and the plot.
        values = d.getFeature(index)
        mean = d.mean(values)
        std = d.standardDeviation(values, mean)

        for limit, subplot in scale_limits:
            if -limit <= mean <= limit and -limit <= std <= limit:
                break
        else:
            # Nothing matched: the column goes to the catch-all subplot.
            subplot = 4

        addFeatureOnSubplot(index, values, d.getName(index), subplot)

    plt.savefig('scatter_plot.png')
    plt.show()
def main():
    """Load the dataset named on the command line and print its features.

    Expects the dataset path from the command line (as returned by
    ``IOHelper().checkArg``).  Prints "Missing file" and exits with status 1
    when no path is supplied; otherwise prints the feature header followed
    by all features.
    """
    args = IOHelper().checkArg(sys.argv)
    if len(args) < 1:
        print("Missing file")
        # sys.exit rather than the site-provided exit(), which is meant for
        # the interactive shell and is not guaranteed to exist in scripts.
        sys.exit(1)

    d = Dataset()
    d.loadFile(args[0])
    d.printFeatureHeader()
    d.printAllFeature()
def main():
    """Train the multi-class classifier and report accuracy after each epoch.

    Expects two paths from the command line (as returned by
    ``IOHelper().checkArg``): the training dataset, then the test dataset.
    Prints "Missing file" and exits with status 1 when fewer than two paths
    are supplied.

    Training runs for 30 epochs starting at a learning rate of 10.0.  When
    the summed loss grows in magnitude from one epoch to the next, the
    learning rate is divided by 10 (as long as it is still above 1e-9).
    After every epoch the weights are saved and the accuracy on the test
    set is printed alongside the losses.
    """
    epoch = 30
    args = IOHelper().checkArg(sys.argv)
    if len(args) < 2:
        print("Missing file")
        # sys.exit rather than the site-provided exit(), which is meant for
        # the interactive shell and is not guaranteed to exist in scripts.
        sys.exit(1)

    # Columns 7..18 of each dataset are used as input features.
    featuresId = range(7, 19)

    # Training set.
    d = Dataset()
    d.loadFile(args[0])
    X, Y = generateDataset(d, featuresId)
    X, nbInput = d.featureExpand(d, X)
    X = d.featureRescale(d, X)

    # Test set, run through the same pipeline.
    d_test = Dataset()
    d_test.loadFile(args[1])
    X_test, Y_test = generateDataset(d_test, featuresId)
    X_test, nbInput = d_test.featureExpand(d_test, X_test)
    X_test = d_test.featureRescale(d_test, X_test)

    # One output per distinct value found in column 1 of the training set.
    houseArray = d.getFeature(1, uniq=True)
    allclassifier = MultiClassifier(nbInput, houseArray)

    lr = 10.0
    oldLoss = 9e+9
    allclassifier.setLr(lr)

    for j in range(epoch):
        loss = allclassifier.train(X, Y)
        allLoss = loss.sum()

        # The loss got worse: shrink the step, down to a floor of 1e-9.
        if abs(allLoss) > abs(oldLoss) and lr > 0.000000001:
            lr /= 10
            print("DECREASE TO " + str(lr))
            allclassifier.setLr(lr)
        oldLoss = allLoss

        allclassifier.saveWeight()

        y_true, y_pred = generatePrediction(allclassifier, X_test, Y_test)
        acc = accuracy_score(y_true, y_pred) * 100
        print(
            (
                "epoch: {0:<15.5g} Loss1: {1:<15.5g} Loss2: {2:<15.5g} "
                "Loss3: {3:<15.5g} Loss4: {4:<15.5g} LOSS: {5:<15.5g} "
                "Accuracy: {6:<g}%"
            ).format(j, loss[0], loss[1], loss[2], loss[3], allLoss, acc)
        )
def main():
    """Show one scatter figure per feature column (6..18), coloured by house.

    For every column, each house's values are plotted twice on the same
    axes: once in the order returned by ``getFeature`` and once sorted.

    Expects the dataset path from the command line (as returned by
    ``IOHelper().checkArg``).  Prints "Missing file" and exits with status 1
    when no path is supplied.
    """
    args = IOHelper().checkArg(sys.argv)
    if len(args) < 1:
        print("Missing file")
        # sys.exit rather than the site-provided exit(), which is meant for
        # the interactive shell and is not guaranteed to exist in scripts.
        sys.exit(1)

    d = Dataset()
    d.loadFile(args[0])

    # (house, marker colour) in drawing order.
    houses = (
        ('Gryffindor', 'b'),
        ('Hufflepuff', 'g'),
        ('Ravenclaw', 'c'),
        ('Slytherin', 'r'),
    )

    for index in range(6, 19):
        # Fetch every house's series once; reused for raw and sorted plots.
        series = [d.getFeature(index, 1, house) for house, _ in houses]

        ax = plt.subplot(1, 1, 1)
        # Size the x-axis from the longest series.  Using only the first
        # house's length would cut off points of any longer series, since
        # explicit limits are not widened by later scatter calls.
        longest = max(len(values) for values in series)
        ax.set_xlim([-10, longest + 10])

        # Raw order first (these carry the legend labels) ...
        for (house, color), values in zip(houses, series):
            plt.scatter(np.arange(len(values)), values,
                        c=color, s=10, alpha=0.3, label=house)
        # ... then the sorted values, same colours, no extra legend entry.
        for (_, color), values in zip(houses, series):
            plt.scatter(np.arange(len(values)), sorted(values),
                        c=color, s=10, alpha=0.3)

        plt.title(d.getName(index))
        plt.ylabel('Worst <---> Best')
        plt.xlabel('Evaluation')
        plt.legend()
        plt.tight_layout()
        plt.show()
def main():
    """Print per-house statistics for columns 6..18 and chart them as bars.

    For every distinct value of column 1 (the house), a table is printed
    with the count, standard deviation, mean, minimum, quartiles and
    maximum of each feature column.  The mean, minimum, quartiles and
    maximum are then drawn as grouped bars for the four houses.

    Expects the dataset path from the command line (as returned by
    ``IOHelper().checkArg``).  Prints "Missing file" and exits with status 1
    when no path is supplied.
    """
    args = IOHelper().checkArg(sys.argv)
    if len(args) < 1:
        print("Missing file")
        # sys.exit rather than the site-provided exit(), which is meant for
        # the interactive shell and is not guaranteed to exist in scripts.
        sys.exit(1)

    d = Dataset()
    d.loadFile(args[0])
    print(d.getName(1))

    # The numeric prefixes fix the order in which the bar groups are drawn.
    stat_keys = ('1moy', '2min', '3q25', '4q50', '5q75', '6max')
    stats = {}
    # Course names live in their own list, not under a key next to the houses.
    names = []

    for house in d.getFeature(1, uniq=True):
        stats[house] = {key: [] for key in stat_keys}
        # Rebuilt for every house; the last house's list labels the chart.
        names = []
        print(house)
        d.printFeatureHeader()

        for index in range(6, 19):
            nom = d.getName(index)
            # Fetch the column once and reuse it for every statistic.
            values = d.getFeature(index, 1, house)
            nb = d.count(values)
            moy = d.mean(values)
            std = d.standardDeviation(values, moy)
            min1 = d.min(values)
            q25, q50, q75 = d.quartile(values)
            max1 = d.max(values)
            print(
                (
                    "{0:<40s} {1:<15.5g} {2:<15.5g} {3:<15.5g} {4:<15.5g} "
                    "{5:<15.5g} {6:<15.5g} {7:<15.5g} {8:<15.5g}"
                ).format(nom, nb, std, moy, min1, q25, q50, q75, max1)
            )

            for key, value in zip(stat_keys, (moy, min1, q25, q50, q75, max1)):
                stats[house][key].append(value)
            names.append(nom)
        print("")

    x = np.arange(len(stats['Gryffindor']['1moy']))
    width = 0.035
    # (house, bar colour) in the order the bars sit inside a group.
    house_colors = (
        ('Gryffindor', 'b'),
        ('Hufflepuff', 'r'),
        ('Ravenclaw', 'g'),
        ('Slytherin', 'c'),
    )

    for group, key in enumerate(stat_keys):
        for slot, (house, color) in enumerate(house_colors, start=1):
            # Four bars per statistic, each group shifted by four widths.
            offset = width * (group * 4 + slot)
            if group == 0:
                # Only the first group is labelled: one legend entry per house.
                plt.bar(x + offset, stats[house][key], width,
                        color=color, label=house)
            else:
                plt.bar(x + offset, stats[house][key], width, color=color)

    plt.tight_layout()
    plt.xticks(x, names, rotation='vertical')
    plt.xlabel('Cours')
    plt.ylabel('Evaluation')
    plt.legend()
    plt.show()