import matplotlib.pyplot as plt
import numpy as np

# Trainer and initDictGenerator are assumed to be defined elsewhere in this repo.


def problem_1c(n=500):
    algorithmList = [
        'Perceptron', 'Perceptron with margin', 'Winnow',
        'Winnow with margin', 'AdaGrad'
    ]
    bestParaList = {
        500: [(1, 0), (0.03, 1), (1.1, 0), (1.1, 2.0), (0.25, 1)],
        1000: [(1, 0), (0.03, 1), (1.1, 0), (1.1, 2.0), (0.25, 1)]
    }
    t = Trainer()
    d = t.data_generator(l=10, m=100, n=n, number_of_instances=50000,
                         noise=False)
    x, y = d['x'], d['y']
    initDict = initDictGenerator(n=t.n)
    color = 'rgbyk'
    for idx in range(len(algorithmList)):
        algorithm = algorithmList[idx]
        algorithmInit = initDict[algorithm]
        # Fall back to the n=500 parameters if n has no tuned entry.
        if n in bestParaList:
            lr, mg = bestParaList[n][idx]
        else:
            lr, mg = bestParaList[500][idx]
        algorithmInit['learning rate'] = [lr]
        algorithmInit['margin'] = [mg]
        t.learning(algorithm, x, y, initDict={algorithm: algorithmInit},
                   times=1)
        t._unpack_resDict(lr, mg)
        plt.plot(t.mistake_list, color[idx])
    plt.legend(algorithmList, loc='best')
    plt.title('Plots for n = %s' % n)
    plt.show()
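
# The Trainer's update rules are not shown in this section. The sketches
# below illustrate, under the usual definitions, the per-example updates the
# five algorithms above are assumed to implement (labels y in {-1, +1});
# they are minimal illustrations, not the repo's actual implementation.

def perceptron_step(w, theta, x, y, lr=1.0, margin=0.0):
    # Additive update on a mistake, or on a margin violation when margin > 0.
    if y * (np.dot(w, x) + theta) <= margin:
        w = w + lr * y * x
        theta = theta + lr * y
    return w, theta


def winnow_step(w, x, y, alpha=1.1, margin=0.0):
    # Multiplicative update over positive weights; the threshold is
    # conventionally fixed at -n for n features.
    theta = -float(len(x))
    if y * (np.dot(w, x) + theta) <= margin:
        w = w * alpha ** (y * x)
    return w


def adagrad_step(w, theta, G_w, G_theta, x, y, lr=0.25):
    # AdaGrad on the hinge loss: accumulate squared gradients and scale
    # each coordinate's step by the inverse root of its accumulator.
    if y * (np.dot(w, x) + theta) <= 1:
        g_w, g_theta = -y * x, -y
        G_w = G_w + g_w ** 2
        G_theta = G_theta + g_theta ** 2
        w = w - lr * g_w / np.sqrt(G_w + 1e-12)
        theta = theta - lr * g_theta / np.sqrt(G_theta + 1e-12)
    return w, theta, G_w, G_theta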
def problem_1b(n=500):  # change n to switch between the two dataset sizes
    algorithmList = [
        'Perceptron', 'Perceptron with margin', 'Winnow',
        'Winnow with margin', 'AdaGrad'
    ]
    t = Trainer()
    d = t.data_generator(l=10, m=100, n=n, number_of_instances=50000,
                         noise=False)
    x, y = d['x'], d['y']
    # D1 is the tuning split and D2 the held-out split; the Trainer also
    # keeps them as attributes (t.D2_x, t.D2_y), which error_estimate uses.
    D1_x, D1_y, D2_x, D2_y = d['D1_x'], d['D1_y'], d['D2_x'], d['D2_y']
    initDict = initDictGenerator(n=t.n)
    for algorithm in algorithmList:
        algorithmInit = initDict[algorithm]
        learningRateList = algorithmInit['learning rate']
        marginList = algorithmInit['margin']
        t.learning(algorithm, D1_x, D1_y, initDict=initDict, times=20)
        for lr in learningRateList:
            for mg in marginList:
                err_rate = t.error_estimate(t.D2_x, t.D2_y, lr, mg)
                mistake = t.mistakeCount(lr, mg)
                print('LR: {0: >6s}, MG: {1: >6s}, ER: {2: >6s}, '
                      'Mis: {3: >6s}'.format(str(lr), str(mg),
                                             str(err_rate), str(mistake)))
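
# problem_1b prints the full (learning rate, margin) grid and the best pair
# is read off by hand. A hypothetical helper that automates the same
# selection (the error_estimate signature is assumed from the code above):

def select_best_parameters(t, learningRateList, marginList):
    # Return the (lr, mg) pair with the lowest held-out error on D2.
    grid = {(lr, mg): t.error_estimate(t.D2_x, t.D2_y, lr, mg)
            for lr in learningRateList for mg in marginList}
    return min(grid, key=grid.get)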
def problem_2_plot():
    algorithmList = [
        'Perceptron', 'Perceptron with margin', 'Winnow',
        'Winnow with margin', 'AdaGrad'
    ]
    bestParaList = {
        40: [(1, 0), (0.25, 1), (1.1, 0), (1.1, 2.0), (1.5, 1)],
        80: [(1, 0), (0.03, 1), (1.1, 0), (1.1, 2.0), (0.25, 1)],
        120: [(1, 0), (0.03, 1), (1.1, 0), (1.1, 2.0), (0.25, 1)],
        160: [(1, 0), (0.03, 1), (1.1, 0), (1.1, 2.0), (0.25, 1)],
        200: [(1, 0), (0.25, 1), (1.1, 0), (1.1, 2.0), (1.5, 1)]
    }
    record = {}
    for algorithm in algorithmList:
        record[algorithm] = []
    color = 'rgbyk'
    for n in range(40, 240, 40):
        print()
        t = Trainer()
        d = t.data_generator(l=10, m=20, n=n, number_of_instances=50000,
                             noise=False)
        x, y = d['x'], d['y']
        initDict = initDictGenerator(n=t.n)
        for idx in range(len(algorithmList)):
            algorithm = algorithmList[idx]
            algorithmInit = initDict[algorithm]
            # Every n visited by the loop has a tuned entry, so index
            # directly (the old fallback to bestParaList[500] would have
            # raised a KeyError: this dict has no 500 key).
            lr, mg = bestParaList[n][idx]
            algorithmInit['learning rate'] = [lr]
            algorithmInit['margin'] = [mg]
            t.learningWithStop(algorithm, x, y,
                               initDict={algorithm: algorithmInit}, times=1)
            print(len(t.resDict[lr, mg][3]))
            t._unpack_resDict(lr, mg)
            record[algorithm].append(t.mistake_list[-1])
    for i, algorithm in enumerate(algorithmList):
        plt.plot(range(40, 240, 40), record[algorithm], color[i])
    plt.legend(algorithmList, loc='best')
    plt.xlabel('Number of total features')
    plt.xticks(range(40, 240, 40))
    plt.ylabel('Total mistakes before stopping')
    # plt.title('Plots for n = %s' % n)
    plt.savefig('p2plot.png', dpi=144)
    plt.show()
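
# learningWithStop is assumed to train online until the learner survives a
# long run of consecutive correct predictions, returning the mistakes made
# up to that point. A minimal sketch of such a loop, with a hypothetical
# predict/update interface:

def run_until_stable(predict, update, x, y, patience=1000, max_rounds=10**6):
    # Count mistakes until `patience` consecutive examples are classified
    # correctly (or max_rounds is exhausted).
    mistakes, streak = 0, 0
    for i in range(max_rounds):
        xi, yi = x[i % len(x)], y[i % len(x)]
        if predict(xi) != yi:
            update(xi, yi)
            mistakes += 1
            streak = 0
        else:
            streak += 1
        if streak >= patience:
            break
    return mistakes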
def problem_3_dataGenerator():
    l = 10
    n = 1000
    for m in [100, 500, 1000]:
        t = Trainer()
        train = t.data_generator(l=l, m=m, n=n, number_of_instances=50000,
                                 noise=True)
        test = t.data_generator(l=l, m=m, n=n, number_of_instances=10000,
                                noise=False)
        np.save('p3trainX_m=%s' % m, train['x'])
        np.save('p3trainY_m=%s' % m, train['y'])
        np.save('p3testX_m=%s' % m, test['x'])
        np.save('p3testY_m=%s' % m, test['y'])
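
# data_generator's body is not shown; the sketch below shows the l-of-m-of-n
# labeling rule it is assumed to follow: an example is positive iff at least
# l of the first m (of n) boolean features are active. The 5% label-noise
# rate under noise=True is an assumption for illustration.

def gen_l_of_m_of_n(l, m, n, number_of_instances, noise=False, seed=0):
    rng = np.random.default_rng(seed)
    x = (rng.random((number_of_instances, n)) < 0.5).astype(int)
    y = np.where(x[:, :m].sum(axis=1) >= l, 1, -1)
    if noise:
        flip = rng.random(number_of_instances) < 0.05  # assumed noise rate
        y[flip] = -y[flip]
    return x, y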
def problem_3_pureDataGenerator():
    l = 10
    n = 1000
    for m in [100, 500, 1000]:
        t = Trainer()
        train = t.data_generator(l=l, m=m, n=n, number_of_instances=50000,
                                 noise=False)
        np.save('p3pureX_m=%s' % m, train['x'])
        np.save('p3pureY_m=%s' % m, train['y'])
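
# np.save appends the '.npy' extension, so the files written above must be
# loaded back with the extension included. A hypothetical loader for the
# problem 3 experiments (not shown in this section):

def load_problem_3_split(m, prefix='p3train'):
    x = np.load('%sX_m=%s.npy' % (prefix, m))
    y = np.load('%sY_m=%s.npy' % (prefix, m))
    return x, y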
def problem_4():
    t = Trainer()
    l, m, n = 10, 20, 40
    l1 = []  # total hinge loss per dataset
    l2 = []  # total misclassification loss per dataset
    color = 'rgbymc'
    lr, mg = (0.25, 1)
    initDict = initDictGenerator(n=n)
    algorithm = 'AdaGrad'
    algorithmInit = initDict[algorithm]
    algorithmInit['learning rate'] = [lr]
    algorithmInit['margin'] = [mg]
    # Train AdaGrad on 50 independent noisy datasets and record both losses.
    for j in range(50):
        d = t.data_generator(l=l, m=m, n=n, number_of_instances=10000,
                             noise=True)
        x, y = d['x'], d['y']
        t.learningHingeLoss(algorithm, x, y,
                            initDict={algorithm: algorithmInit}, times=1)
        res = t.resDict[(lr, mg)]
        w, theta = res[0], res[1]
        hinge, mis = t.hinge_and_mis(x, y, w, theta)
        l1.append(hinge)
        l2.append(mis)
    plt.plot(range(1, 51), l1, color[0])
    plt.plot(range(1, 51), l2, color[1])
    plt.xlabel('Datasets')
    plt.ylabel('Total value of Hinge Loss / Misclassification Loss')
    plt.yticks(range(0, 3000, 300))
    plt.legend(['Hinge Loss', 'Misclassification Loss'])
    plt.savefig('p4plot.png', dpi=144)
    plt.show()
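
# hinge_and_mis is a Trainer method whose body is not shown; this sketch
# computes the two totals the plot compares, under the usual definitions:
# per-example hinge loss max(0, 1 - y * (w.x + theta)) and 0-1
# misclassification loss.

def hinge_and_mis_sketch(x, y, w, theta):
    scores = x @ w + theta
    hinge = float(np.maximum(0.0, 1.0 - y * scores).sum())
    mis = int((y * scores <= 0).sum())
    return hinge, mis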