def test_rnnslu_embeddings(**kwargs):
    """
    Wrapper function for training and testing RNNSLU

    :type fold: int
    :param fold: fold index of the ATIS dataset, from 0 to 4.

    :type lr: float
    :param lr: learning rate used (factor for the stochastic gradient).

    :type nepochs: int
    :param nepochs: maximal number of epochs to run the optimizer.

    :type win: int
    :param win: number of words in the context window.

    :type nhidden: int
    :param nhidden: number of hidden units.

    :type emb_dimension: int
    :param emb_dimension: dimension of word embedding.

    :type verbose: boolean
    :param verbose: whether to print out an epoch summary.

    :type decay: boolean
    :param decay: decay the learning rate if improvement stops.

    :type savemodel: boolean
    :param savemodel: save the trained model or not.

    :type normal: boolean
    :param normal: normalize word embeddings after each update or not.

    :type folder: string
    :param folder: path to the folder where results will be stored.
    """
    # process input arguments
    param = {
        "fold": 3,
        "lr": 0.0970806646812754,
        "verbose": True,
        "decay": True,
        "win": 7,
        "nhidden": 200,
        "seed": 345,
        "emb_dimension": 50,
        "nepochs": 60,
        "savemodel": False,
        "normal": True,
        "folder": "../result",
    }
    param_diff = set(kwargs.keys()) - set(param.keys())
    if param_diff:
        raise KeyError("invalid arguments:" + str(tuple(param_diff)))
    param.update(kwargs)
    if param["verbose"]:
        for k, v in param.items():
            print("%s: %s" % (k, v))

    # create result folder if it does not exist
    check_dir(param["folder"])

    # load the dataset
    print("... loading the dataset")
    train_set, valid_set, test_set, dic = load_data(param["fold"])

    # create mappings from index to label and from index to word
    idx2label = dict((k, v) for v, k in dic["labels2idx"].items())
    idx2word = dict((k, v) for v, k in dic["words2idx"].items())

    # unpack dataset
    train_lex, train_ne, train_y = train_set
    valid_lex, valid_ne, valid_y = valid_set
    test_lex, test_ne, test_y = test_set

    vocsize = len(dic["words2idx"])
    nclasses = len(dic["labels2idx"])
    nsequences = len(train_lex)

    # materialize the maps as lists so they can be reused every epoch
    groundtruth_valid = [list(map(lambda x: idx2label[x], y)) for y in valid_y]
    words_valid = [list(map(lambda x: idx2word[x], w)) for w in valid_lex]
    groundtruth_test = [list(map(lambda x: idx2label[x], y)) for y in test_y]
    words_test = [list(map(lambda x: idx2word[x], w)) for w in test_lex]

    # instantiate the model
    numpy.random.seed(param["seed"])
    random.seed(param["seed"])

    print("... building the model")
    rnn = RNNSLU(nh=param["nhidden"],
                 nc=nclasses,
                 ne=vocsize,
                 de=param["emb_dimension"],
                 cs=param["win"])

    # train with early stopping on the validation set
    print("... training")
    best_f1 = -numpy.inf
    param["clr"] = param["lr"]
    for e in range(param["nepochs"]):
        # shuffle
        shuffle([train_lex, train_ne, train_y], param["seed"])
        param["ce"] = e
        tic = timeit.default_timer()
        for i, (x, y) in enumerate(zip(train_lex, train_y)):
            rnn.train(x, y, param["win"], param["clr"])
            print("[learning] epoch %i >> %2.2f%%" %
                  (e, (i + 1) * 100.0 / nsequences), end=" ")
            print("completed in %.2f (sec) <<\r" %
                  (timeit.default_timer() - tic), end="")
            sys.stdout.flush()

        # evaluation // back into the real world: idx -> words
        predictions_test = [
            list(map(lambda x: idx2label[x],
                     rnn.classify(numpy.asarray(
                         contextwin(x, param["win"])).astype("int32"))))
            for x in test_lex
        ]
        predictions_valid = [
            list(map(lambda x: idx2label[x],
                     rnn.classify(numpy.asarray(
                         contextwin(x, param["win"])).astype("int32"))))
            for x in valid_lex
        ]

        # evaluation // compute the accuracy using conlleval.pl
        res_test = conlleval(predictions_test, groundtruth_test, words_test,
                             param["folder"] + "/current.test.txt",
                             param["folder"])
        res_valid = conlleval(predictions_valid, groundtruth_valid,
                              words_valid,
                              param["folder"] + "/current.valid.txt",
                              param["folder"])

        if res_valid["f1"] > best_f1:
            if param["savemodel"]:
                rnn.save(param["folder"])
            best_rnn = copy.deepcopy(rnn)
            best_f1 = res_valid["f1"]
            if param["verbose"]:
                print("NEW BEST: epoch", e,
                      "valid F1", res_valid["f1"],
                      "best test F1", res_test["f1"])
            param["vf1"], param["tf1"] = res_valid["f1"], res_test["f1"]
            param["vp"], param["tp"] = res_valid["p"], res_test["p"]
            param["vr"], param["tr"] = res_valid["r"], res_test["r"]
            param["be"] = e
            os.rename(param["folder"] + "/current.test.txt",
                      param["folder"] + "/best.test.txt")
            os.rename(param["folder"] + "/current.valid.txt",
                      param["folder"] + "/best.valid.txt")
        else:
            if param["verbose"]:
                print("")

        # learning rate decay if no improvement in 10 epochs
        if param["decay"] and abs(param["be"] - param["ce"]) >= 10:
            param["clr"] *= 0.5
            rnn = best_rnn

        if param["clr"] < 1e-5:
            break

    print("BEST RESULT: epoch", param["be"],
          "valid F1", param["vf1"],
          "best test F1", param["tf1"],
          "with the model", param["folder"])

    return rnn, idx2word
def test_rnnslu_n(**kwargs):
    """
    Wrapper function for training and testing RNNSLU

    :type fold: int
    :param fold: fold index of the ATIS dataset, from 0 to 4.

    :type lr: float
    :param lr: learning rate used (factor for the stochastic gradient).

    :type nepochs: int
    :param nepochs: maximal number of epochs to run the optimizer.

    :type win: int
    :param win: number of words in the context window.

    :type nhidden: int
    :param nhidden: number of hidden units.

    :type emb_dimension: int
    :param emb_dimension: dimension of word embedding.

    :type verbose: boolean
    :param verbose: whether to print out an epoch summary.

    :type decay: boolean
    :param decay: decay the learning rate if improvement stops.

    :type savemodel: boolean
    :param savemodel: save the trained model or not.

    :type normal: boolean
    :param normal: normalize word embeddings after each update or not.

    :type folder: string
    :param folder: path to the folder where results will be stored.
    """
    # process input arguments
    param = {
        'fold': 3,
        'lr': 0.0970806646812754,
        'verbose': True,
        'decay': True,
        'win': 7,
        'nhidden': 200,
        'seed': 345,
        'emb_dimension': 50,
        'nepochs': 60,
        'savemodel': False,
        'normal': True,
        'folder': '../result'
    }
    param_diff = set(kwargs.keys()) - set(param.keys())
    if param_diff:
        raise KeyError("invalid arguments:" + str(tuple(param_diff)))
    param.update(kwargs)
    if param['verbose']:
        for k, v in param.items():
            print("%s: %s" % (k, v))

    # create result folder if it does not exist
    check_dir(param['folder'])

    # load the dataset
    print('... loading the dataset')
    train_set, valid_set, test_set, dic = load_data(param['fold'])

    # create mappings from index to label and from index to word
    idx2label = dict((k, v) for v, k in dic['labels2idx'].items())
    idx2word = dict((k, v) for v, k in dic['words2idx'].items())

    # unpack dataset
    train_lex, train_ne, train_y = train_set
    valid_lex, valid_ne, valid_y = valid_set
    test_lex, test_ne, test_y = test_set

    vocsize = len(dic['words2idx'])
    nclasses = len(dic['labels2idx'])
    nsentences = len(train_lex)

    # materialize the maps as lists so they can be reused every epoch
    groundtruth_valid = [list(map(lambda x: idx2label[x], y)) for y in valid_y]
    words_valid = [list(map(lambda x: idx2word[x], w)) for w in valid_lex]
    groundtruth_test = [list(map(lambda x: idx2label[x], y)) for y in test_y]
    words_test = [list(map(lambda x: idx2word[x], w)) for w in test_lex]

    # instantiate the model
    numpy.random.seed(param['seed'])
    random.seed(param['seed'])

    print('... building the model')
    rnn = RNNSLU_n(nh=param['nhidden'],
                   nc=nclasses,
                   ne=vocsize,
                   de=param['emb_dimension'],
                   cs=param['win'],
                   normal=param['normal'])

    # train with early stopping on the validation set
    print('... training')
    best_f1 = -numpy.inf
    param['clr'] = param['lr']
    for e in range(param['nepochs']):
        # shuffle
        shuffle_rnn([train_lex, train_ne, train_y], param['seed'])
        param['ce'] = e
        tic = timeit.default_timer()
        for i, (x, y) in enumerate(zip(train_lex, train_y)):
            rnn.train(x, y, param['win'], param['clr'])
            print('[learning] epoch %i >> %2.2f%%' %
                  (e, (i + 1) * 100. / nsentences), end=' ')
            print('completed in %.2f (sec) <<\r' %
                  (timeit.default_timer() - tic), end='')
            sys.stdout.flush()

        # evaluation // back into the real world: idx -> words
        predictions_test = [
            list(map(lambda x: idx2label[x],
                     rnn.classify(numpy.asarray(
                         contextwin(x, param['win'])).astype('int32'))))
            for x in test_lex
        ]
        predictions_valid = [
            list(map(lambda x: idx2label[x],
                     rnn.classify(numpy.asarray(
                         contextwin(x, param['win'])).astype('int32'))))
            for x in valid_lex
        ]

        # evaluation // compute the accuracy using conlleval.pl
        res_test = conlleval(predictions_test, groundtruth_test, words_test,
                             param['folder'] + '/current.test.txt',
                             param['folder'])
        res_valid = conlleval(predictions_valid, groundtruth_valid,
                              words_valid,
                              param['folder'] + '/current.valid.txt',
                              param['folder'])

        if res_valid['f1'] > best_f1:
            if param['savemodel']:
                rnn.save(param['folder'])
            best_rnn = copy.deepcopy(rnn)
            best_f1 = res_valid['f1']
            if param['verbose']:
                print('NEW BEST: epoch', e,
                      'valid F1', res_valid['f1'],
                      'best test F1', res_test['f1'])
            param['vf1'], param['tf1'] = res_valid['f1'], res_test['f1']
            param['vp'], param['tp'] = res_valid['p'], res_test['p']
            param['vr'], param['tr'] = res_valid['r'], res_test['r']
            param['be'] = e
            os.rename(param['folder'] + '/current.test.txt',
                      param['folder'] + '/best.test.txt')
            os.rename(param['folder'] + '/current.valid.txt',
                      param['folder'] + '/best.valid.txt')
        else:
            if param['verbose']:
                print('')

        # learning rate decay if no improvement in 10 epochs
        if param['decay'] and abs(param['be'] - param['ce']) >= 10:
            param['clr'] *= 0.5
            rnn = best_rnn

        if param['clr'] < 1e-5:
            break

    print('BEST RESULT: epoch', param['be'],
          'valid F1', param['vf1'],
          'best test F1', param['tf1'],
          'with the model', param['folder'])

    return rnn, dic
def test_lstm(**kwargs):
    # process input arguments
    param = {
        'fold': 3,
        'lr': 0.1,
        'verbose': True,
        'decay': True,
        'win': 3,
        'nhidden1': 300,
        'nhidden2': 0,
        'seed': 345,
        'emb_dimension': 50,
        'nepochs': 20,
        'savemodel': False,
        'folder': '../result'
    }
    param_diff = set(kwargs.keys()) - set(param.keys())
    if param_diff:
        raise KeyError("invalid arguments:" + str(tuple(param_diff)))
    param.update(kwargs)
    if param['verbose']:
        for k, v in param.items():
            print("%s: %s" % (k, v))

    # create result folder if it does not exist
    check_dir(param['folder'])

    # load the dataset
    print('... loading the dataset')
    train_set, valid_set, test_set, dic = load_data(param['fold'])

    # create mappings from index to label and from index to word
    idx2label = dict((k, v) for v, k in dic['labels2idx'].items())
    idx2word = dict((k, v) for v, k in dic['words2idx'].items())

    train_lex_temp, train_ne, train_y_temp = train_set
    valid_lex_temp, valid_ne, valid_y_temp = valid_set
    test_lex_temp, test_ne, test_y_temp = test_set

    # pad (or truncate) every sentence to a fixed length of 30 tokens,
    # using 7 as the padding word index and 126 as the padding label index
    train_lex = 7 * numpy.ones((len(train_lex_temp), 30))
    for i in range(len(train_lex_temp)):
        for j in range(min(30, len(train_lex_temp[i]))):
            train_lex[i][j] = train_lex_temp[i][j]
    test_lex = 7 * numpy.ones((len(test_lex_temp), 30))
    for i in range(len(test_lex_temp)):
        for j in range(min(30, len(test_lex_temp[i]))):
            test_lex[i][j] = test_lex_temp[i][j]
    valid_lex = 7 * numpy.ones((len(valid_lex_temp), 30))
    for i in range(len(valid_lex_temp)):
        for j in range(min(30, len(valid_lex_temp[i]))):
            valid_lex[i][j] = valid_lex_temp[i][j]
    train_y = 126 * numpy.ones((len(train_y_temp), 30))
    for i in range(len(train_y_temp)):
        for j in range(min(30, len(train_y_temp[i]))):
            train_y[i][j] = train_y_temp[i][j]
    test_y = 126 * numpy.ones((len(test_y_temp), 30))
    for i in range(len(test_y_temp)):
        for j in range(min(30, len(test_y_temp[i]))):
            test_y[i][j] = test_y_temp[i][j]
    valid_y = 126 * numpy.ones((len(valid_y_temp), 30))
    for i in range(len(valid_y_temp)):
        for j in range(min(30, len(valid_y_temp[i]))):
            valid_y[i][j] = valid_y_temp[i][j]

    vocsize = len(dic['words2idx'])
    nclasses = len(dic['labels2idx'])
    nsentences = len(train_lex)

    # materialize the maps as lists so they can be reused every epoch
    groundtruth_valid = [list(map(lambda x: idx2label[x], y)) for y in valid_y]
    words_valid = [list(map(lambda x: idx2word[x], w)) for w in valid_lex]
    groundtruth_test = [list(map(lambda x: idx2label[x], y)) for y in test_y]
    words_test = [list(map(lambda x: idx2word[x], w)) for w in test_lex]

    # instantiate the model
    numpy.random.seed(param['seed'])
    random.seed(param['seed'])
    # TODO

    print('... building the model')
    rnn = LSTM(nh=param['nhidden1'],
               nh2=param['nhidden2'],
               nc=nclasses,
               ne=vocsize,
               de=param['emb_dimension'],
               cs=param['win'])

    # train with early stopping on the validation set
    print('... training')
    best_f1 = -numpy.inf
    param['clr'] = param['lr']
    for e in range(param['nepochs']):
        # shuffle
        shuffle([train_lex, train_ne, train_y], param['seed'])
        param['ce'] = e
        tic = timeit.default_timer()
        ##########################
        # print(rnn.showh2(numpy.asarray(
        #     contextwin(test_lex[0], param['win'])).astype('int32')))
        #########################
        for i, (x, y) in enumerate(zip(train_lex, train_y)):
            rnn.train(x, y, param['win'], param['clr'])
            print('[learning] epoch %i >> %2.2f%%' %
                  (e, (i + 1) * 100. / nsentences), end=' ')
            print('completed in %.2f (sec) <<\r' %
                  (timeit.default_timer() - tic), end='')
            sys.stdout.flush()

        # evaluation // back into the real world: idx -> words
        predictions_test = [
            list(map(lambda x: idx2label[x],
                     rnn.classify(numpy.asarray(
                         contextwin(x, param['win'])).astype('int32'))))
            for x in test_lex
        ]
        predictions_valid = [
            list(map(lambda x: idx2label[x],
                     rnn.classify(numpy.asarray(
                         contextwin(x, param['win'])).astype('int32'))))
            for x in valid_lex
        ]

        # evaluation // compute the accuracy using conlleval.pl
        res_test = conlleval(predictions_test, groundtruth_test, words_test,
                             param['folder'] + '/current.test.txt',
                             param['folder'])
        res_valid = conlleval(predictions_valid, groundtruth_valid,
                              words_valid,
                              param['folder'] + '/current.valid.txt',
                              param['folder'])

        if res_valid['f1'] > best_f1:
            if param['savemodel']:
                rnn.save(param['folder'])
            best_rnn = copy.deepcopy(rnn)
            best_f1 = res_valid['f1']
            if param['verbose']:
                print('NEW BEST: epoch', e,
                      'valid F1', res_valid['f1'],
                      'best test F1', res_test['f1'])
            param['vf1'], param['tf1'] = res_valid['f1'], res_test['f1']
            param['vp'], param['tp'] = res_valid['p'], res_test['p']
            param['vr'], param['tr'] = res_valid['r'], res_test['r']
            param['be'] = e
            os.rename(param['folder'] + '/current.test.txt',
                      param['folder'] + '/best.test.txt')
            os.rename(param['folder'] + '/current.valid.txt',
                      param['folder'] + '/best.valid.txt')
        else:
            if param['verbose']:
                print('')

        # learning rate decay if no improvement in 10 epochs
        if param['decay'] and abs(param['be'] - param['ce']) >= 10:
            param['clr'] *= 0.5
            rnn = best_rnn

        if param['clr'] < 1e-5:
            break

    print('BEST RESULT: epoch', param['be'],
          'valid F1', param['vf1'],
          'best test F1', param['tf1'],
          'with the model', param['folder'])
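
# The pad/truncate loops in test_lstm can be expressed more compactly; a
# sketch under the same assumptions (7 as the padding word index, 126 as the
# padding label index, maximum length 30). pad_to_length is a hypothetical
# helper, not part of the original code:
#
#     def pad_to_length(seqs, fill, length=30):
#         out = fill * numpy.ones((len(seqs), length))
#         for i, s in enumerate(seqs):
#             out[i, :min(length, len(s))] = s[:length]
#         return out
#
#     train_lex = pad_to_length(train_lex_temp, 7)
#     train_y = pad_to_length(train_y_temp, 126)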
def test_rnnslu2(**kwargs):
    """
    Wrapper function for training and testing RNNSLU

    :type fold: int
    :param fold: fold index of the ATIS dataset, from 0 to 4.

    :type lr: float
    :param lr: learning rate used (factor for the stochastic gradient).

    :type nepochs: int
    :param nepochs: maximal number of epochs to run the optimizer.

    :type win: int
    :param win: number of words in the context window.

    :type nhidden1: int
    :param nhidden1: number of hidden units in the first hidden layer.

    :type nhidden2: int
    :param nhidden2: number of hidden units in the second hidden layer.

    :type emb_dimension: int
    :param emb_dimension: dimension of word embedding.

    :type verbose: boolean
    :param verbose: whether to print out an epoch summary.

    :type decay: boolean
    :param decay: decay the learning rate if improvement stops.

    :type savemodel: boolean
    :param savemodel: save the trained model or not.

    :type folder: string
    :param folder: path to the folder where results will be stored.
    """
    # process input arguments
    param = {
        'fold': 3,
        'lr': 0.0970806646812754,
        'verbose': True,
        'decay': True,
        'win': 7,
        'nhidden1': 200,
        'nhidden2': 200,
        'seed': 345,
        'emb_dimension': 50,
        'nepochs': 60,
        'savemodel': False,
        'normal': True,
        'folder': '../result'
    }
    param_diff = set(kwargs.keys()) - set(param.keys())
    if param_diff:
        raise KeyError("invalid arguments:" + str(tuple(param_diff)))
    param.update(kwargs)
    if param['verbose']:
        for k, v in param.items():
            print("%s: %s" % (k, v))

    # create result folder if it does not exist
    check_dir(param['folder'])

    # load the dataset
    print('... loading the dataset')
    train_set, valid_set, test_set, dic = load_data(param['fold'])

    # create mappings from index to label and from index to word
    idx2label = dict((k, v) for v, k in dic['labels2idx'].items())
    idx2word = dict((k, v) for v, k in dic['words2idx'].items())

    # unpack dataset
    train_lex, train_ne, train_y = train_set
    valid_lex, valid_ne, valid_y = valid_set
    test_lex, test_ne, test_y = test_set

    vocsize = len(dic['words2idx'])
    nclasses = len(dic['labels2idx'])
    nsentences = len(train_lex)

    # materialize the maps as lists so they can be reused every epoch
    groundtruth_valid = [list(map(lambda x: idx2label[x], y)) for y in valid_y]
    words_valid = [list(map(lambda x: idx2word[x], w)) for w in valid_lex]
    groundtruth_test = [list(map(lambda x: idx2label[x], y)) for y in test_y]
    words_test = [list(map(lambda x: idx2word[x], w)) for w in test_lex]

    # instantiate the model
    numpy.random.seed(param['seed'])
    random.seed(param['seed'])
    # TODO

    print('... building the model')
    rnn = RNNSLU2(nh=param['nhidden1'],
                  nh2=param['nhidden2'],
                  nc=nclasses,
                  ne=vocsize,
                  de=param['emb_dimension'],
                  cs=param['win'],
                  normal=param['normal'])

    # train with early stopping on the validation set
    print('... training')
    best_f1 = -numpy.inf
    param['clr'] = param['lr']
    for e in range(param['nepochs']):
        # shuffle
        shuffle([train_lex, train_ne, train_y], param['seed'])
        param['ce'] = e
        tic = timeit.default_timer()
        for i, (x, y) in enumerate(zip(train_lex, train_y)):
            rnn.train(x, y, param['win'], param['clr'])
            print('[learning] epoch %i >> %2.2f%%' %
                  (e, (i + 1) * 100. / nsentences), end=' ')
            print('completed in %.2f (sec) <<\r' %
                  (timeit.default_timer() - tic), end='')
            sys.stdout.flush()

        # evaluation // back into the real world: idx -> words
        predictions_test = [
            list(map(lambda x: idx2label[x],
                     rnn.classify(numpy.asarray(
                         contextwin(x, param['win'])).astype('int32'))))
            for x in test_lex
        ]
        predictions_valid = [
            list(map(lambda x: idx2label[x],
                     rnn.classify(numpy.asarray(
                         contextwin(x, param['win'])).astype('int32'))))
            for x in valid_lex
        ]

        # evaluation // compute the accuracy using conlleval.pl
        res_test = conlleval(predictions_test, groundtruth_test, words_test,
                             param['folder'] + '/current.test.txt',
                             param['folder'])
        res_valid = conlleval(predictions_valid, groundtruth_valid,
                              words_valid,
                              param['folder'] + '/current.valid.txt',
                              param['folder'])

        if res_valid['f1'] > best_f1:
            if param['savemodel']:
                rnn.save(param['folder'])
            best_rnn = copy.deepcopy(rnn)
            best_f1 = res_valid['f1']
            if param['verbose']:
                print('NEW BEST: epoch', e,
                      'valid F1', res_valid['f1'],
                      'best test F1', res_test['f1'])
            param['vf1'], param['tf1'] = res_valid['f1'], res_test['f1']
            param['vp'], param['tp'] = res_valid['p'], res_test['p']
            param['vr'], param['tr'] = res_valid['r'], res_test['r']
            param['be'] = e
            os.rename(param['folder'] + '/current.test.txt',
                      param['folder'] + '/best.test.txt')
            os.rename(param['folder'] + '/current.valid.txt',
                      param['folder'] + '/best.valid.txt')
        else:
            if param['verbose']:
                print('')

        # learning rate decay if no improvement in 10 epochs
        if param['decay'] and abs(param['be'] - param['ce']) >= 10:
            param['clr'] *= 0.5
            rnn = best_rnn

        if param['clr'] < 1e-5:
            break

    print('BEST RESULT: epoch', param['be'],
          'valid F1', param['vf1'],
          'best test F1', param['tf1'],
          'with the model', param['folder'])
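
# Usage sketch for the two-hidden-layer variant (hypothetical values):
#
#     test_rnnslu2(fold=2, nhidden1=100, nhidden2=100, nepochs=30)
#
# All other keywords default to the same values as the wrappers above.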
def k_means(X=None, init_c=None, n_iters=50):
    """K-Means.

    Argument:
        X: 2D data points, shape [2, N].
        init_c: initial centroids, shape [2, 2]. Each column is a centroid.

    Return:
        c: shape [2, 2]. Each column is a centroid.
    """
    if X is None:
        X, init_c = hw4_utils.load_data()

    # Note that K = init_c.shape[1] = 2

    # Iterate k-means updates
    for i in range(n_iters):
        # Assign each data point to its nearest centroid; x1 and x2 collect
        # the points of the 1st and 2nd cluster, respectively
        x1, x2 = [], []
        for j in range(X.shape[1]):
            mindist, argmin = -1, -1
            for k in range(init_c.shape[1]):
                curdist = torch.dist(init_c[:, k], X[:, j]).item()
                if mindist < 0 or curdist < mindist:
                    mindist = curdist
                    argmin = k
            if argmin == 0:
                x1.append(X[:, j])
            else:
                x2.append(X[:, j])

        # x- and y-coordinates of the points in each cluster
        x11 = [p[0] for p in x1]
        x12 = [p[1] for p in x1]
        x21 = [p[0] for p in x2]
        x22 = [p[1] for p in x2]

        # Turn x1 and x2 into [2, n_k] matrices containing exactly the points
        # of the 1st and 2nd cluster, respectively
        x1 = torch.FloatTensor([x11, x12])
        x2 = torch.FloatTensor([x21, x22])

        # Plot the current clustering
        c1 = torch.FloatTensor([[init_c[0][0]], [init_c[1][0]]])
        c2 = torch.FloatTensor([[init_c[0][1]], [init_c[1][1]]])
        hw4_utils.vis_cluster(c1, x1, c2, x2, i)

        # Recompute each cluster center as the mean of its points; mu_1 and
        # mu_2 are compared with the previous centers to check convergence
        sum_vector = torch.zeros(X.shape[0])
        for j in range(x1.shape[1]):
            sum_vector += x1[:, j]
        mu_1 = sum_vector / x1.shape[1]

        sum_vector = torch.zeros(X.shape[0])
        for j in range(x2.shape[1]):
            sum_vector += x2[:, j]
        mu_2 = sum_vector / x2.shape[1]

        # If the centers did not move, report the cost and halt
        if (torch.eq(mu_1, init_c[:, 0]).all().item()
                and torch.eq(mu_2, init_c[:, 1]).all().item()):
            print(f"Number of updates needed for convergence was {i}")
            cost = 0
            for j in range(x1.shape[1]):
                cost += torch.dist(init_c[:, 0], x1[:, j]).item()
            for j in range(x2.shape[1]):
                cost += torch.dist(init_c[:, 1], x2[:, j]).item()
            print(f"Algorithm converged to cost function value of {0.5 * cost}")
            print(f"Cluster centroids ended up being {mu_1} and {mu_2}")
            return init_c

        # Not converged yet: update the cluster centers and continue
        init_c[:, 0] = mu_1
        init_c[:, 1] = mu_2

    # The centers never stopped moving within n_iters iterations
    # (e.g. because of numerical imprecision)
    print("Due to numerical imprecision, algorithm did not converge")
    return init_c
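
# A minimal usage sketch with synthetic data (hypothetical values; the default
# call path instead pulls X and init_c from hw4_utils.load_data(), and
# hw4_utils.vis_cluster is invoked once per iteration for plotting):
#
#     torch.manual_seed(0)
#     X = torch.cat([torch.randn(2, 50) - 2.0, torch.randn(2, 50) + 2.0], dim=1)
#     init_c = torch.FloatTensor([[-1.0, 1.0],
#                                 [-1.0, 1.0]])  # each column is a centroid
#     c = k_means(X, init_c, n_iters=50)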