def mlpn_loglinear_check():
    """Gradient-check ``mlpn`` built with layer sizes [20, 3] (no hidden layer).

    For five random initialisations of the ``W, b`` parameter pair, runs
    ``gradient_check`` first on ``W`` and then on ``b``. The same randomly
    drawn input vector and the gold label 0 are used throughout.
    """
    import mlpn
    import train_mlp1

    layer_sizes = [20, 3]
    template = mlpn.create_classifier(layer_sizes)
    x = np.random.randn(layer_sizes[0])
    y = 0

    print(
        "MLP arbitrary layers gradient check (special case of log linear model"
    )
    for attempt in range(1, 6):
        W, b = train_mlp1.randomly_initialize_params(template)

        def _wrt_weights(candidate):
            # Only W varies; b stays at this initialisation's value.
            loss, grads = mlpn.loss_and_gradients(x, y, [candidate, b])
            return loss, grads[0]

        def _wrt_bias(candidate):
            # Only b varies; W stays at this initialisation's value.
            loss, grads = mlpn.loss_and_gradients(x, y, [W, candidate])
            return loss, grads[1]

        print(f"Gradients checks for random initialization {attempt}")
        gradient_check(_wrt_weights, W)
        gradient_check(_wrt_bias, b)
def mlpn_2_hidden_check():
    """Gradient-check every parameter of an ``mlpn`` net with two hidden layers.

    Builds a classifier with layer sizes [50, 20, 10, 3]. For five random
    initialisations it runs ``gradient_check`` on each parameter in the order
    the parameter list provides them, holding all other parameters fixed.
    One randomly drawn input vector and the gold label 0 are used throughout.
    """
    import mlpn
    import train_mlp1

    dims = [50, 20, 10, 3]
    params = mlpn.create_classifier(dims)
    x = np.random.randn(dims[0])
    y = 0

    def _loss_and_grad_wrt(current_params, index):
        """Build f(p) -> (loss, gradient of slot ``index``) for gradient_check."""

        def _loss_and_grad(candidate):
            # Swap only the parameter under test into a copy of the list, so
            # the remaining parameters keep this initialisation's values.
            trial = list(current_params)
            trial[index] = candidate
            loss, grads = mlpn.loss_and_gradients(x, y, trial)
            return loss, grads[index]

        return _loss_and_grad

    # The banner used to say "special case of log linear model", which was
    # copied from the log-linear check and is wrong for this network.
    print("MLP arbitrary layers gradient check (two hidden layers)")
    for i in range(5):
        random_params = list(train_mlp1.randomly_initialize_params(params))
        print(f"Gradients checks for random initialization {i + 1}")
        # The factory binds ``index`` per call, avoiding the late-binding
        # pitfall of closures created directly inside a loop.
        for index, param in enumerate(random_params):
            gradient_check(_loss_and_grad_wrt(random_params, index), param)
def bigram_model():
    """Train a classifier on ``ut.TRAIN`` and return the trained parameters.

    The classifier is created by ``model.create_classifier`` with layer sizes
    ``[len(ut.vocab), len(ut.L2I)]`` and trained by ``train_classifier`` for
    10 iterations at learning rate 0.01, with ``ut.DEV`` as the dev set.

    Returns:
        Whatever ``train_classifier`` returns. Previously the result was
        computed and dropped, so the function returned ``None``.
    """
    in_dim = len(ut.vocab)
    out_dim = len(ut.L2I)
    num_iterations = 10
    learning_rate = 0.01

    params = model.create_classifier([in_dim, out_dim])
    return train_classifier(ut.TRAIN, ut.DEV, num_iterations, learning_rate,
                            params)
def mlpn_grad_sanity():
    """Run a numerical gradient check on each parameter of a [3, 4, 6] net.

    Sanity check only: a failure means the gradient computation is definitely
    wrong, while a pass means it is likely, but not certainly, correct.
    """
    W, b, U, b_tag = mlpn.create_classifier([3, 4, 6])

    def _objective(slot):
        """Build f(p) -> (loss, gradient of parameter ``slot``)."""

        def _loss_and_grad(candidate):
            x = np.array([1, 2, 3], np.double)
            # Read the enclosing parameters at call time so the randomised
            # values assigned below are the ones actually used.
            current = [W, b, U, b_tag]
            current[slot] = candidate
            loss, grads = mlpn.loss_and_gradients(x, 0, current)
            return loss, grads[slot]

        return _loss_and_grad

    for _ in range(1):
        W = randomize_array(W)
        b = randomize_array(b)
        U = randomize_array(U)
        b_tag = randomize_array(b_tag)

        # Checked from the output layer back to the input layer.
        checks = (
            ("b_tag", 3, b_tag),
            ("U:", 2, U),
            ("b:", 1, b),
            ("W:", 0, W),
        )
        for title, slot, value in checks:
            print(title)
            gradient_check(_objective(slot), value)
def main(text_to_ngram):
    """Train a classifier on the configured train file and return its params.

    Args:
        text_to_ngram: passed through unchanged to
            ``tl.initialize_symbol_dict`` and ``tl.xy_generator``.

    Returns:
        The value returned by ``train_classifier``.
    """
    if config.debug:
        # Seed only in debug mode.
        np.random.seed(config.mlpn.seed)

    raw_train = utils.read_data(config.filename_train)
    # Both dictionaries are built from the training data only and reused for
    # the dev data.
    symbols = tl.initialize_symbol_dict(raw_train, text_to_ngram)
    labels = tl.initialize_label_dict(raw_train)

    def _materialise(dataset):
        return list(tl.xy_generator(dataset, text_to_ngram, symbols, labels))

    xy_train = _materialise(raw_train)
    xy_dev = _materialise(utils.read_data(config.filename_dev))

    dims = config.mlpn.layer_sizes
    if config.debug:
        print(f"problem dimensions are: {dims}")

    initial = list(map(randomize_array, pn.create_classifier(dims)))
    return train_classifier(
        xy_train,
        xy_dev,
        config.mlpn.num_iterations,
        config.mlpn.learning_rate,
        initial,
    )
for b in bs: if b not in bigrams: bigrams[b] = i i += 1 def dataFromFile(fileData): data = [] for [lang, bs] in fileData: features = np.zeros(len(bigrams)) for b in bs: if b in bigrams: features[bigrams[b]] += 1 language = languages[lang] if lang in languages else -1 data.append([language, features]) return data trainData = dataFromFile(utils.TRAIN) devData = dataFromFile(utils.DEV) testData = dataFromFile(utils.TEST) params = mlpn.create_classifier([len(bigrams), 13, 13, len(languages)]) trainedParams = train_classifier(trainData, devData, 20, 0.01, params) predictions = [] for [label, data] in testData: predictions.append(languagesBack[mlpn.predict(data, trainedParams)]) outF = open('test.pred', 'w') outF.write("\n".join(predictions)) outF.close()
params: list of parameters (initial values) """ for epoch in range(num_iterations): cum_loss = 0.0 # total loss in this iteration. random.shuffle(train_data) for label, features in train_data: x = feats_to_vec_uni(features) # convert features to a vector. y = L2I[label] # convert the label to number if needed. loss, grads = ll.loss_and_gradients(x, y, params) cum_loss += loss new_params =[] for (p,g) in zip(params,grads): new_params.append(p-learning_rate*g) params = new_params # update the parameters according to the gradients # and the learning rate. """if epoch == 4: learning_rate = 0.001""" train_loss = cum_loss / len(train_data) train_accuracy = accuracy_on_dataset(train_data, params) dev_accuracy = accuracy_on_dataset(dev_data, params) print(epoch, train_loss, train_accuracy, dev_accuracy) return params if __name__ == '__main__': # write code to load the train and dev sets, set up whatever you need, # and call train_classifier. params = ll.create_classifier([600, 1200, 6]) trained_params = train_classifier(TRAIN, DEV, 10, 0.0285, params)
x = features # numpy vector. y = label # a number. loss, grads = mlpn.loss_and_gradients(x, y, params) cum_loss += loss # SGD update parameters for i in range(0, len(params)): params[i] -= learning_rate * grads[i] # notify progress train_loss = cum_loss / len(train_data) train_accuracy = accuracy_on_dataset(train_data, params) dev_accuracy = accuracy_on_dataset(dev_data, params) print I, train_loss, train_accuracy, dev_accuracy return params if __name__ == '__main__': # training parameters num_iterations = 100 learning_rate = 1e-2 # get params vocab_size, num_langs, train_data, dev_data = utils.get_bigram_data() dims = [vocab_size, 144, 30, num_langs] params = mlpn.create_classifier(dims) trained_params = train_classifier(train_data, dev_data, num_iterations, learning_rate, params) pickle.dump(trained_params, open("model.p", "wb"))
:param test_data: test data to be predicted :param params: trained params :return: """ f = open("test.pred", 'w') for label, features in test_data: x = feats_to_vec(features) y_hat = mlpn.predict(x, params) for l, i in utils.L2I.items(): if y_hat == i: label = l break f.write(label + "\n") f.close() LR = 0.001 NUM_ITERATIONS = 15 HIDDEN_LAYER_SIZE = 10 if __name__ == '__main__': params = mlpn.create_classifier( [len(utils.F2I), HIDDEN_LAYER_SIZE, HIDDEN_LAYER_SIZE, len(utils.L2I)]) trained_params = train_classifier(utils.TRAIN, utils.DEV, NUM_ITERATIONS, LR, params) TEST = [(l, utils.text_to_bigrams(t)) for l, t in utils.read_data("test")] create_test_pred_file(TEST, trained_params)
def fileData(fData):
    """Turn ``[lang, bigrams]`` pairs into ``[label_id, count_vector]`` rows.

    Each count vector has one slot per entry of ``all_bigrams``; bigrams that
    are not in ``all_bigrams`` are ignored. A language missing from
    ``all_langs`` gets the label -1.
    """

    def _encode(lang, bigrams):
        counts = np.zeros(len(all_bigrams))
        known_columns = (all_bigrams[bg] for bg in bigrams if bg in all_bigrams)
        for column in known_columns:
            counts[column] += 1
        label_id = -1 if lang not in all_langs else all_langs[lang]
        return [label_id, counts]

    return [_encode(lang, bigrams) for [lang, bigrams] in fData]


# Train on the encoded train/dev sets (train_classifier is given both).
layer_sizes = [len(all_bigrams), 20, 30, 40, 10, len(all_langs)]
params = mlp.create_classifier(layer_sizes)
trained_params = train_classifier(
    fileData(utils.TRAIN),
    fileData(utils.DEV),
    num_iterations,
    learning_rate,
    params,
)

# Predict a language for every test example.
predict = []
for [label, data] in fileData(utils.TEST):
    predicted_id = mlp.predict(data, trained_params)
    predict.append(lang_to_id[predicted_id])

# Writing the predictions is disabled so an existing test.pred is not
# overwritten:
#
# predict_file = open('test.pred', 'w')
# predict_file.writelines(["%s\n" % item for item in predict])
# predict_file.close()
train_loss = cum_loss / len(train_data) train_accuracy = accuracy_on_dataset(train_data, params) dev_accuracy = accuracy_on_dataset(dev_data, params) print I, train_loss, train_accuracy, dev_accuracy return params def run_test(test_data, params): pred_file = open("test.pred", 'w') for label, features in test_data: x = feats_to_vec(features) # convert features to a vector. y_hat = ml.predict(x, params) for key, val in ut.L2I.items( ): # for name, age in dictionary.iteritems(): (for Python 2.x) if val == y_hat: label = key break pred_file.write(str(label) + "\n") pred_file.close() if __name__ == '__main__': # YOUR CODE HERE # write code to load the train and dev sets, set up whatever you need, # and call train_classifier. params = ml.create_classifier( [len(ut.F2I), HIDDEN_SIZE1, HIDDEN_SIZE2, len(ut.L2I)]) trained_params = train_classifier(ut.TRAIN, ut.DEV, EPOCHS, ETA, params) run_test(ut.TRAIN, trained_params)
return params def test(parameters): """ test classifier with test data - no labels params - the trained params """ # fd = open("test.pred", 'w') counter = 0 test_ans = '' test_data = ut.read_data('test') for label, feature in test_data: pred = mlpn.predict(feats_to_vec(feature), parameters) for l,i in ut.L2I.items(): if i == pred: test_ans = l counter += 1 #fd.write(test_ans+"\n") #print 'line: ', counter, 'prediction: ', test_ans #fd.close() if __name__ == '__main__': train_data = ut.read_data('train') dev_data = ut.read_data('dev') params = mlpn.create_classifier([len(ut.F2I),HIDDEN_SIZE, HIDDEN_SIZE, HIDDEN_SIZE, len(ut.L2I)]) trained_params = train_classifier(train_data,dev_data,EPOCH,LR,params) print trained_params test(trained_params)
return params if __name__ == '__main__': # YOUR CODE HERE # write code to load the train and dev sets, set up whatever you need, # and call train_classifier. train_data = utils.TRAIN dev_data = utils.DEV learning_rate = 0.01 num_iterations = 250 out_dim = len(utils.L2I) hid_dim1 = 100 hid_dim2 = 50 hid_dim3 = 20 in_dim = 600 # in_dim = len(utils.F2I) params = mlpn.create_classifier( [in_dim, hid_dim1, hid_dim2, hid_dim3, out_dim]) trained_params = train_classifier(train_data, dev_data, num_iterations, learning_rate, params) if EXECUTE_TEST: test_results = [] for val in utils.TEST: test_results.append(utils.I2L[mlpn.predict(feats_to_vec(val[1]), trained_params)]) with open("test_results.txt", "w") as f: f.writelines(["%s\n" % item for item in test_results])
x = feats_to_vec(features) # convert features to a vector. y = ut.L2I[label] # convert the label to number if needed. loss, grads = mlp_n.loss_and_gradients(x, y, params) cum_loss += loss # YOUR CODE HERE # update the parameters according to the gradients # and the learning rate. # Teata = Theata - learnning rate * gradient. # For every param in the params list reduce the right gradient multiply the lr. for index, _ in enumerate(params): params[index] = params[index] - learning_rate * grads[index] train_loss = cum_loss / len(train_data) train_accuracy = accuracy_on_dataset(train_data, params) dev_accuracy = accuracy_on_dataset(dev_data, params) print(I, train_loss, train_accuracy, dev_accuracy) return params if __name__ == '__main__': # YOUR CODE HERE # write code to load the train and dev sets, set up whatever you need, # and call train_classifier. firstLayerNeuronsLength = len(ut.F2I) outputLayerNeuronsLength = len(ut.L2I) params = mlp_n.create_classifier( [firstLayerNeuronsLength, 20, 30, 40, 10, outputLayerNeuronsLength]) trained_params = train_classifier(ut.TRAIN, ut.DEV, EPOCHS, ETA, params)
import mlpn as mlp
from train_mlpn import train_classifier
from utils import load_train_set, load_validation_set

STUDENT = {'name': 'Dorin Keshales'}

if __name__ == '__main__':
    # Layer sizes are 700, then any hidden sizes the user types, then 6.
    # NOTE(review): features_size and labels_size returned by load_train_set
    # are never used; confirm they match the hard-coded 700 and 6.
    d = input(
        "Enter the dimensions of the hidden layers separated by comma.\nIf you don't want hidden layers press enter.\n"
    )
    # An empty answer means no hidden layers at all.
    hidden_sizes = [int(dim) for dim in d.split(",")] if len(d) != 0 else []
    dimensions = [700] + hidden_sizes + [6]

    train, features_size, labels_size = load_train_set('bigrams')
    validation = load_validation_set('bigrams')

    learning_rate = 0.001
    params = mlp.create_classifier(dimensions)
    trained_params = train_classifier(train, validation, 10, learning_rate,
                                      params)
if __name__ == '__main__':
    # Script entry point: train on utils.TRAIN / utils.DEV, then hand the
    # bigram features of the "test" file to pred().
    train_data, dev_data = utils.TRAIN, utils.DEV
    in_dim, out_dim = len(utils.F2I), len(utils.L2I)
    num_iterations, learning_rate = 7, 1e-3

    # Labels in the test file are ignored; only the text is featurised.
    TEST = [utils.text_to_bigrams(text) for _, text in utils.read_data("test")]

    layer_sizes = [in_dim, 500, 256, 100, out_dim]
    params = mlpn.create_classifier(layer_sizes)
    # Variant without hidden layers:
    # params = mlpn.create_classifier([in_dim, out_dim])
    trained_params = train_classifier(train_data, dev_data, num_iterations,
                                      learning_rate, params)
    pred(TEST, trained_params)

    # Disabled small configuration:
    #
    # in_dim = 2
    # hidden_dim = 4
    # out_dim = 2
    # num_iterations = 20
    # learning_rate = 1
    #
    # params = mlpn.create_classifier([in_dim, hidden_dim, out_dim])
for label, features in train_data: x = feats_to_vec(features) # convert features to a vector. y = L2I.get(label) # convert the label to number if needed. loss, gradients = mlpn.loss_and_gradients(x, y, params) cum_loss += loss for param, dparam in zip(params, gradients): param -= learning_rate * dparam # update the parameters according to the gradients # and the learning rate. train_loss = cum_loss / len(train_data) train_accuracy = accuracy_on_dataset(train_data, params) dev_accuracy = accuracy_on_dataset(dev_data, params) print I, train_loss, train_accuracy, dev_accuracy return params if __name__ == '__main__': # YOUR CODE HERE # write code to load the train and dev sets, set up whatever you need, # and call train_classifier. # ... in_dim = VOCAB_SIZE hidden_dim = CATEGORIES * 2 out_dim = CATEGORIES params = mlpn.create_classifier([600, 200, 100, 6]) trained_params = train_classifier(TRAIN, DEV, num_iterations, learning_rate, params)
cum_loss = 0.0 # total loss in this iteration. random.shuffle(train_data) for label, features in train_data: x = feats_to_vec(features) # convert features to a vector. y = L2I[label] # convert the label to number if needed. loss, grads = ll.loss_and_gradients(x, y, params) gw, gb = grads cum_loss += loss W, b = params W -= learning_rate * gw b -= learning_rate * gb params = W, b # update the parameters according to the gradients # and the learning rate. if epoch == 4: learning_rate = 0.001 train_loss = cum_loss / len(train_data) train_accuracy = accuracy_on_dataset(train_data, params) dev_accuracy = accuracy_on_dataset(dev_data, params) print(epoch, train_loss, train_accuracy, dev_accuracy) return params if __name__ == '__main__': # write code to load the train and dev sets, set up whatever you need, # and call train_classifier. params = ll.create_classifier(600, 6) trained_params = train_classifier(TRAIN, DEV, 10, 0.01, params) # predict(TEST, (ll.predict, params))
# update the parameters according to the gradients # and the learning rate. for i in range(0, len(params), 2): params[i] -= learning_rate * grads[i] b = params[i + 1] params[i + 1] = np.squeeze( (b - learning_rate * grads[i + 1].T).T) train_loss = cum_loss / len(train_data) train_accuracy = accuracy_on_dataset(train_data, params) dev_accuracy = accuracy_on_dataset(dev_data, params) print(I, train_loss, train_accuracy, dev_accuracy) return params if __name__ == '__main__': # write code to load the train and dev sets, set up whatever you need, # and call train_classifier. train_data = ut.TRAIN dev_data = ut.DEV test_data = ut.TEST num_iterations = 20 learning_rate = 0.001 in_dim, out_dim = len(ut.F2I), len(ut.L2I) layers_sizes = [in_dim, out_dim] params = mlpn.create_classifier(layers_sizes) trained_params = train_classifier(train_data, dev_data, num_iterations, learning_rate, params) test_predict(test_data, trained_params)