def fit(self, targets_train, contexts_train, n_epochs=10, print_every=100):
    '''Trains the Skip-gram neural network on target and context word data

    Parameters:
    -----------
    targets_train: list of ndarrays. len=num training samples, sub ndarray shape=(1, vocab_sz).
        Training target word data one-hot coded.
    contexts_train: list of ndarrays. len=num training samples,
        sub ndarray shape=(c,), where win_sz <= c <= 2*win_sz.
        Training context word data. sub ndarray shape[0] is variable due to border effects
        at start/end of sentences in corpus.
    n_epochs: int. Number of training epochs.
    print_every: int. How many training EPOCHS we wait before printing the current epoch loss.

    Returns:
    -----------
    self.loss_history. Python list of float. len=n_epochs
        The i-th entry is the MEAN loss value computed across all iterations in the i-th epoch.

    TODO: Update this method's implementation.
    - Remove mini-batch support. Assume that wts will be updated after every training sample
      is processed (stochastic gradient descent).
    - On each training iteration, get the i-th target one-hot vector and associated context
      word indices. This is your "x" and "y". Do the forward/backward pass and wt update like usual.
    - self.loss_history: Only add loss values at the end of an epoch. Make the loss value that
      you add be the MEAN loss value across all iterations in one epoch.
    - Remove support for accuracy/validation checking. This isn't needed for basic Skip-gram.
    '''
    iter_per_epoch = len(targets_train)
    n_iter = n_epochs * iter_per_epoch
    loss_history_to_avg = []

    print(f'Starting to train ({n_epochs} epochs)...')
    for i in range(n_iter):
        # Get the i-th training sample: target one-hot vector ("x") and context word indices ("y")
        x = targets_train[i % iter_per_epoch]
        y = contexts_train[i % iter_per_epoch]

        loss = self.forward(x, y)
        loss_history_to_avg.append(loss)
        self.backward(y)

        # Stochastic gradient descent: update wts after every training sample
        for layer in self.layers:
            layer.update_weights()

        # At the end of each epoch, record the mean loss across the epoch's iterations
        if (i + 1) % iter_per_epoch == 0:
            epoch = (i + 1) // iter_per_epoch
            epoch_loss = sum(loss_history_to_avg) / len(loss_history_to_avg)
            self.loss_history.append(epoch_loss)
            loss_history_to_avg = []

            if epoch % print_every == 0:
                print(f'Finished epoch {epoch}/{n_epochs}. Epoch Loss: {epoch_loss:.3f}')
    return self.loss_history
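# A minimal, self-contained sketch (an illustration, not part of the original assignment code)
# of how the targets_train/contexts_train structures described in the docstring above can be
# built from a toy sentence with numpy. `build_skipgram_data` is a hypothetical helper name;
# clipping the window at sentence borders is what yields the variable-length context arrays
# (between win_sz and 2*win_sz context words) that the docstring mentions.
import numpy as np

def build_skipgram_data(sentence, vocab_sz, win_sz=2):
    '''Returns (targets, contexts): one-hot target vectors and context word index arrays.'''
    targets, contexts = [], []
    for t, word_idx in enumerate(sentence):
        one_hot = np.zeros((1, vocab_sz))
        one_hot[0, word_idx] = 1
        # Clip the window at the sentence borders -> win_sz <= num context words <= 2*win_sz
        lo, hi = max(0, t - win_sz), min(len(sentence), t + win_sz + 1)
        ctx = [sentence[c] for c in range(lo, hi) if c != t]
        targets.append(one_hot)
        contexts.append(np.array(ctx))
    return targets, contexts

# e.g. a 5-word sentence over a 6-word vocabulary (int-coded words):
targets, contexts = build_skipgram_data([0, 3, 1, 4, 2], vocab_sz=6, win_sz=2)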
def fit(self, x_train, y_train, x_validate, y_validate, mini_batch_sz=100, n_epochs=10,
        acc_freq=9, print_every=1):
    '''Trains the neural network on data

    Parameters:
    -----------
    x_train: ndarray. shape=(num training samples, n_chans, img_y, img_x). Training data.
    y_train: ndarray. shape=(num training samples,). Training data classes, int coded.
    x_validate: ndarray. shape=(num validation samples, n_chans, img_y, img_x).
        Every so often during training (see acc_freq param), we compute the accuracy of the
        network in classifying the validation set (out-of-training-set generalization).
        This is the data we use.
    y_validate: ndarray. shape=(num validation samples,). Validation data classes, int coded.
    mini_batch_sz: int. Mini-batch training size.
    n_epochs: int. Number of training epochs.
    acc_freq: int. How many training iterations (weight updates) we wait before checking
        accuracy on the training and validation sets?
    print_every: int. How many training iterations we wait before printing the iteration count.

    TODO: Complete this method's implementation.
    1. In the main training loop, randomly sample to get a mini-batch.
    2. Do forward pass through network using the mini-batch.
    3. Do backward pass through network using the mini-batch.
    4. Compute the loss on the mini-batch, add it to our loss history list.
    5. Call each layer's update wt method.
    6. Use the Python time module to print out the runtime (in minutes) for iteration 0 only.
       Also print out the projected time for completing ALL training iterations.
       (For simplicity, you don't need to consider the time taken for computing train and
       validation accuracy).
    '''
    # Assumes `import time` and `import numpy as np` at module level.
    num_samps, n_chans, img_y, img_x = x_train.shape
    iter_per_epoch = max(int(len(x_train) / mini_batch_sz), 1)
    n_iter = n_epochs * iter_per_epoch

    print('Starting to train...')
    print(f'{n_iter} iterations. {iter_per_epoch} iter/epoch.')

    start_time = time.time()
    for i in range(n_iter):
        # Sample random indices with replacement; indices are guaranteed to match
        # between samples and labels.
        random_indices = np.random.choice(np.arange(num_samps), size=mini_batch_sz, replace=True)
        cur_samps = x_train[random_indices]
        cur_labels = y_train[random_indices]

        loss = self.forward(cur_samps, cur_labels)
        self.loss_history.append(loss)
        self.backward(cur_labels)

        for layer in self.layers:
            layer.update_weights()

        if (i + 1) % print_every == 0:
            print(f'Iteration: {i+1}/{n_iter}.')

        # Report runtime (in minutes) for iteration 0 and the projected total training time
        if i == 0:
            dt = (time.time() - start_time) / 60
            print(f'Time taken for iteration 0: {dt:.4f} min')
            print(f'Estimated time to complete all {n_iter} iterations: {dt * n_iter:.2f} min')

        if (i + 1) % acc_freq == 0:
            print('\n------------- LOSS HISTORY -------------')
            print(f'Loss original: {self.loss_history[0]}')
            if len(self.loss_history) < 3:
                print(f'Loss latest: {self.loss_history[-1]}\n')
            else:
                print(f'Loss latest three: {self.loss_history[-3:]}\n')
            print('----------- ACCURACIES -----------')
            train_acc = self.accuracy(x_train, y_train, mini_batch_sz=mini_batch_sz)
            val_acc = self.accuracy(x_validate, y_validate, mini_batch_sz=mini_batch_sz)
            self.train_acc_history.append(train_acc)
            self.validation_acc_history.append(val_acc)
            print(f'Train acc: {train_acc}, Val acc: {val_acc}\n')

    print('\n---------------- FINAL OUTPUT ----------------')
    train_acc = self.accuracy(x_train, y_train, mini_batch_sz=mini_batch_sz)
    val_acc = self.accuracy(x_validate, y_validate, mini_batch_sz=mini_batch_sz)
    self.train_acc_history.append(train_acc)
    self.validation_acc_history.append(val_acc)
    print(f'Train acc: {train_acc}, Val acc: {val_acc}')
    print(f'Loss history: {self.loss_history}')
    print(f'Accuracy history: {self.train_acc_history}')
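# A hedged sketch of the mini-batched accuracy check that the fit() above relies on; this is
# NOT the original class code. It assumes a `predict` method that returns int-coded class
# predictions; evaluating in mini-batches keeps memory bounded on large train/validation sets.
import numpy as np

def accuracy(self, x, y, mini_batch_sz=100):
    '''Fraction of samples in x whose predicted class matches the int-coded labels y.'''
    n_correct = 0
    for start in range(0, len(x), mini_batch_sz):
        batch = x[start:start + mini_batch_sz]
        preds = self.predict(batch)  # assumed helper: int-coded class predictions per sample
        n_correct += np.sum(preds == y[start:start + mini_batch_sz])
    return n_correct / len(x)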
def fit(self, x_train, y_train, x_validate, y_validate, mini_batch_sz=100, n_epochs=10, acc_freq=4):
    '''Trains the neural network on data

    Parameters:
    -----------
    x_train: ndarray. shape=(num training samples, n_chans, img_y, img_x). Training data.
    y_train: ndarray. shape=(num training samples,). Training data classes, int coded.
    x_validate: ndarray. shape=(num validation samples, n_chans, img_y, img_x).
        Every so often during training (see acc_freq param), we compute the accuracy of the
        network in classifying the validation set (out-of-training-set generalization).
        This is the data we use.
    y_validate: ndarray. shape=(num validation samples,). Validation data classes, int coded.
    mini_batch_sz: int. Mini-batch training size.
    n_epochs: int. Number of training epochs.
    acc_freq: int. How many training iterations (weight updates) we wait before checking
        accuracy on the training and validation sets?

    TODO: Complete this method's implementation.
    1. In the main training loop, randomly sample to get a mini-batch.
    2. Do forward pass through network using the mini-batch.
    3. Do backward pass through network using the mini-batch.
    4. Compute the loss on the mini-batch, add it to our loss history list.
    5. Call each layer's update wt method.
    6. Use the Python time module to print out the runtime (in minutes) for iteration 0 only.
       Also print out the projected time for completing ALL training iterations.
       (For simplicity, you don't need to consider the time taken for computing train and
       validation accuracy).
    '''
    iter_per_epoch = max(int(len(x_train) / mini_batch_sz), 1)
    n_iter = n_epochs * iter_per_epoch

    print('Starting to train...')
    print(f'{n_iter} iterations. {iter_per_epoch} iter/epoch.')

    start_time = time.time()
    for i in range(n_iter):
        # Sample a mini-batch of matching features and labels (uniformly, with replacement)
        index = np.random.randint(x_train.shape[0], size=mini_batch_sz)
        features_batch = x_train[index, :]
        y_batch = y_train[index]

        loss = self.forward(features_batch, y_batch)
        self.backward(y_batch)
        self.loss_history.append(loss)

        for layer in self.layers:
            layer.update_weights()

        if (i + 1) % acc_freq == 0:
            train_acc = self.accuracy(x_train, y_train, mini_batch_sz=mini_batch_sz)
            val_acc = self.accuracy(x_validate, y_validate, mini_batch_sz=mini_batch_sz)
            self.train_acc_history.append(train_acc)
            self.validation_acc_history.append(val_acc)
            print(f'Train acc: {train_acc}, Val acc: {val_acc}')

        # Part 6: report iteration-0 runtime (minutes) and project the total training time
        if i == 0:
            dt = (time.time() - start_time) / 60
            print(f'Time taken for iteration 0: {dt:.4f} min')
            print(f'Projected time for all {n_iter} iterations: {dt * n_iter:.2f} min')

    if self.verbose > 0:
        print('Finished training!')
    return self.loss_history.copy(), self.train_acc_history.copy(), self.validation_acc_history.copy()
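# A small, self-contained illustration (not assignment code) of the part-6 timing logic used
# above: time iteration 0, then project the total runtime across all iterations. The workload
# and iteration count are stand-in values for demonstration only.
import time

n_iter = 500
start = time.time()
_ = sum(i * i for i in range(10**5))  # stand-in for one forward/backward/update iteration
dt_min = (time.time() - start) / 60
print(f'Iteration 0 took {dt_min:.4f} min; projected total: {dt_min * n_iter:.2f} min')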
def fit(self, targets_train, contexts_train, n_epochs=10, print_every=10):
    '''Trains the Skip-gram neural network on target and context word data

    Parameters:
    -----------
    targets_train: list of ndarrays. len=num training samples, sub ndarray shape=(1, vocab_sz).
        Training target word data one-hot coded.
    contexts_train: list of ndarrays. len=num training samples,
        sub ndarray shape=(c,), where win_sz <= c <= 2*win_sz.
        Training context word data. sub ndarray shape[0] is variable due to border effects
        at start/end of sentences in corpus.
    n_epochs: int. Number of training epochs.
    print_every: int. How many training EPOCHS we wait before printing the current epoch loss.

    Returns:
    -----------
    self.loss_history. Python list of float. len=n_epochs
        The i-th entry is the MEAN loss value computed across all iterations in the i-th epoch.

    TODO: Update this method's implementation.
    - Remove mini-batch support. Assume that wts will be updated after every training sample
      is processed (stochastic gradient descent).
    - On each training iteration, get the i-th target one-hot vector and associated context
      word indices. This is your "x" and "y". Do the forward/backward pass and wt update like usual.
    - self.loss_history: Only add loss values at the end of an epoch. Make the loss value that
      you add be the MEAN loss value across all iterations in one epoch.
    - Remove support for accuracy/validation checking. This isn't needed for basic Skip-gram.
    '''
    iter_per_epoch = len(targets_train)
    sum_of_loss_per_epoch = 0

    print(f'Starting to train ({n_epochs} epochs)...')
    for j in range(1, n_epochs + 1):
        for i in range(iter_per_epoch):
            # The i-th target one-hot vector is "x"; its context word indices are "y"
            xi = targets_train[i]
            yi = contexts_train[i]

            # Forward pass: each layer's net_act becomes the next layer's input
            inputs = xi.copy()
            for layer in self.layers:
                inputs = layer.forward(inputs)

            # Compute this iteration's loss and accumulate it for the epoch mean
            loss = self.layers[-1].loss(yi)
            sum_of_loss_per_epoch += loss

            # Backward pass: each layer returns (dprev_net_act, d_wts, d_b);
            # pass dprev_net_act upstream to the previous layer
            d_upstream = loss
            for layer in reversed(self.layers):
                d_upstream = layer.backward(d_upstream, yi)[0]

            # Update the weights of all layers (SGD: one update per training sample)
            for layer in self.layers:
                layer.update_weights()

        # At the end of the epoch, save the mean loss and reset the accumulator
        self.loss_history.append(sum_of_loss_per_epoch / iter_per_epoch)
        sum_of_loss_per_epoch = 0

        # Put this in your training loop
        if j % print_every == 0:
            print(f'Finished epoch {j}/{n_epochs}. Epoch Loss: {self.loss_history[-1]:.3f}')
    return self.loss_history
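# An illustrative, self-contained sketch of the layer-chaining pattern used in the fit()
# above: activations flow forward through self.layers, and backward() returns a tuple whose
# first element is the gradient handed to the previous layer. The `Dense` layer here is
# hypothetical (not the course's layer API) and uses a squared-error-style gradient purely
# for demonstration.
import numpy as np

class Dense:
    def __init__(self, n_in, n_out, lr=0.1):
        self.w = np.random.randn(n_in, n_out) * 0.01
        self.lr = lr

    def forward(self, x):
        self.x = x
        return x @ self.w

    def backward(self, d_up):
        self.d_w = self.x.T @ d_up           # gradient w.r.t. this layer's weights
        return (d_up @ self.w.T, self.d_w)   # first element: gradient for the layer below

    def update_weights(self):
        self.w -= self.lr * self.d_w

layers = [Dense(4, 3), Dense(3, 2)]
act = np.random.randn(1, 4)
for layer in layers:                         # forward pass, layer by layer
    act = layer.forward(act)
d_up = act - np.array([[1.0, 0.0]])          # gradient of a squared-error-style loss
for layer in reversed(layers):               # backward pass: take element [0] of the tuple
    d_up = layer.backward(d_up)[0]
for layer in layers:                         # weight update, as in the fit() loop above
    layer.update_weights()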