Example #1
    def fit(self, targets_train, contexts_train, n_epochs=10, print_every=100):
        '''Trains the Skip-gram neural network on target and context word data

        Parameters:
        -----------
        targets_train: list of ndarrays. len=num training samples, sub ndarray shape=(1, vocab_sz).
            Training target word data one-hot coded.
        contexts_train: list of ndarrays.
            len=num training samples, sub ndarray shape=(c,) where win_sz <= c <= 2*win_sz.
            Training context word data one-hot coded. sub ndarray shape[0] is variable due to border
            effects at start/end of sentences in corpus.
        n_epochs: int. Number of training epochs.
        print_every: int. How many training EPOCHS we wait before printing the current epoch loss.

        Returns:
        -----------
        self.loss_history. Python list of float. len=n_epochs
            The i-th entry is the MEAN loss value computed across all iterations in the i-th epoch.

        TODO: Update this method's implementation.
        - Remove mini-batch support. Assume that wts will be updated after every training sample is
            processed (stochastic gradient descent).
        - On each training iteration, get the i-th target one-hot vector and associated context word
        indices. This is your "x" and "y". Do the forward/backward pass and wt update like usual.
        - self.loss_history: Only add loss values at the end of an epoch. Make the loss value that you
        add be the MEAN loss value across all iterations in one epoch.
        - Remove support for accuracy/validation checking. This isn't needed for basic Skip-gram.
        '''
        print("targets_train:", targets_train[0].shape)
        iter_per_epoch = len(targets_train)
        n_iter = n_epochs * iter_per_epoch

        loss_history_to_avg = []
        print(f'Starting to train ({n_epochs} epochs)...')
        for i in range(n_iter):
            x = targets_train[i % iter_per_epoch]
            y = contexts_train[i % iter_per_epoch]
            loss = self.forward(x, y)
            loss_history_to_avg.append(loss)
            self.backward(y)
            for layer in self.layers:
                layer.update_weights()
            # End of an epoch: record the mean loss across all of its iterations
            if (i + 1) % iter_per_epoch == 0:
                e = (i + 1) // iter_per_epoch  # current epoch (1-based)
                epoch_loss = sum(loss_history_to_avg) / len(loss_history_to_avg)
                self.loss_history.append(epoch_loss)
                loss_history_to_avg = []
                if e % print_every == 0:
                    print(f'Finished epoch {e}/{n_epochs}. Epoch Loss: {epoch_loss:.3f}')
        return self.loss_history
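The docstring above assumes pre-built training pairs: a one-hot target vector per sample plus a variable-length context array that shrinks near sentence boundaries. A minimal sketch of how such pairs could be constructed from an int-coded sentence follows; the helper name make_skipgram_pairs and the choice to store context indices (rather than one-hot rows), which follows the TODO's wording, are illustrative assumptions and not part of the original assignment code.

import numpy as np

def make_skipgram_pairs(sentence, vocab_sz, win_sz=2):
    '''Build (one-hot target, context-index array) pairs for one int-coded sentence.

    Windows near the start/end of the sentence contain fewer words, which is why the
    per-sample context arrays described in the docstring have variable length.
    '''
    targets, contexts = [], []
    for t, word_idx in enumerate(sentence):
        # One-hot row vector for the target word, shape (1, vocab_sz)
        target = np.zeros((1, vocab_sz))
        target[0, word_idx] = 1.0
        # Indices of the words within +/- win_sz of the target, excluding the target itself
        lo, hi = max(0, t - win_sz), min(len(sentence), t + win_sz + 1)
        ctx = np.array([sentence[j] for j in range(lo, hi) if j != t])
        targets.append(target)
        contexts.append(ctx)
    return targets, contexts

# Example: a 4-word sentence over a 6-word vocabulary
targets, contexts = make_skipgram_pairs([2, 0, 5, 1], vocab_sz=6, win_sz=2)
print(contexts[0])  # [0 5] -- only win_sz context words at the sentence start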
Example #2
    def fit(self,
            x_train,
            y_train,
            x_validate,
            y_validate,
            mini_batch_sz=100,
            n_epochs=10,
            acc_freq=9,
            print_every=1):
        '''Trains the neural network on data

        Parameters:
        -----------
        x_train: ndarray. shape=(num training samples, n_chans, img_y, img_x).
            Training data.
        y_train: ndarray. shape=(num training samples,).
            Training data classes, int coded.
        x_validate: ndarray. shape=(num validation samples, n_chans, img_y, img_x).
            Every so often during training (see acc_freq param), we compute
            the accuracy of the network in classifying the validation set
            (out-of-training-set generalization). This is the data we use.
        y_validate: ndarray. shape=(num validation samples,).
            Validation data classes, int coded.
        mini_batch_sz: int. Mini-batch training size.
        n_epochs: int. Number of training epochs.
        acc_freq: int. How many training iterations (weight updates) to wait
            before checking accuracy on the training and validation sets.

        TODO: Complete this method's implementation.
        1. In the main training loop, randomly sample to get a mini-batch.
        2. Do forward pass through network using the mini-batch.
        3. Do backward pass through network using the mini-batch.
        4. Compute the loss on the mini-batch, add it to our loss history list
        5. Call each layer's update wt method.
        6. Use the Python time module to print out the runtime (in minutes) for iteration 0 only.
            Also print out the projected time for completing ALL training iterations.
            (For simplicity, you don't need to consider the time taken for computing
            train and validation accuracy).

        '''
        num_samps, n_chans, img_y, img_x = x_train.shape
        iter_per_epoch = max(int(len(x_train) / mini_batch_sz), 1)
        n_iter = n_epochs * iter_per_epoch

        print('Starting to train...')
        print(f'{n_iter} iterations. {iter_per_epoch} iter/epoch.')
        sec = time.time()
        for i in range(n_iter):
            #generate random indices with replacement for cur_samps and cur_labels
            #indices are guaranteed to match for samps and labels
            random_indices = np.random.choice(np.arange(num_samps),
                                              size=mini_batch_sz,
                                              replace=True)
            cur_samps = x_train[random_indices]
            cur_labels = y_train[random_indices]
            loss = self.forward(cur_samps, cur_labels)
            self.loss_history.append(loss)
            self.backward(cur_labels)
            for layer in self.layers:
                layer.update_weights()

            if (i + 1) % print_every == 0:
                print(f'Iteration: {i+1}/{n_iter}.')

            if i == 0:
                dt_min = (time.time() - sec) / 60
                print(f'Time taken for iteration 0: {dt_min:.4f} min')
                print(f'Estimated time to complete all {n_iter} iterations: {dt_min * n_iter:.2f} min')

            if (i + 1) % acc_freq == 0:
                print("\n-------------LOSS HISTORIES-------------\n")
                print(f"Loss original: {self.loss_history[0]}")
                if len(self.loss_history) < 3:
                    print(f"Loss latest: {self.loss_history[-1]}\n\n")
                else:
                    print(f"Loss latest three: {self.loss_history[-3:]}\n\n")

                print("-----------ACCURACIES-----------\n")
                train_acc = self.accuracy(x_train,
                                          y_train,
                                          mini_batch_sz=mini_batch_sz)
                val_acc = self.accuracy(x_validate,
                                        y_validate,
                                        mini_batch_sz=mini_batch_sz)

                self.train_acc_history.append(train_acc)
                self.validation_acc_history.append(val_acc)
                print(f'  Train acc: {train_acc}, Val acc: {val_acc}\n\n')
        print("\n\n----------------FINAL OUTPUT----------------")
        train_acc = self.accuracy(x_train,
                                  y_train,
                                  mini_batch_sz=mini_batch_sz)
        val_acc = self.accuracy(x_validate,
                                y_validate,
                                mini_batch_sz=mini_batch_sz)

        self.train_acc_history.append(train_acc)
        self.validation_acc_history.append(val_acc)
        print(f'  Train acc: {train_acc}, Val acc: {val_acc}')
        print(f"Loss history: {self.loss_history}")
        print(f"Accuracy history: {self.train_acc_history}")
Example #3
    def fit(self,
            x_train,
            y_train,
            x_validate,
            y_validate,
            mini_batch_sz=100,
            n_epochs=10,
            acc_freq=4):
        '''Trains the neural network on data

        Parameters:
        -----------
        x_train: ndarray. shape=(num training samples, n_chans, img_y, img_x).
            Training data.
        y_train: ndarray. shape=(num training samples,).
            Training data classes, int coded.
        x_validate: ndarray. shape=(num validation samples, n_chans, img_y, img_x).
            Every so often during training (see acc_freq param), we compute
            the accuracy of the network in classifying the validation set
            (out-of-training-set generalization). This is the data we use.
        y_validate: ndarray. shape=(num validation samples,).
            Validation data classes, int coded.
        mini_batch_sz: int. Mini-batch training size.
        n_epochs: int. Number of training epochs.
        acc_freq: int. How many training iterations (weight updates) to wait
            before checking accuracy on the training and validation sets.

        TODO: Complete this method's implementation.
        1. In the main training loop, randomly sample to get a mini-batch.
        2. Do forward pass through network using the mini-batch.
        3. Do backward pass through network using the mini-batch.
        4. Compute the loss on the mini-batch, add it to our loss history list
        5. Call each layer's update wt method.
        6. Use the Python time module to print out the runtime (in minutes) for iteration 0 only.
            Also print out the projected time for completing ALL training iterations.
            (For simplicity, you don't need to consider the time taken for computing
            train and validation accuracy).

        '''


        iter_per_epoch = max(int(len(x_train) / mini_batch_sz), 1)
        n_iter = n_epochs * iter_per_epoch

        print('Starting to train...')
        print(f'{n_iter} iterations. {iter_per_epoch} iter/epoch.')

        for i in range(n_iter):
            start_time = time.time()
            index = np.random.randint(x_train.shape[0], size=mini_batch_sz)
            features_batch = x_train[index, :]
            y_batch = y_train[index]
            loss = self.forward(features_batch, y_batch)
            self.backward(y_batch)
            self.loss_history.append(loss)
            #print("loss", loss)
            for layer in self.layers:
                layer.update_weights()

            # NOTE: This print statement should go in your training loop
            if (i + 1) % acc_freq == 0:
                train_acc = self.accuracy(x_train,
                                          y_train,
                                          mini_batch_sz=mini_batch_sz)
                val_acc = self.accuracy(x_validate,
                                        y_validate,
                                        mini_batch_sz=mini_batch_sz)

                self.train_acc_history.append(train_acc)
                self.validation_acc_history.append(val_acc)
                print(f'  Train acc: {train_acc}, Val acc: {val_acc}')

            if i == 0:
                dt_min = (time.time() - start_time) / 60
                print(f'Time taken for iteration 0: {dt_min:.4f} min')
                print(f'Estimated time to complete all {n_iter} iterations: {dt_min * n_iter:.2f} min')
        if self.verbose > 0:
            print('Finished training!')

        return (self.loss_history.copy(), self.train_acc_history.copy(),
                self.validation_acc_history.copy())
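Both versions evaluate accuracy with self.accuracy(..., mini_batch_sz=...). Presumably the point of passing mini_batch_sz there is to run inference in chunks so the whole dataset never has to pass through the network at once; a self-contained sketch of that chunked-evaluation pattern follows, with predict standing in for the network's forward pass (the function name and toy data are assumptions, not the course API).

import numpy as np

def chunked_accuracy(predict, x, y, mini_batch_sz=100):
    '''Classification accuracy computed in mini-batches to bound memory use.

    predict: callable mapping a batch of samples to int-coded class predictions.
    '''
    n_correct = 0
    for start in range(0, len(x), mini_batch_sz):
        preds = predict(x[start:start + mini_batch_sz])
        n_correct += int(np.sum(preds == y[start:start + mini_batch_sz]))
    return n_correct / len(x)

# Toy check with a dummy "network" that always predicts class 0
x_toy = np.zeros((10, 1, 4, 4))
y_toy = np.array([0] * 7 + [1] * 3)
print(chunked_accuracy(lambda batch: np.zeros(len(batch), dtype=int), x_toy, y_toy))  # 0.7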
Example #4
	def fit(self, targets_train, contexts_train, n_epochs=10, print_every=10):
		'''Trains the Skip-gram neural network on target and context word data

		Parameters:
		-----------
		targets_train: list of ndarrays. len=num training samples, sub ndarray shape=(1, vocab_sz).
			Training target word data one-hot coded.
		contexts_train: list of ndarrays.
			len=num training samples, sub ndarray shape=(c,) where win_sz <= c <= 2*win_sz.
			Training context word data one-hot coded. sub ndarray shape[0] is variable due to border
			effects at start/end of sentences in corpus.
		n_epochs: int. Number of training epochs.
		print_every: int. How many training EPOCHS we wait before printing the current epoch loss.

		Returns:
		-----------
		self.loss_history. Python list of float. len=n_epochs
			The i-th entry is the MEAN loss value computed across all iterations in the i-th epoch.

		TODO: Update this method's implementation.
		- Remove mini-batch support. Assume that wts will be updated after every training sample is
			processed (stochastic gradient descent).
		- On each training iteration, get the i-th target one-hot vector and associated context word
		indices. This is your "x" and "y". Do the forward/backward pass and wt update like usual.
		- self.loss_history: Only add loss values at the end of an epoch. Make the loss value that you
		add be the MEAN loss value across all iterations in one epoch.
		- Remove support for accuracy/validation checking. This isn't needed for basic Skip-gram.
		'''

		iter_per_epoch = len(targets_train)
		n_iter = n_epochs * iter_per_epoch
		sum_of_loss_per_epoch = 0

		print(f'Starting to train ({n_epochs} epochs)...')

		# FILL IN CODE HERE
		for j in range(1, n_epochs+1):
			for i in range(iter_per_epoch):
				xi = targets_train[i]
				yi = contexts_train[i]

				# compute net act for each layer and pass it forward
				inputs = xi.copy()
				for layer in self.layers:
					inputs = layer.forward(inputs)

				# compute loss of iteration and add it to the sum of losses
				loss = self.layers[-1].loss(yi)
				sum_of_loss_per_epoch += loss

				# use loss to call backward, passing it in as the initial d_upstream
				d_upstream = loss
				for layer in reversed(self.layers):
					# each layer's backward returns (dprev_net_act, d_wts, d_b); pass dprev_net_act upstream
					d_upstream = layer.backward(d_upstream, yi)[0]

				# update the weights of the layers
				for layer in self.layers:
					layer.update_weights()

			# At the end of the epoch, save the mean loss across its iterations and reset the sum
			self.loss_history.append(sum_of_loss_per_epoch / iter_per_epoch)
			sum_of_loss_per_epoch = 0

			if j % print_every == 0:
				print(f'Finished epoch {j}/{n_epochs}. Epoch Loss: {self.loss_history[-1]:.3f}')


		return self.loss_history
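In both Skip-gram versions the loss computation is delegated to the network's last layer (via self.forward(x, y) or self.layers[-1].loss(...)). For reference, a self-contained sketch of the quantity a basic Skip-gram output layer computes, the softmax cross-entropy of the output scores over the sample's context-word indices, is shown below; whether the course code sums or averages over the context words is an assumption here.

import numpy as np

def skipgram_softmax_loss(scores, context_idx):
    '''Softmax cross-entropy of the output-layer scores, averaged over the context words.

    scores: ndarray. shape=(1, vocab_sz). Raw output-layer scores for one target word.
    context_idx: ndarray of int. Indices of the context words for this sample.
    '''
    shifted = scores - scores.max()              # numerically stable softmax
    probs = np.exp(shifted) / np.exp(shifted).sum()
    return float(-np.log(probs[0, context_idx]).mean())

# Toy check: vocab of 4 words, one target's scores, context words 1 and 3
print(skipgram_softmax_loss(np.array([[2.0, 1.0, 0.5, 1.5]]), np.array([1, 3])))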