Example No. 1
    def test(self):
        LoggerHelper.info("Test Started...")
        self.timer.start()
        df = pandas.DataFrame(columns=['Accuracy', 'Test Accuracy', 'Mean Test Loss'])
        val_losses = []
        self.model.eval()
        accuracy = 0
        result = np.asarray([])
        result_expected = np.asarray([])
        with torch.no_grad():  # no gradients are needed during evaluation
            for x, y in self.reader.get_data(NewsDnnBaseDataReader.DictDataTerm["Test"],
                                             NewsDnnBaseDataReader.DictDataType[
                                                 self.config["options"]["network_type"]]):
                inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
                if self.model.can_use_gpu and self.config["networkConfig"]["useGPU"]:
                    inputs, targets = inputs.cuda(), targets.cuda()

                output = self.model(inputs)
                val_loss = self.criterion(output, targets.long())
                val_losses.append(val_loss.item())
                accuracy += self.calculate_accuracy(output, targets)
                result = np.append(result, self.get_output(output))
                # move targets back to the CPU before converting to NumPy
                result_expected = np.append(result_expected, targets.cpu().numpy())
        scores = self.calculate_scores(result_expected, result)
        df = self.log_test(df, accuracy, self.test_count, val_losses, scores)
        Export.append_df_to_excel(df, self.current_date)
        self.timer.stop(time_for="Test")
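
The helpers `calculate_accuracy` and `get_output` are called here but not shown in the example. A minimal sketch of what they might look like for a classifier whose output is a batch of class logits; the names, shapes, and return conventions are assumptions, not part of the original code:

    import numpy as np
    import torch

    def calculate_accuracy(output, targets):
        # Hypothetical helper: number of correct argmax predictions in the batch.
        preds = torch.argmax(output, dim=1)            # (batch,) predicted class ids
        return (preds == targets.long()).sum().item()

    def get_output(output):
        # Hypothetical helper: predicted class indices as a NumPy array.
        return torch.argmax(output, dim=1).cpu().numpy()
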
Example No. 2
    def test(self):
        LoggerHelper.info("Test Started...")
        self.timer.start()
        df = pandas.DataFrame(columns=['Accuracy', 'Test Accuracy', 'Mean Test Loss'])
        # Tracking variables (val_losses stays empty in this variant: no loss is computed)
        val_losses = []
        predictions, true_labels = [], []

        test_set = self.reader.get_data(NewsCateDataReader.DictDataTerm["Test"],
                                        NewsCateDataReader.DictDataType[
                                            self.config["options"]["network_type"]])
        self.model.eval()
        accuracy = 0
        for batch in test_set:
            # Add batch to GPU
            batch = tuple(t.to(self.device) for t in batch)

            # Unpack the inputs from our dataloader
            b_input_ids, b_input_mask, b_labels = batch

            with torch.no_grad():
                # Forward pass, calculate logit predictions
                outputs = self.model(b_input_ids, token_type_ids=None,
                                     attention_mask=b_input_mask)

                logits = outputs[0]

                # Move logits and labels to CPU
                logits = logits.detach().cpu().numpy()
                label_ids = b_labels.to('cpu').numpy()

                # Calculate the accuracy for this batch of test sentences.
                label, acc = self.calculate_accuracy(logits, label_ids)
                accuracy += acc

                # Store predictions and true labels
                predictions.append(label)
                true_labels.append(label_ids)
        scores = self.calculate_scores(predictions, true_labels)
        df = self.log_test(df, accuracy, self.test_count, val_losses, scores)
        Export.append_df_to_excel(df, self.current_date)
        self.timer.stop(time_for="Test")
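
In this variant `calculate_accuracy` takes NumPy logits and label ids and returns both the predicted labels and the batch accuracy. A plausible sketch in the style of common BERT fine-tuning recipes; treat the exact signature as an assumption:

    import numpy as np

    def calculate_accuracy(logits, label_ids):
        # Hypothetical helper: flat accuracy over (batch, num_classes) logits.
        pred_flat = np.argmax(logits, axis=1).flatten()
        labels_flat = label_ids.flatten()
        return pred_flat, np.sum(pred_flat == labels_flat) / len(labels_flat)
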
Example No. 3
    def test(self):
        print("Test Started")
        self.timer.start()
        df = pandas.DataFrame(columns=['Accuracy', 'Mean Test Loss'])
        val_h = self.model.init_hidden(self.reader.batch_size)
        val_losses = []
        self.model.eval()
        counter_r = 2
        accuracy = 0
        result = np.asarray([])
        result_expected = np.asarray([])
        with torch.no_grad():  # no gradients are needed during evaluation
            for x, y in self.reader.get_data(PriceRnnDataReader.DictDataTerm["Test"],
                                             PriceRnnDataReader.DictDataType[
                                                 self.config["options"]["network_type"]]):
                x, y = torch.from_numpy(x), torch.from_numpy(y)

                # Create new variables for the hidden state, otherwise
                # we'd backprop through the entire training history
                val_h = tuple(each.data for each in val_h)

                inputs, targets = x, y
                if self.model.can_use_gpu and self.config["networkConfig"]["useGPU"]:
                    inputs, targets = inputs.cuda(), targets.cuda()

                output, val_h = self.model(inputs, val_h)
                # val_loss = self.criterion(output, targets.view(self.reader.batch_size * self.reader.sequence_length))
                val_loss = self.criterion(output, targets.long())
                val_losses.append(val_loss.item())
                acc, res = self.calculate_accuracy(output, targets)
                accuracy += acc
                counter_r += len(targets)
                result = np.append(result, res)
                # move targets back to the CPU before converting to NumPy
                result_expected = np.append(result_expected, targets.cpu().numpy())
        scores = self.calculate_scores(result_expected, result)
        df = self.log_test(df, accuracy, counter_r, val_losses, scores)
        Export.append_df_to_excel(df, self.current_date)
        self.timer.stop(time_for="Test")
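
All three RNN testers rebuild the hidden state with `tuple(each.data for each in val_h)` so that gradients cannot flow back through every earlier batch. A standalone illustration of the same pattern using `.detach()`, the modern equivalent of `.data`; the model and shapes are invented for the demo:

    import torch
    import torch.nn as nn

    lstm = nn.LSTM(input_size=8, hidden_size=16, batch_first=True)
    h = (torch.zeros(1, 4, 16), torch.zeros(1, 4, 16))  # (h_0, c_0) for a batch of 4

    for _ in range(3):                            # pretend these are successive batches
        x = torch.randn(4, 5, 8)                  # batch=4, seq_len=5, features=8
        h = tuple(state.detach() for state in h)  # cut the graph at the batch boundary
        out, h = lstm(x, h)
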
Example No. 4
    def train(self, lr=0.001, clip=5, val_frac=0.1, print_every=10):
        """ Training a network

            Arguments
            ---------
            lr: learning rate
            clip: gradient clipping
            val_frac: Fraction of data to hold out for validation
            print_every: Number of steps for printing training and validation loss

        """
        df = pandas.DataFrame(
            columns=['Epoch', 'Step', 'Last Train Loss', 'Mean Test Loss'])
        self.timer.start()
        self.model.train()

        if self.model.train_on_gpu:
            self.model.cuda()

        counter = 0
        h = None
        for e in range(self.epochs):
            if h is None:  # initialize hidden state
                h = self.model.init_hidden(self.reader.batch_size)

            for x, y in self.reader.get_train_data():  # yields batches like get_batches(data, batch_size, seq_length)
                counter += 1
                inputs, targets = torch.from_numpy(x), torch.from_numpy(y)

                if self.model.train_on_gpu:
                    inputs, targets = inputs.cuda(), targets.cuda()

                # Creating new variables for the hidden state, otherwise
                # we'd backprop through the entire training history
                h = tuple([each.data for each in h])

                # zero accumulated gradients
                self.model.zero_grad()
                # get the output from the model; input should be 3-dimensional: (seq_len, batch, input_size)
                output, h = self.model(inputs, h)
                # calculate the loss and perform back propagation
                loss = self.criterion(
                    output,
                    targets.view(self.reader.batch_size *
                                 self.reader.sequence_length))
                loss.backward()
                # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
                nn.utils.clip_grad_norm_(self.model.parameters(), clip)
                self.optimizer.step()

                # loss stats
                if counter % print_every == 0:
                    # Get validation loss
                    val_h = self.model.init_hidden(self.reader.batch_size)
                    val_losses = []
                    self.model.eval()
                    for x, y in self.reader.get_test_data():  # batches like get_batches(val_data, batch_size, seq_length)
                        x, y = torch.from_numpy(x), torch.from_numpy(y)

                        # Creating new variables for the hidden state, otherwise
                        # we'd backprop through the entire training history
                        val_h = tuple([each.data for each in val_h])

                        inputs, targets = x, y
                        if self.model.train_on_gpu:
                            inputs, targets = inputs.cuda(), targets.cuda()

                        output, val_h = self.model(inputs, val_h)
                        val_loss = self.criterion(
                            output,
                            targets.view(self.reader.batch_size *
                                         self.reader.sequence_length))

                        val_losses.append(val_loss.item())

                    self.model.train()  # reset to train mode after iterating through validation data
                    print("Epoch: {}/{}...".format(e + 1, self.epochs),
                          "Step: {}...".format(counter),
                          "Loss: {:.4f}...".format(loss.item()),
                          "Val Loss: {:.4f}".format(np.mean(val_losses)))
                    # DataFrame.append was removed in pandas 2.0; use concat instead
                    df = pandas.concat([df, pandas.DataFrame([{
                        'Epoch': "{}/{}".format(e + 1, self.epochs),
                        'Step': counter,
                        'Last Train Loss': loss.item(),
                        'Mean Test Loss': np.mean(val_losses)
                    }])], ignore_index=True)
        self.timer.stop()
        self.save_model()
        date = DateHelper.get_current_date()
        Export.append_df_to_excel(df, date)
        Export.append_df_to_excel(self.get_info(), date)
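
The comments refer to a `get_batches(data, batch_size, seq_length)` generator that the reader presumably implements. A minimal sketch of such a batcher for an integer-encoded sequence array, in the common character-RNN style; the actual reader in this project may differ:

    import numpy as np

    def get_batches(arr, batch_size, seq_length):
        # Yield (x, y) arrays of shape (batch_size, seq_length), y being x shifted by one.
        per_batch = batch_size * seq_length
        n_batches = len(arr) // per_batch
        arr = arr[:n_batches * per_batch].reshape((batch_size, -1))
        for n in range(0, arr.shape[1], seq_length):
            x = arr[:, n:n + seq_length]
            y = np.zeros_like(x)
            try:
                y[:, :-1], y[:, -1] = x[:, 1:], arr[:, n + seq_length]
            except IndexError:  # wrap around at the end of the data
                y[:, :-1], y[:, -1] = x[:, 1:], arr[:, 0]
            yield x, y
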
Example No. 5
    def train(self, clip=5, val_frac=0.1, print_every=20):
        """ Training a network

            Arguments
            ---------
            clip: gradient clipping
            val_frac: Fraction of data to hold out for validation
            print_every: Number of steps for printing training and validation loss

        """
        df = pandas.DataFrame(
            columns=['Epoch', 'Step', 'Last Train Loss', 'Mean Test Loss'])
        self.timer.start()
        self.model.train()

        if self.model.can_use_gpu and self.config["networkConfig"]["useGPU"]:
            self.model.cuda()

        counter = 0
        for e in range(self.epochs):
            # re-initialize the hidden state at the start of every epoch
            h = self.model.init_hidden(self.reader.batch_size)

            print(self.config["options"]["network_type"])
            print(NewsDnnGeneralDataReader.DictDataType[self.config["options"]
                                                        ["network_type"]])
            # Batch Loop
            for x, y in self.reader.get_data(
                    fetch_type=NewsDnnGeneralDataReader.DictDataTerm["Train"],
                    data_type=NewsDnnGeneralDataReader.DictDataType[
                        self.config["options"]["network_type"]]):
                counter += 1
                inputs, targets = torch.from_numpy(x), torch.from_numpy(y)

                if self.model.can_use_gpu and self.config["networkConfig"]["useGPU"]:
                    inputs, targets = inputs.cuda(), targets.cuda()

                # Creating new variables for the hidden state, otherwise
                # we'd backprop through the entire training history
                h = tuple([each.data for each in h])

                # zero accumulated gradients
                self.model.zero_grad()

                # get the output from the model; input should be 3-dimensional: (seq_len, batch, input_size)
                output, h = self.model(inputs, h)

                # calculate the loss and perform back propagation
                loss = self.criterion(output.squeeze(), targets.long())
                loss.backward()

                # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
                nn.utils.clip_grad_norm_(self.model.parameters(), clip)
                self.optimizer.step()

                # Validate
                if counter % print_every == 0:
                    timer = Timer()
                    timer.start()
                    df = self.validate(df, e, counter, loss)
                    timer.stop(time_for="Validate")
                    self.model.train()  # return to train mode after validation
        self.timer.stop(time_for="Train")
        self.save_model()
        self.current_date = DateHelper.get_current_date()
        Export.append_df_to_excel(df, self.current_date)
        Export.append_df_to_excel(self.get_info(), self.current_date)
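
The loop assumes `self.criterion` and `self.optimizer` were built elsewhere. Since the loss is applied to raw outputs against `targets.long()`, a setup along these lines is likely; the stand-in model, optimizer choice, and learning rate are assumptions:

    import torch
    import torch.nn as nn

    model = nn.Linear(32, 3)           # stand-in for the real network
    criterion = nn.CrossEntropyLoss()  # matches criterion(output, targets.long())
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # one training step, mirroring the loop above
    x, y = torch.randn(8, 32), torch.randint(0, 3, (8,))
    optimizer.zero_grad()
    loss = criterion(model(x), y.long())
    loss.backward()
    nn.utils.clip_grad_norm_(model.parameters(), 5)  # same clipping as in the example
    optimizer.step()
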
Example No. 6
    def train(self, clip=5, val_frac=0.1, print_every=20):
        """ Training a network

            Arguments
            ---------
            clip: gradient clipping
            val_frac: Fraction of data to hold out for validation
            print_every: Number of steps for printing training and validation loss

        """
        df = pandas.DataFrame(columns=['Epoch', 'Step',
                                       'Train Mean Loss Cumulative', 'Train Accuracy',
                                       'Val Mean Loss', 'Val Accuracy'])
        self.timer.start()
        self.model.train()

        if self.model.can_use_gpu and self.config["networkConfig"]["useGPU"]:
            self.model.cuda()

        counter = 0
        for e in range(self.epochs):

            print(self.config["options"]["network_type"])
            print(NewsDnnBaseDataReader.DictDataType[
                      self.config["options"]["network_type"]])
            train_accuracy = 0
            losses = []
            # Batch Loop
            for x, y in self.reader.get_data(fetch_type=NewsDnnBaseDataReader.DictDataTerm["Train"],
                                             data_type=NewsDnnBaseDataReader.DictDataType[self.config["options"]["network_type"]]):
                counter += 1
                inputs, targets = torch.from_numpy(x), torch.from_numpy(y)

                if self.model.can_use_gpu and self.config["networkConfig"]["useGPU"]:
                    inputs, targets = inputs.cuda(), targets.cuda()

                # zero accumulated gradients
                self.optimizer.zero_grad()
                # self.model.zero_grad()

                # get the output from the model -
                output = self.model(inputs)  # Input Should Be 3-Dimensional: seq_len, batch, input_size

                # calculate the loss and perform back propagation
                loss = self.criterion(output, targets.long())
                loss.backward()
                losses.append(loss.item())
                train_accuracy += self.calculate_accuracy(output, targets)

                # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
                nn.utils.clip_grad_norm_(self.model.parameters(), clip)
                self.optimizer.step()

                # Validate In Steps
                if counter % print_every == 0:
                    timer = Timer()
                    timer.start()
                    df = self.validate(df, e, counter, losses, train_accuracy, print_every)
                    train_accuracy = 0  # Clear Train Accuracy
                    timer.stop(time_for="Validate")
                    self.model.train()
        self.timer.stop(time_for="Train")
        self.save_model()
        self.current_date = DateHelper.get_current_date()
        Export.append_df_to_excel(df, self.current_date)
        Export.append_df_to_excel(self.get_info(), self.current_date)
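
This example zeroes gradients through the optimizer and leaves `self.model.zero_grad()` commented out. The two are interchangeable whenever the optimizer was constructed over all of the model's parameters, since both clear the same gradient buffers; a quick standalone check with a toy model:

    import torch
    import torch.nn as nn

    model = nn.Linear(4, 2)
    opt = torch.optim.SGD(model.parameters(), lr=0.1)

    model(torch.randn(3, 4)).sum().backward()
    opt.zero_grad()  # same effect as model.zero_grad() here
    assert all(p.grad is None or torch.all(p.grad == 0) for p in model.parameters())
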
Example No. 7
    def train(self, print_every=20):
        df = pandas.DataFrame(columns=['Epoch', 'Step',
                                       'Train Mean Loss Cumulative', 'Train Accuracy',
                                       'Val Mean Loss', 'Val Accuracy'])
        self.timer.start()
        self.model.train()  # Set mode of model
        losses = []
        train_set = self.reader.get_data(fetch_type=NewsCateDataReader.DictDataTerm["Train"],
                                         data_type=NewsCateDataReader.DictDataType[
                                             self.config["options"]["network_type"]])
        for e in range(self.epochs):
            print(self.config["options"]["network_type"])
            print(NewsCateDataReader.DictDataType[
                      self.config["options"]["network_type"]])
            self.model.train()  # Set to Train Mode
            total_loss_for_epoch = 0

            epoch_timer = Timer()
            epoch_timer.start()
            for step, batch in enumerate(train_set):  # For each batch of training data...
                # Progress update every `print_every` batches.
                if step % print_every == 0:
                    print('  Batch {:>5,}  of  {:>5,}.'.format(step, len(train_set)))
                # Get Data
                b_input_ids = batch[0].to(self.device)
                b_input_mask = batch[1].to(self.device)
                b_labels = batch[2].to(self.device)

                # (source: https://stackoverflow.com/questions/48001598/why-do-we-need-to-call-zero-grad-in-pytorch)
                self.model.zero_grad()

                # Perform a forward pass (evaluate the model on this training batch).
                # https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification
                outputs = self.model(b_input_ids,
                                     token_type_ids=None,
                                     attention_mask=b_input_mask,
                                     labels=b_labels)
                loss = outputs[0]
                total_loss_for_epoch += loss.item()

                # Perform a backward pass to calculate the gradients.
                loss.backward()

                # This is to help prevent the "exploding gradients" problem.
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)

                # Update the parameters; the optimizer dictates how they are
                # modified based on their gradients, the learning rate, etc.
                self.optimizer.step()

                # Update the learning rate.
                self.scheduler.step()

            # Calculate the average loss over the training data.
            avg_train_loss = total_loss_for_epoch / len(train_set)

            # Store the loss value for plotting the learning curve.
            losses.append(avg_train_loss)
            LoggerHelper.info("  Average training loss: {0:.2f}".format(avg_train_loss))
            epoch_timer.stop(time_for="Epoch")

            timer = Timer(start=True)
            df = self.validate(df, e, losses)
            timer.stop(time_for="Validate")
            self.model.train()
        self.timer.stop(time_for="Train")
        self.save_model()
        self.current_date = DateHelper.get_current_date()
        Export.append_df_to_excel(df, self.current_date)
        Export.append_df_to_excel(self.get_info(), self.current_date)
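
The loop calls `self.scheduler.step()` once per batch, which matches the linear warmup-then-decay schedule used in the Hugging Face BERT fine-tuning recipes. A sketch of the setup this trainer appears to assume; the checkpoint name, learning rate, warmup steps, and step counts here are assumptions:

    import torch
    from transformers import BertForSequenceClassification, get_linear_schedule_with_warmup

    model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)
    optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)

    epochs, batches_per_epoch = 4, 100  # made-up sizes for the sketch
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,
        num_training_steps=epochs * batches_per_epoch)  # one scheduler.step() per batch
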