예제 #1
0
    def visualize(self,
                  predictions,
                  model,
                  save_dir='../../save/keras',
                  plt_name='keras'):
        # Evaluate predictions using accuracy metrics
        accuracy = accuracy_score(self.y_test, predictions)
        print('{} Classification'.format(model))
        print("Accuracy: %.2f%%" % (accuracy * 100.0))

        # Evaluate predictions using confusion metrics and plot confusion matrix
        classification_report = metrics.classification_report(
            predictions,
            self.y_test,
            target_names=['NadaSportswear', 'Sportswear'])
        print(classification_report)

        # Calculating confusion matrix
        cnf_matrix = confusion_matrix(self.y_test, predictions)
        np.set_printoptions(precision=2)

        # Plot module is used for plotting confusion matrix, classification report
        plot = Plot()
        plot.plotly(cnf_matrix, classification_report,
                    os.path.join(self.args.save_dir, embedding_type), plt_name)
예제 #2
0
    def run_pipeline(self):
        """
        run_pipeline function runs the actual pipeline.
        :return:
        """

        # Train & Test data split using sklearn train_test_split module
        X_train, X_test, y_train, y_test = train_test_split(
            self.data['url'],
            self.data['label'],
            test_size=0.33,
            random_state=21,
            stratify=self.data['label'])
        print(
            "*******************\nTrain set : {} \n Test set : {}\n*******************\n"
            .format(X_train.shape[0], X_test.shape[0]))

        # Running the pipeline
        model = self.pipeline.fit(X_train, y_train)

        print('Saving the {} model after fitting on training data.'.format(
            str(self.args.model).upper()))
        # Dumping tokenizer
        joblib.dump(
            model,
            os.path.join(self.args.checkpoint_dir,
                         '{}.pickle'.format(self.args.model)))

        # Calculating time per prediction
        # Start time ******************************************************************************
        start = timeit.default_timer()

        # Predicting label, confidence probability on the test data set
        predictions = model.predict(X_test)
        predictions_prob = model.predict_proba(X_test)

        # Binary class values : rounding them to 0 or 1
        predictions = [round(value) for value in predictions]

        end = timeit.default_timer()
        # End Time ******************************************************************************

        print('Time per prediction : {}'.format(
            (end - start) / X_test.shape[0]))

        # evaluate predictions using accuracy metrics
        accuracy = accuracy_score(y_test, predictions)
        print('{} Classification'.format(self.args.model))
        print("Accuracy: %.2f%%" % (accuracy * 100.0))

        # evaluate predictions using confusion metrics and plot confusion matrix
        classification_report = metrics.classification_report(
            predictions, y_test, target_names=['NadaSportswear', 'Sportswear'])
        print(classification_report)

        # Plotting confusion matrix
        cnf_matrix = confusion_matrix(y_test, predictions)
        np.set_printoptions(precision=2)

        # Plot module is used for plotting confusion matrix, classification report
        plot = Plot()
        plot.plotly(cnf_matrix, classification_report, self.args.save_dir,
                    self.args.model)