Пример #1
0
    def validate(self):
        """
        Classify every row of the test set with the trained Naive Bayes
        model and report the accuracy.

        For each row the class priors (``prob_0`` / ``prob_1``) are
        multiplied by the conditional probability of every feature value:
        a feature equal to 0 uses the ``f0`` tables, any other value uses
        the ``f1`` tables. Class 0 is predicted only when its product is
        strictly larger, so ties go to class 1.

        The predictions are stored in ``self.predictions``. The list is
        rebuilt on every call, so validating twice does not leave entries
        from an earlier run in it.

        :return: ``get_num_similarities(predictions, test_classes)`` divided
            by the number of test classes and multiplied by 100, or ``None``
            when no model is available yet
        """
        print("Testing Naive Bayes Accuracy...")
        if not self.model:
            print("Please call the function train first!")
            return None

        model = self.model

        # Collect into a fresh list: appending straight to the persistent
        # self.predictions made a second call extend the results of the
        # first one, so the list no longer lined up with self.test_classes.
        predictions = []

        # NOTE(review): plain products of many probabilities can underflow
        # to 0.0 for wide feature sets - consider log-space if that applies.
        for _, row in self.test.iterrows():
            c0_product = model.prob_0
            c1_product = model.prob_1
            for feature_index, feature in enumerate(row):
                if feature == 0:
                    c0_product *= model.feature_probs_f0_given0[
                        feature_index]
                    c1_product *= model.feature_probs_f0_given1[
                        feature_index]
                else:
                    c0_product *= model.feature_probs_f1_given0[
                        feature_index]
                    c1_product *= model.feature_probs_f1_given1[
                        feature_index]

            predictions.append(0 if c0_product > c1_product else 1)

        self.predictions = predictions

        return get_num_similarities(predictions, self.test_classes) / len(
            self.test_classes) * 100
 def test_performance(self, data, *hypothesis):
     """
     Score a hypothesis against the validation classes.

     The hypothesis is forwarded positionally to ``self.validate`` together
     with ``data``; the result is compared with ``self.valid_classes``.

     :param data: The data passed on to ``self.validate``
     :param hypothesis: The model components to evaluate
     :return: ``get_num_similarities(valid_classes, predictions)`` divided
         by the number of validation classes (not multiplied by 100)
     """
     predicted = self.validate(data, *hypothesis)
     expected = self.valid_classes
     matches = get_num_similarities(expected, predicted)
     return matches / len(expected)
    def make_predictions(self):
        """
        Predict a class for every row of the test set, print the predicted
        and the expected classes, and report the accuracy.

        :return: ``get_num_similarities(predictions, test_classes)`` divided
            by the number of test classes and multiplied by 100
        """
        predicted = [self.predict(sample)
                     for _, sample in self.test.iterrows()]

        print("Predicted Classes = ")
        print(predicted)

        expected = self.test_classes
        print("Expected Classes = ")
        print(list(expected))

        matches = get_num_similarities(predicted, expected)
        return matches / len(expected) * 100
Пример #4
0
def cluster_and_classify(optimized_feature_set, x_test, x_train):
    """Cluster the training data, train and test Naive Bayes on the cluster
    labels, and print the silhouette coefficient of the clusters.

    Only the columns selected by ``optimized_feature_set`` are used from
    ``x_train`` and ``x_test``. All results are printed; nothing is returned.
    """
    # Step 1: k-means on the selected training features
    # (the second argument, 2, is presumably the number of clusters).
    print("Running K Means on Glass data set with optimized feature set...")
    clusterer = KMeansClustering(x_train[optimized_feature_set], 2)
    cluster_labels = clusterer.run()

    # Step 2: fit Naive Bayes using the cluster labels as classes.
    print("Training with Naive Bayes with k-means labels...")
    label_series = pd.Series(cluster_labels)
    fitted = nb.learn(label_series, x_train[optimized_feature_set])

    # Step 3: run the fitted classifier on the test features.
    print("Testing Naive Bayes Classifier with cluster labels")
    test_predictions = nb.test(x_test[optimized_feature_set], *fitted)
    # NOTE(review): cluster_labels were produced from x_train while
    # test_predictions come from x_test - confirm this comparison is intended.
    matches = get_num_similarities(cluster_labels, test_predictions)
    accuracy = matches / len(cluster_labels) * 100
    print("Naive Bayes Classifier Performance = " + str(accuracy))

    # Step 4: silhouette coefficient of the training clusters.
    print("Calculating the silhouette coefficient...")
    silhouette = calculate_silhouette_coefficient(
        x_train[optimized_feature_set], cluster_labels)
    print("Silhouette Coefficient = " + str(silhouette))
    def validate(self):
        """
        Measure the accuracy of the learned weights on the test data set.

        For every test row the weighted sum of its values is passed through
        ``self.sigmoid``; an output above 0.5 is classified as 1, anything
        else as 0.

        :return: ``get_num_similarities(predictions, test_classes)`` divided
            by the number of test classes and multiplied by 100
        """
        print("Testing...")
        predicted = []
        for _, sample in self.test.iterrows():
            # Linear combination of the sample's values and the weights;
            # one term per entry in self.features.
            linear = 0
            for position, _ in enumerate(self.features):
                linear += float(sample[position]) * self.weights[position]

            activation = self.sigmoid(linear)
            predicted.append(1 if activation > 0.5 else 0)

        expected = self.test_classes
        return get_num_similarities(predicted, expected) / len(expected) * 100