Exemplo n.º 1
0
class TestNaiveBayesFit(unittest.TestCase):
    """Tests for ``NaiveBayes.fit``: structure creation and argument checks."""

    def setUp(self):
        # model1 starts with no edges; model2 is pre-seeded with the edge A -> B.
        self.model1 = NaiveBayes()
        self.model2 = NaiveBayes([('A', 'B')])

    def test_fit_model_creation(self):
        """Fitting should produce a star-shaped graph rooted at 'A'.

        model1 is given the parent explicitly; model2 is fitted without a
        parent argument and is expected to end up with the same structure.
        """
        values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
                              columns=['A', 'B', 'C', 'D', 'E'])
        expected_nodes = ['A', 'B', 'C', 'D', 'E']
        expected_edges = [('A', 'B'), ('A', 'C'), ('A', 'D'), ('A', 'E')]
        expected_children = {'B', 'C', 'D', 'E'}

        self.model1.fit(values, 'A')
        six.assertCountEqual(self, self.model1.nodes(), expected_nodes)
        six.assertCountEqual(self, self.model1.edges(), expected_edges)
        self.assertEqual(self.model1.parent_node, 'A')
        self.assertSetEqual(self.model1.children_nodes, expected_children)

        self.model2.fit(values)
        # Check model2 here: asserting on model1 again would leave the
        # structure of the freshly fitted model2 unverified.
        six.assertCountEqual(self, self.model2.nodes(), expected_nodes)
        six.assertCountEqual(self, self.model2.edges(), expected_edges)
        self.assertEqual(self.model2.parent_node, 'A')
        self.assertSetEqual(self.model2.children_nodes, expected_children)

    def test_fit_model_creation_exception(self):
        """``fit`` should raise ``ValueError`` for an unusable parent node."""
        values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
                              columns=['A', 'B', 'C', 'D', 'E'])
        # values2 has no 'A' column.
        values2 = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 3)),
                               columns=['C', 'D', 'E'])

        # model1 was built without edges and no parent is passed to fit.
        self.assertRaises(ValueError, self.model1.fit, values)
        self.assertRaises(ValueError, self.model1.fit, values2)
        # 'A' is requested as parent but is not a column of values2.
        self.assertRaises(ValueError, self.model2.fit, values2, 'A')

    def tearDown(self):
        del self.model1
        del self.model2
Exemplo n.º 2
0
    row_size = data.shape[0]
    random_indices = sample(range(row_size), 2000)
    smallDF = data.iloc[random_indices, :]
    smallDF.shape
    PseudoCounts = {}
    #Pseudocounts are given (1,1) for uniform
    for productName in smallDF.columns:
        PseudoCounts[productName] = [1, 1]
    DictOfModels = {}
    Edges = {}
    Nodes = {}
    CPD = {}
    for productName in smallDF.columns:
        print('Building model for {0}'.format(productName))
        model = NaiveBayes()
        model.fit(smallDF, productName)
        DictOfModels[productName] = model
        #Save edge ,node, CPD information
        Edges[productName] = model.edges()
        Nodes[productName] = model.nodes()
        CPD[productName] = model.get_cpds()
    with open("Edges.txt", "wb") as fp:
        pickle.dump(Edges, fp)

    with open("Nodes.txt", "wb") as fp:
        pickle.dump(Nodes, fp)

    with open("CPD.txt", "wb") as fp:
        pickle.dump(CPD, fp)

    with open("RandomColumns.txt", "wb") as fp:
Exemplo n.º 3
0
from sklearn.metrics import f1_score
from sklearn.preprocessing import LabelEncoder


# Load the column names and the raw data set. Only the header of names.csv is
# used: its columns become the column names of the data file.
col_names = pd.read_csv('data/names.csv')  # 'data/names.csv'
data = pd.read_csv('data/breast-cancer-wisconsin.data', names=col_names.columns)

# Drop rows whose bare_nuclei value is the '?' placeholder, then move 'id'
# into the index to stop the model from using id as a node. set_index is
# chained (not inplace) so the filtered frame is never mutated in place.
data = data[data["bare_nuclei"] != '?'].set_index('id')

# Hold out 20% of the rows; keep the true labels aside and remove them from
# the frame handed to predict().
train, test = train_test_split(data, test_size=0.2, random_state=0)
Y_test = test['class']
test = test.drop(['class'], axis=1)

#fit model
model = NaiveBayes()
model.fit(train, 'class')
print("Naive Bayes edges:        ", model.edges())

#make predictions
Y_pred = model.predict(test)

#Convert Labels so we can use sklearn function to evaluate our model
# Fit the encoder once on every class label in the data set and reuse that
# single mapping for both vectors. Calling fit_transform separately on Y_test
# and Y_pred would give inconsistent encodings whenever the predictions do not
# contain exactly the same set of classes as the true labels.
labelencoder = LabelEncoder()
labelencoder.fit(data['class'].values.ravel())
Y_test = labelencoder.transform(Y_test.values.ravel())
Y_pred = labelencoder.transform(Y_pred.values.ravel())

# Output results
accuracy = accuracy_score(Y_test, Y_pred)
precision = precision_score(Y_test, Y_pred)
f1 = f1_score(Y_test, Y_pred)
print({"Accuracy": accuracy, "Precision": precision, "F1 Score": f1})
Exemplo n.º 4
0
# Partition df into a training part and a held-out test part.
test_size = 0.33
print("\nSplitting in to training and test data using: Test size = ", test_size)
split_parts = train_test_split(df, test_size=test_size)
data_train, data_test = split_parts
print("training data:", len(data_train))
print("test data:", len(data_test))


# ---------------------------------------------------------------------------
# Model definition
# ---------------------------------------------------------------------------
model = NaiveBayes()

# Fit on the training part with 'class' as the second argument to fit(),
# estimating the CPDs with MaximumLikelihoodEstimator.
model.fit(data_train, 'class', estimator=MaximumLikelihoodEstimator)

# Dump every CPD attached to the fitted model.
print("\n\n............Overview of our CPDs from the fit...........:")
learned_cpds = model.get_cpds()
for cpd in learned_cpds:
    heading = "CPD of {variable}:".format(variable=cpd.variable)
    print(heading)
    print(cpd)

# ---------------------------------------------------------------------------
# Querying the model
# ---------------------------------------------------------------------------
# Build a VariableElimination engine over the fitted model.
model_infer = VariableElimination(model)
# Example query (currently disabled): probability of class given sex.
# print("\n\n............Here are some queries...............")
# q1 = model_infer.query(variables=['class'], evidence={'sex':0})
# print(q1['class'])