Example #1
import numpy as np

# NOTE: `BayesClassifier` and `Graph` are assumed to be provided by the
# surrounding project's own modules; their import lines are not part of
# the original snippet.
def build_bayes_graph(img, labels, sigma=1e2, kappa=2):
    """ Build a graph from the 4-neighborhood of pixels.
        Foreground and background are determined from
        labels (1 for foreground, 0 for background)
        and are modeled with naive Bayes classifiers."""
    m, n = img.shape[:2]
    # RGB vector version (one pixel per row)
    vim = img.reshape((-1, 3))
    # RGB for foreground and background
    foreground = img[labels == 1].reshape((-1, 3))
    background = img[labels == 0].reshape((-1, 3))
    train_data = [foreground, background]
    # train naive Bayes classifier
    bc = BayesClassifier()
    bc.train(train_data)
    # get probabilities for all pixels
    bc_labels, prob = bc.classify(vim)
    prob_fg, prob_bg = prob[0], prob[1]
    print(np.max(prob_fg), np.max(prob_bg))  # sanity check: peak class probabilities
    # create graph with m*n+2 nodes (assumes the project's Graph.add_node
    # accepts an iterable of node ids)
    gr = Graph()
    gr.add_node(range(m * n + 2))
    source = m * n  # second to last is source
    sink = m * n + 1  # last node is sink
    # normalize each RGB vector (cast to float first: in-place division on
    # an integer image would truncate to zero; the epsilon guards all-zero pixels)
    vim = vim.astype(float)
    vim /= np.linalg.norm(vim, axis=1, keepdims=True) + 1e-12
    # go through all nodes and add edges
    for i in range(m * n):
        # add edge from source
        gr.add_edge((source, i), (prob_fg[i] / (prob_fg[i] + prob_bg[i])))
        # add edge to sink
        gr.add_edge((i, sink), (prob_bg[i] / (prob_fg[i] + prob_bg[i])))
        # add edges to neighbors
        if i % n != 0:  # left exists
            edge_wt = kappa * \
                np.exp(-1.0 * sum((vim[i] - vim[i - 1])**2) / sigma)
            gr.add_edge((i, i - 1), edge_wt)
        if (i + 1) % n != 0:  # right exists
            edge_wt = kappa * \
                np.exp(-1.0 * sum((vim[i] - vim[i + 1])**2) / sigma)
            gr.add_edge((i, i + 1), edge_wt)
        if i // n != 0:  # up exists
            edge_wt = kappa * \
                np.exp(-1.0 * sum((vim[i] - vim[i - n])**2) / sigma)
            gr.add_edge((i, i - n), edge_wt)
        if i // n != m - 1:  # down exists
            edge_wt = kappa * \
                np.exp(-1.0 * sum((vim[i] - vim[i + n])**2) / sigma)
            gr.add_edge((i, i + n), edge_wt)
    gr.build_flow(source, sink)
    return gr
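A minimal usage sketch (toy image and hypothetical seed labels; the downstream max-flow/cut step belongs to the project's own Graph API and is not shown here):

import numpy as np

img = np.random.rand(30, 40, 3)              # toy RGB image in [0, 1]
labels = np.zeros(img.shape[:2], dtype=int)  # 0 = background seeds
labels[12:18, 15:25] = 1                     # mark a block of foreground seeds
gr = build_bayes_graph(img, labels, sigma=1e2, kappa=2)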
Example #2
import re

# `BayesClassifier` is assumed to come from the project's own module. The
# head of `tokenize` is reconstructed here, since the original snippet
# starts mid-function; assumed line format: "<msg_type> <message text>".
def tokenize(line):
    line_split = line.lower().split()
    msg_type = line_split[0]
    words = re.findall("[a-z0-9']+", " ".join(line_split[1:]))
    return msg_type, set(words)


if __name__ == '__main__':
    with open('data/data.txt', 'r') as f:
        messages = []
        for line in f:
            msg_type, words = tokenize(line)
            messages.append({'msg_type': msg_type, 'words': words})

        training_set = messages[:int(len(messages) * 0.75)]
        testing_set = messages[int(len(messages) * 0.75):]

        bayes = BayesClassifier()
        bayes.train(training_set)
        classified = bayes.classify(testing_set)

        true_positive = len([
            1 for message in classified
            if message['msg_type'] == 'spam' and message['prob_spam'] > 0.5
        ])
        false_positive = len([
            1 for message in classified
            if message['msg_type'] == 'ham' and message['prob_spam'] > 0.5
        ])
        true_negative = len([
            1 for message in classified
            if message['msg_type'] == 'ham' and message['prob_spam'] <= 0.5
        ])
        false_negative = len([
            1 for message in classified
            if message['msg_type'] == 'spam' and message['prob_spam'] <= 0.5
        ])
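The four counts above determine the standard spam-class metrics directly; a short continuation of the same script (not part of the original snippet) could report them:

        # hedged addition: precision/recall of the spam class
        precision = true_positive / (true_positive + false_positive)
        recall = true_positive / (true_positive + false_negative)
        print('precision: %.3f, recall: %.3f' % (precision, recall))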
Example #3
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import sys

from preprocess import Preprocessor
from features import FeatureSelector
from bayes import BayesClassifier

if __name__ == '__main__':
    # two positional arguments: training file and test file
    train_file = sys.argv[1]
    test_file = sys.argv[2]

    pr = Preprocessor()
    pr.build_vocabulary_and_categories(train_file)

    fs = FeatureSelector(train_file, ck=500)
    fs.select_features()

    bc = BayesClassifier(train_file, test_file, model='bernoulli')
    bc.train()
    bc.test()
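Invocation (hypothetical file names; the script reads the training and test sets from its two positional arguments):

    python run_bayes.py train.txt test.txt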