Python NaiveBayesClassifier示例，text.classifiers.NaiveBayesClassifier Python示例

示例#1

0

显示文件

文件： test.py 项目： LiuyinC/MDLab

def test_Textblog():
    train = [('I love this sandwich.', 'pos'),
             ('This is an amazing place!', 'pos'),
             ('I feel very good about these beers.', 'pos'),
             ('This is my best work.', 'pos'), ("What an awesome view", 'pos'),
             ('I do not like this restaurant', 'neg'),
             ('I am tired of this stuff.', 'neg'),
             ("I can't deal with this", 'neg'),
             ('He is my sworn enemy!', 'neg'), ('My boss is horrible.', 'neg')]
    test = [('The beer was good.', 'pos'), ('I do not enjoy my job', 'neg'),
            ("I ain't feeling dandy today.", 'neg'),
            ("I feel amazing!", 'pos'), ('Gary is a friend of mine.', 'pos'),
            ("I can't believe I'm doing this.", 'neg')]
    cl = NaiveBayesClassifier(train)
    #print cl.classify("Their burgers are amazing")  # "pos"
    #print cl.classify("I don't like their pizza.")  # "neg"
    import nltk
    new_train = []
    for item in train:
        token_sent = nltk.word_tokenize(item[0])

        item = list(item)
        item[0] = token_sent
        item[1] = item[1]
        item = tuple(item)
        new_train.append(item)

    print new_train
    cl = NaiveBayesClassifier(new_train)
    new_test = nltk.word_tokenize("I don't like their pizza.")
    print new_test, cl.classify(new_test)

示例#2

0

显示文件

文件： NBTextBlob.py 项目： ssj018/maxent

def nb(data):
    # check out params

    # divide data into 4 = 3 + 1, 3 for train, 1 for test
    train = data[0:(len(data) / 4) * 3]
    test = data[(len(data) / 4) * 3:]

    print "Training ..."
    classifier = NaiveBayesClassifier(train)
    print "Testing ..."
    print "Accuracy: ", classifier.accuracy(test)
    """

示例#3

0

显示文件

文件： NBTextBlob.py 项目： csrgxtu/maxent

def nb(data):
  # check out params
  
  # divide data into 4 = 3 + 1, 3 for train, 1 for test
  train = data[0: (len(data) / 4) * 3]
  test = data[(len(data) / 4) * 3:]
  
  print "Training ..."
  classifier = NaiveBayesClassifier(train)
  print "Testing ..."
  print "Accuracy: ", classifier.accuracy(test)
  
  """

示例#4

0

显示文件

 def setUp(self):
     """Create the labelled fixtures and train the classifier on the training set."""
     upbeat = [
         'I love this car',
         'This view is amazing',
         'I feel great this morning',
         'I am so excited about the concert',
         'He is my best friend',
     ]
     downbeat = [
         'I do not like this car',
         'This view is horrible',
         'I feel tired this morning',
         'I am not looking forward to the concert',
         'He is my enemy',
     ]
     # Positive examples first, then negative ones.
     self.train_set = ([(text, 'positive') for text in upbeat] +
                       [(text, 'negative') for text in downbeat])
     self.classifier = NaiveBayesClassifier(self.train_set)
     self.test_set = [
         ('I feel happy this morning', 'positive'),
         ('Larry is my friend.', 'positive'),
         ('I do not like that man.', 'negative'),
         ('My house is not great.', 'negative'),
         ('Your song is annoying.', 'negative'),
     ]

示例#5

0

显示文件

文件： test.py 项目： LiuyinC/MDLab

def test_Textblog():
    train = [
        ('I love this sandwich.', 'pos'),
        ('This is an amazing place!', 'pos'),
        ('I feel very good about these beers.', 'pos'),
        ('This is my best work.', 'pos'),
        ("What an awesome view", 'pos'),
        ('I do not like this restaurant', 'neg'),
        ('I am tired of this stuff.', 'neg'),
        ("I can't deal with this", 'neg'),
        ('He is my sworn enemy!', 'neg'),
        ('My boss is horrible.', 'neg')
    ]
    test = [
        ('The beer was good.', 'pos'),
        ('I do not enjoy my job', 'neg'),
        ("I ain't feeling dandy today.", 'neg'),
        ("I feel amazing!", 'pos'),
        ('Gary is a friend of mine.', 'pos'),
        ("I can't believe I'm doing this.", 'neg')
    ]
    cl = NaiveBayesClassifier(train)
    #print cl.classify("Their burgers are amazing")  # "pos"
    #print cl.classify("I don't like their pizza.")  # "neg"
    import nltk
    new_train = []
    for item in train:
        token_sent = nltk.word_tokenize(item[0])

        item = list(item)
        item[0] = token_sent
        item[1] = item[1]
        item = tuple(item)
        new_train.append(item)

    print new_train
    cl = NaiveBayesClassifier(new_train)
    new_test = nltk.word_tokenize("I don't like their pizza.")
    print new_test, cl.classify(new_test)

示例#6

0

显示文件

文件： Project_2_1.py 项目： LiuyinC/MDLab

def classifier_NB(training_dict, contro_list):
    """Train one NaiveBayesClassifier per topic.

    The training data of a topic starts with every item of ``contro_list``
    and is followed by a ``(content_keywords, topic)`` pair for each article
    of that topic whose ``content_keywords()`` result is not ``[]``.

    Returns a dict mapping topic -> classifier trained for that topic.
    Topics whose training data ends up empty get no entry.
    """
    classifiers = {}
    for topic in training_dict:
        samples = list(contro_list)
        for article in training_dict[topic]:
            keywords = article.content_keywords()
            if keywords == []:
                continue
            samples.append((keywords, topic))
        if samples == []:
            continue
        classifiers[topic] = NaiveBayesClassifier(samples)
    return classifiers

示例#7

0

显示文件

文件： test_classifiers.py 项目： robertlayton/TextBlob

 def setUp(self):
     """Create the labelled fixtures and train the classifier on the training set."""
     upbeat = [
         'I love this car',
         'This view is amazing',
         'I feel great this morning',
         'I am so excited about the concert',
         'He is my best friend',
     ]
     downbeat = [
         'I do not like this car',
         'This view is horrible',
         'I feel tired this morning',
         'I am not looking forward to the concert',
         'He is my enemy',
     ]
     # Positive examples first, then negative ones.
     self.train_set = ([(text, 'positive') for text in upbeat] +
                       [(text, 'negative') for text in downbeat])
     self.classifier = NaiveBayesClassifier(self.train_set)
     self.test_set = [
         ('I feel happy this morning', 'positive'),
         ('Larry is my friend.', 'positive'),
         ('I do not like that man.', 'negative'),
         ('My house is not great.', 'negative'),
         ('Your song is annoying.', 'negative'),
     ]

示例#8

0

显示文件

文件： test_classifiers.py 项目： syllog1sm/TextBlob

class TestNaiveBayesClassifier(unittest.TestCase):
    """Tests for NaiveBayesClassifier.

    Relies on ``train_set``, ``test_set``, ``custom_extractor`` and the
    ``CSV_FILE`` / ``JSON_FILE`` / ``TSV_FILE`` fixtures, none of which are
    defined inside this class.
    """

    def setUp(self):
        """Train a fresh classifier on ``train_set`` before every test."""
        self.classifier = NaiveBayesClassifier(train_set)

    def test_basic_extractor(self):
        """basic_extractor flags words of the text with ``contains(word)`` features."""
        text = "I feel happy this morning."
        feats = basic_extractor(text, train_set)
        assert_true(feats["contains(feel)"])
        assert_true(feats['contains(morning)'])
        assert_false(feats["contains(amazing)"])

    def test_default_extractor(self):
        """extract_features gives the same result as basic_extractor."""
        text = "I feel happy this morning."
        assert_equal(self.classifier.extract_features(text),
                     basic_extractor(text, train_set))

    def test_classify(self):
        """classify returns 'positive' and leaves the training set size unchanged."""
        res = self.classifier.classify("I feel happy this morning")
        assert_equal(res, 'positive')
        assert_equal(len(self.classifier.train_set), len(train_set))

    def test_classify_a_list_of_words(self):
        """classify also accepts a list of words."""
        res = self.classifier.classify(
            ["I", "feel", "happy", "this", "morning"])
        assert_equal(res, "positive")

    def test_train_from_lists_of_words(self):
        """Training on word lists yields the same accuracy as training on strings."""
        # classifier can be trained on lists of words instead of strings
        train = [(doc.split(), label) for doc, label in train_set]
        classifier = NaiveBayesClassifier(train)
        assert_equal(classifier.accuracy(test_set),
                     self.classifier.accuracy(test_set))

    def test_prob_classify(self):
        """prob_classify ranks 'positive' above 'negative' for a happy sentence."""
        res = self.classifier.prob_classify("I feel happy this morning")
        assert_equal(res.max(), "positive")
        assert_true(res.prob("positive") > res.prob("negative"))

    def test_accuracy(self):
        """accuracy returns a float."""
        acc = self.classifier.accuracy(test_set)
        assert_true(isinstance(acc, float))

    def test_update(self):
        """update adds the new example and raises the probability of its label."""
        res1 = self.classifier.prob_classify("lorem ipsum")
        original_length = len(self.classifier.train_set)
        self.classifier.update([("lorem ipsum", "positive")])
        new_length = len(self.classifier.train_set)
        res2 = self.classifier.prob_classify("lorem ipsum")
        assert_true(res2.prob("positive") > res1.prob("positive"))
        assert_equal(original_length + 1, new_length)

    def test_labels(self):
        """labels contains both training labels."""
        labels = self.classifier.labels()
        assert_true("positive" in labels)
        assert_true("negative" in labels)

    def test_show_informative_features(self):
        """Smoke test: the call must not raise; its result is not checked."""
        feats = self.classifier.show_informative_features()

    def test_informative_features(self):
        """informative_features returns a list whose first item is a tuple."""
        feats = self.classifier.informative_features(3)
        assert_true(isinstance(feats, list))
        assert_true(isinstance(feats[0], tuple))

    def test_custom_feature_extractor(self):
        """A custom extractor can be passed as the second constructor argument."""
        cl = NaiveBayesClassifier(train_set, custom_extractor)
        cl.classify("Yay! I'm so happy it works.")
        assert_equal(cl.train_features[0][1], 'positive')

    def test_init_with_csv_file(self):
        """Training data can be loaded from CSV_FILE with an explicit format."""
        cl = NaiveBayesClassifier(CSV_FILE, format="csv")
        assert_equal(cl.classify("I feel happy this morning"), 'pos')
        training_sentence = cl.train_set[0][0]
        assert_true(isinstance(training_sentence, unicode))

    def test_init_with_csv_file_without_format_specifier(self):
        """Training data can be loaded from CSV_FILE without naming the format."""
        cl = NaiveBayesClassifier(CSV_FILE)
        assert_equal(cl.classify("I feel happy this morning"), 'pos')
        training_sentence = cl.train_set[0][0]
        assert_true(isinstance(training_sentence, unicode))

    def test_init_with_json_file(self):
        """Training data can be loaded from JSON_FILE with an explicit format."""
        cl = NaiveBayesClassifier(JSON_FILE, format="json")
        assert_equal(cl.classify("I feel happy this morning"), 'pos')
        training_sentence = cl.train_set[0][0]
        assert_true(isinstance(training_sentence, unicode))

    def test_init_with_json_file_without_format_specifier(self):
        """Training data can be loaded from JSON_FILE without naming the format."""
        cl = NaiveBayesClassifier(JSON_FILE)
        assert_equal(cl.classify("I feel happy this morning"), 'pos')
        training_sentence = cl.train_set[0][0]
        assert_true(isinstance(training_sentence, unicode))

    def test_accuracy_on_a_csv_file(self):
        """accuracy accepts CSV_FILE as test data and returns a float."""
        a = self.classifier.accuracy(CSV_FILE)
        assert_true(isinstance(a, float))

    def test_accuracy_on_json_file(self):
        """accuracy accepts JSON_FILE as test data and returns a float."""
        a = self.classifier.accuracy(JSON_FILE)
        assert_true(isinstance(a, float))

    def test_init_with_tsv_file(self):
        """Training data can be loaded from TSV_FILE without naming the format."""
        cl = NaiveBayesClassifier(TSV_FILE)
        assert_equal(cl.classify("I feel happy this morning"), 'pos')
        training_sentence = cl.train_set[0][0]
        assert_true(isinstance(training_sentence, unicode))

    @attr("py27_only")
    def test_init_with_bad_format_specifier(self):
        """An unknown format name raises ValueError."""
        with assert_raises(ValueError):
            NaiveBayesClassifier(CSV_FILE, format='unknown')

示例#9

0

显示文件

 def test_init_with_bad_format_specifier(self):
     """Constructing the classifier with an unknown format name raises ValueError."""
     with assert_raises(ValueError):
         NaiveBayesClassifier(CSV_FILE, format='unknown')

示例#10

0

显示文件

文件： test_classifiers.py 项目： allenwade3/TextBlob

 def test_init_with_json_file(self):
     """Load training data from JSON_FILE with an explicit format and check the result."""
     classifier = NaiveBayesClassifier(JSON_FILE, format="json")
     predicted = classifier.classify("I feel happy this morning")
     assert_equal(predicted, 'pos')
     # The first training document must have been read as a unicode string.
     first_document = classifier.train_set[0][0]
     assert_true(isinstance(first_document, unicode))

示例#11

0

显示文件

文件： spamvsham.py 项目： anishmashankar/experiments

			msg = TextBlob(tabsep[1])
			try:
				words=msg.words
			except:
				continue
			for word in words:
				if word not in stopwords.words() and not word.isdigit():
					list_tuples.append((word.lower(),tabsep[0]))
			c+=1
			if c==500:
				break
	return list_tuples
print 'importing data...'
a = time.time()
entire_data = get_list_tuples("/home/anish/Documents/DataSci/DataSets/sms/SMSSpamCollection")
print "It took "+str(time.time()-a)+" seconds to import data"
print 'data imported'
random.seed(1)
random.shuffle(entire_data)
train = entire_data[:250]
test = entire_data[251:500]
print 'training data'
a = time.time()
cl = NaiveBayesClassifier(train)
print "It took "+str(time.time()-a)+" seconds to train data"
print 'data trained, now checking accuracy:'
accuracy = cl.accuracy(test)
print "accuracy: "+str(accuracy)
print cl.classify("Hey bud, what's up") #ham
print cl.classify("Get a brand new mobile phone by being an agent of The Mob! Plus loads more goodies! For more info just text MAT to 87021") #spam

示例#12

0

显示文件

文件： basic_sentiment.py 项目： tomaspdc/datascience

from text.classifiers import NaiveBayesClassifier

# Labelled example sentences used to train the classifier.
train = [
    ('I love this sandwich.', 'pos'),
    ('This is an amazing place!', 'pos'),
    ('I feel very good about these beers.', 'pos'),
    ('This is my best work.', 'pos'),
    ("What an awesome view", 'pos'),
    ('I do not like this restaurant', 'neg'),
    ('I am tired of this stuff.', 'neg'),
    ("I can't deal with this", 'neg'),
    ('He is my sworn enemy!', 'neg'),
    ('My boss is horrible.', 'neg')
]
# Labelled held-out sentences; not used anywhere in the visible part of this script.
test = [
    ('The beer was good.', 'pos'),
    ('I do not enjoy my job', 'neg'),
    ("I ain't feeling dandy today.", 'neg'),
    ("I feel amazing!", 'pos'),
    ('Gary is a friend of mine.', 'pos'),
    ("I can't believe I'm doing this.", 'neg')
]

# Each step echoes the statement it is about to run, then runs it.
print '> cl = NaiveBayesClassifier(train)'
cl = NaiveBayesClassifier(train)

print '> cl.classify("Their burgers are amazing")'
print cl.classify("Their burgers are amazing")

print '> cl.classify("I don\'t like their pizza.")'
print cl.classify("I don't like their pizza.")

示例#13

0

显示文件

文件： test_blob.py 项目： syllog1sm/TextBlob

# Labelled sentences used to train the module-level classifier below.
train = [('I love this sandwich.', 'pos'),
         ('This is an amazing place!', 'pos'),
         ('I feel very good about these beers.', 'pos'),
         ('This is my best work.', 'pos'), ("What an awesome view", 'pos'),
         ('I do not like this restaurant', 'neg'),
         ('I am tired of this stuff.', 'neg'),
         ("I can't deal with this", 'neg'), ('He is my sworn enemy!', 'neg'),
         ('My boss is horrible.', 'neg')]

# Labelled held-out sentences.
test = [('The beer was good.', 'pos'), ('I do not enjoy my job', 'neg'),
        ("I ain't feeling dandy today.", 'neg'), ("I feel amazing!", 'pos'),
        ('Gary is a friend of mine.', 'pos'),
        ("I can't believe I'm doing this.", 'neg')]

# Trained once at import time.
classifier = NaiveBayesClassifier(train)


class WordListTest(TestCase):
    """Tests for ``tb.WordList``."""

    def setUp(self):
        """Prepare a plain word list and a list mixing singular, plural and capitalised words."""
        self.words = 'Beautiful is better than ugly'.split()
        self.mixed = ['dog', 'dogs', 'blob', 'Blobs', 'text']

    def test_len(self):
        """len() of a WordList equals the number of words it was built from."""
        wl = tb.WordList(['Beautiful', 'is', 'better'])
        assert_equal(len(wl), 3)

    def test_slicing(self):
        """Indexing a WordList yields a ``tb.Word``."""
        wl = tb.WordList(self.words)
        first = wl[0]
        assert_true(isinstance(first, tb.Word))

示例#14

0

显示文件

def ask_with_guess(df, c=None):
	init()

	df['cat'] = ""

	if c is None:
		c = NaiveBayesClassifier([], extractor)

	categories = {1: 'Bill',
				  2: 'Supermarket',
				  3: 'Cash',
				  4: 'Petrol',
				  5: 'Eating Out',
				  6: 'Travel',
				  7: 'Unclassified',
				  8: 'House',
				  9: 'Books',
				  10: 'Craft',
				  11: 'Charity Shop',		  
				  12: 'Presents',
				  13: 'Toiletries',
				  14: 'Car',
				  15: 'Cheque',
				  16: 'Rent',
				  17: 'Paypal',
				  18: 'Ignore',
				  19: 'Expenses'
				  }

	for index, row in df.iterrows():
		#print Fore.GREEN + "-" * 72 + Fore.RESET

		

		# TODO: Make interface nicer
		# Ideas:
		# * Give list of categories at the end
		cats_list = ["%d: %s" % (id, cat) for id,cat in categories.iteritems()]
		new_list = []
		for item in cats_list:
		    if len(item.split(":")[1].strip()) < 5:
		        new_list.append(item + "\t\t\t")
		    else:
		        new_list.append(item + "\t\t")
		new_list[2::3] = map(lambda x: x+"\n", new_list[2::3])
		cats_joined = "".join(new_list)

		stripped_text = strip_numbers(row['desc'])

		if len(c.train_set) > 1:
			guess = c.classify(stripped_text)
		else:
			guess = ""


		# PRINTING STUFF
		print chr(27) + "[2J"
		print cats_joined
		print "\n\n"
		print "On: %s\t %.2f\n%s" % (row['date'], row['amount'], row['desc'])
		print Fore.RED  + Style.BRIGHT + "My guess is: " + guess + Fore.RESET

		res = raw_input("> ")

		if res.lower().startswith('q'):
			# Q = Quit
			return df,c
		if res == "":
			# Our guess was right!
			df.ix[index, 'cat'] = guess
			c.update([(stripped_text, guess)])
		else:
			# Our guess was wrong

			# Write correct answer
			df.ix[index, 'cat'] = categories[int(res)]
			# Update classifier
			c.update([(stripped_text, categories[int(res)])])

	return df,c

示例#15

0

显示文件

文件： fun.py 项目： sdsunjay/nlp-tutorials

import os
from text.classifiers import NaiveBayesClassifier

# Single words labelled with their language, used for training.
train = [('amor', "spanish"), ("perro", "spanish"), ("playa", "spanish"),
         ("sal", "spanish"), ("oceano", "spanish"), ("love", "english"),
         ("dog", "english"), ("beach", "english"), ("salt", "english"),
         ("ocean", "english")]
# Labelled words the classifier has not seen, used to measure accuracy.
test = [("ropa", "spanish"), ("comprar", "spanish"), ("camisa", "spanish"),
        ("agua", "spanish"), ("telefono", "spanish"), ("clothes", "english"),
        ("buy", "english"), ("shirt", "english"), ("water", "english"),
        ("telephone", "english")]


def extractor(word):
    '''Extract the last letter of a word as the only feature.

    Returns a dict with the single key ``last_letter(<letter>)`` set to
    True. An empty word has no last letter, so it yields an empty feature
    dict instead of raising IndexError.
    '''
    if not word:
        return {}
    return {"last_letter({0})".format(word[-1]): True}


# Classify words by language using only their last letter.
lang_detector = NaiveBayesClassifier(train, feature_extractor=extractor)
print(lang_detector.accuracy(test))
# show_informative_features prints its report itself; wrapping the call in
# print() would only add a stray "None" line after the report.
lang_detector.show_informative_features(5)

示例#16

0

显示文件

文件： test_classifiers.py 项目： vambati/TextBlob

 def test_init_with_tsv_file(self):
     """Load training data from TSV_FILE and check the resulting classifier."""
     classifier = NaiveBayesClassifier(TSV_FILE)
     predicted = classifier.classify("I feel happy this morning")
     assert_equal(predicted, "pos")
     # The first training document must have been read as a unicode string.
     first_document = classifier.train_set[0][0]
     assert_true(isinstance(first_document, unicode))

示例#17

0

显示文件

文件： grab_classify.py 项目： afcarl/grab_analyze

        train.append((val, "english"))

# Add every line of spanish.txt to the training data with the "spanish" label.
with open("spanish.txt", "r") as span:
    for ind, val in enumerate(span):
        try:
            # Drop characters that cannot be encoded as ASCII, then strip
            # tabs, newlines and carriage returns.
            val = val.encode("ascii", "ignore")
            val = val.replace("\t", "")
            val = val.replace("\n", "")
            val = val.replace("\r", "")
        except UnicodeDecodeError:
            # Lines that fail to convert are skipped.
            continue

        train.append((val, "spanish"))

cl = NaiveBayesClassifier(train)

# Output files, one per language; both stay open past the end of this excerpt.
english_links = open("english_links.txt", "w")
spanish_links = open("spanish_links.txt", "w")

for link in classes:
    r = requests.get(link)
    html = lxml.html.fromstring(r.text)
    obj = html.xpath('//div[@class="postingBody"]')
    post_body = [elem.text_content() for elem in obj]
    if post_body != []:
        text = post_body[0]
    try:
        text = text.encode("ascii", "ignore")
        text = text.replace("\t", "")
        text = text.replace("\n", "")

示例#18

0

显示文件

文件： Text classification using text blob.py 项目： rojinva/Email-classifier

train = []

book = open_workbook('C:/Documents and Settings/rojin.varghese/Desktop/LargeTest/One_Category_Train.xls')
sheet1 = book.sheet_by_index(0)
print "Training.............\n"
for j in range(sheet1.nrows):
      line1 = sheet1.cell_value(j,1)
      line1 = re.sub('[\-*>]', '', line1)
      line1 = re.sub('[\n]', '', line1)
      line2 = sheet1.cell_value(j,2)
      stored = [(line1, line2)]
      train = train + stored

print  "Training algo....\n"
cl = NaiveBayesClassifier(train)

book = open_workbook('C:/Documents and Settings/rojin.varghese/Desktop/LargeTest/One_Category_Test.xls')
sheet = book.sheet_by_index(0)

book1 = xlwt.Workbook()
sh = book1.add_sheet("sheet")

print "Classifying..........."

for j in range(sheet.nrows):
    id = sheet.cell_value(j,0)
    line = sheet.cell_value(j,1)
    line = re.sub('[-*>]', '', line)
    line = re.sub('[\n]', '', line)
    a = cl.classify(line)

示例#19

0

显示文件

文件： test_classifiers.py 项目： syllog1sm/TextBlob

 def test_train_from_lists_of_words(self):
     """A classifier trained on word lists is as accurate as one trained on strings."""
     # Same documents as train_set, each split into a list of words.
     tokenized = []
     for document, label in train_set:
         tokenized.append((document.split(), label))
     from_word_lists = NaiveBayesClassifier(tokenized)
     accuracy_from_word_lists = from_word_lists.accuracy(test_set)
     accuracy_from_strings = self.classifier.accuracy(test_set)
     assert_equal(accuracy_from_word_lists, accuracy_from_strings)

示例#20

0

显示文件

文件： test_classifiers.py 项目： syllog1sm/TextBlob

 def setUp(self):
     """Train a fresh classifier on ``train_set`` before every test."""
     self.classifier = NaiveBayesClassifier(train_set)

示例#21

0

显示文件

random.seed(1)

train = [('I love this sandwich.', 'pos'),
         ('This is an amazing place!', 'pos'),
         ('I feel very good about these beers.', 'pos'),
         ('This is my best work.', 'pos'), ("What an awesome view", 'pos'),
         ('I do not like this restaurant', 'neg'),
         ('I am tired of this stuff.', 'neg'),
         ("I can't deal with this", 'neg'), ('He is my sworn enemy!', 'neg'),
         ('My boss is horrible.', 'neg')]
test = [('The beer was good.', 'pos'), ('I do not enjoy my job', 'neg'),
        ("I ain't feeling dandy today.", 'neg'), ("I feel amazing!", 'pos'),
        ('Gary is a friend of mine.', 'pos'),
        ("I can't believe I'm doing this.", 'neg')]

cl = NaiveBayesClassifier(train)

# Grab some movie review data
reviews = [(list(movie_reviews.words(fileid)), category)
           for category in movie_reviews.categories()
           for fileid in movie_reviews.fileids(category)]

cl.update(reviews[101:200])

random.shuffle(reviews)

print type(reviews)

for it in reviews[0:4]:
    print it
    print 'Sentiment Analysis:', cl.classify(it[0])

示例#22

0

显示文件

# Fixed seed so the shuffle below is reproducible.
random.seed(1)

# Small hand-labelled set for the initial training.
train = [('I love this sandwich.', 'pos'),
         ('This is an amazing place!', 'pos'),
         ('I feel very good about these beers.', 'pos'),
         ('This is my best work.', 'pos'), ("What an awesome view", 'pos'),
         ('I do not like this restaurant', 'neg'),
         ('I am tired of this stuff.', 'neg'),
         ("I can't deal with this", 'neg'), ('He is my sworn enemy!', 'neg'),
         ('My boss is horrible.', 'neg')]
# Held-out sentences; not used in the visible part of this script.
test = [('The beer was good.', 'pos'), ('I do not enjoy my job', 'neg'),
        ("I ain't feeling dandy today.", 'neg'), ("I feel amazing!", 'pos'),
        ('Gary is a friend of mine.', 'pos'),
        ("I can't believe I'm doing this.", 'neg')]
print 'initial training going on....'
cl = NaiveBayesClassifier(train)
print 'initial training done.'
# Grab some movie review data
print 'now gathering reviews...'
# One (word list, category) pair per file, built category by category.
reviews = [(list(movie_reviews.words(fileid)), category)
           for category in movie_reviews.categories()
           for fileid in movie_reviews.fileids(category)]
# Shuffle first so the 200 reviews taken below are not all from one category.
random.shuffle(reviews)
new_train = reviews[0:200]
print 'reviews gathered.'
# Update the classifier with the new training data
print 'now training using the new data...'
cl.update(new_train)
print 'trained and ready!'
print cl.classify("I hated the movie and hated the food")
# Compute accuracy

示例#23

0

显示文件

文件： naive.py 项目： hllk/ISIZgadujemyDaty

contents_train = inputfile.readlines()
inputfile.close()

#dev - years
inputfile = codecs.open("years-dev.txt", 'r', 'utf-8')
dev_train = inputfile.readlines()
inputfile.close()

#dev - content
inputfile = codecs.open("contents-dev.txt", 'r', 'utf-8')
contents_dev = inputfile.readlines()
inputfile.close()

#training set
train_set = []
g = range(0, 4000, 2)
for i in g:
    train_set.append((contents_train[i], years_train[i / 2]))

print "tu się robi"
cl = NaiveBayesClassifier(train_set)
print "a tu się zrobiło"
outputfile = open("classified.txt", "w")
g = range(0, len(contents_dev), 2)
for i in g:
    result = cl.classify(contents_dev[i])
    print i
    outputfile.write(str(result))
print "zmieliło"
outputfile.close()

示例#24

0

显示文件

文件： text_classif2.py 项目： malab/test-python

# Fixed seed so the shuffle below is reproducible.
random.seed(1)

train = [('I love this sandwich.', 'pos'),
         ('This is an amazing place!', 'pos'),
         ('I feel very good about these beers.', 'pos'),
         ('This is my best work.', 'pos'), ("What an awesome view", 'pos'),
         ('I do not like this restaurant', 'neg'),
         ('I am tired of this stuff.', 'neg'),
         ("I can't deal with this", 'neg'), ('He is my sworn enemy!', 'neg'),
         ('My boss is horrible.', 'neg')]
test = [('The beer was good.', 'pos'), ('I do not enjoy my job', 'neg'),
        ("I ain't feeling dandy today.", 'neg'), ("I feel amazing!", 'pos'),
        ('Gary is a friend of mine.', 'pos'),
        ("I can't believe I'm doing this.", 'neg')]

cl = NaiveBayesClassifier(train)

# Grab some movie review data
reviews = [(list(movie_reviews.words(fileid)), category)
           for category in movie_reviews.categories()
           for fileid in movie_reviews.fileids(category)]
random.shuffle(reviews)
# Two adjacent blocks of 100 reviews each. The test block used to start at
# 101, which left review 100 out of both sets.
new_train, new_test = reviews[0:100], reviews[100:200]

# Update the classifier with the new training data
cl.update(new_train)

# Compute accuracy
accuracy = cl.accuracy(test + new_test)
print("Accuracy: {0}".format(accuracy))

示例#25

0

显示文件

文件： classify.py 项目： GayathriSrinivas/cmpe239_project

infile = "data/yelp_academic_dataset_review.json"

# read the first 1000 reviews
i = 0
fin = open(infile, 'r')
data = []
for line in fin:
    review = json.loads(line)
    data.append((review['text'], float(review['stars'])))
    if i == 1000:
        break
    i += 1
fin.close()

k = 500
training_set, test_set = data[:k], data[k:]
print "building classifier"
cl = NaiveBayesClassifier(training_set)
print "built classifier"

# Compute accuracy
print "computing accuracy"
print("Accuracy: {0}".format(cl.accuracy(test_set)))
print "computed accuracy"
 
# Show 5 most informative features
print "showing features"
cl.show_informative_features(5)
print "done :)"

示例#26

0

显示文件

文件： classifier.py 项目： AlinaKay/Popart

    ('NFL MLB NBA NHL MMA college football and basketball NASCAR fantasy sports', 'Sport'),
    ('global warming extrasolar planets stem cells bird flu autism nano dinosaurs evolution.', 'Science'),
    ('wormholes outer space engineering humans smartest animal far-Off Planets Like the Earth Dot the Galaxy.', 'Science'),
    ('Science demystifies natural engineering space military physics, dreams supernatural phenomena.', 'Science'),
    ("microbe mammal origins evolution life forms. Explore biology genetics evolution", 'Science'),
    ('art news exhibitions events artists galleries museums editions books mapping the art.', 'Art'),
    ('art daily art Museums Exhibits Artists Milestones Digital Art Architecture', 'Art'),
    ("exhibitions interesting random weirdness photography painting prints design sculpture.", 'Art'),
    ('artists galleries museums and auction houses movies documentary.', 'Art'),
    ('Medicine, Health, Drugs, drugs fitness nutrition health care mental health drugs diet pregnancy babies cancer AIDS allergies & asthma.', 'Health'),
    ('Drugs supplements living healthy family pregnancy, energizing moves recipes losing weight feeling great.', 'Health'),
    ('Weight Loss & Diet Plans Food & Recipes Fitness & Exercise Beauty Balance & Love Sex & Relationships Oral Care yoga Aging Well.', 'Health'),
    ('Conceive Parenting Newborn & Baby Children Vaccines Raising Fit Kids Pets.', 'Health')
]
## CREATING THE CLASSIFIER ##
cl = NaiveBayesClassifier(train)

# Classify every document that has a 'content' field and store the result
# in that document's 'Category' field.
for articles in db_collection_tweets.find({'content': {'$exists': True}}):
    #print articles['full_url']
    category = cl.classify(articles['content'])
    db_collection_tweets.update({ '_id' : articles['_id'] }, { '$set' : { 'Category': category} } )

## DISTRIBUTION OF THE CATEGORIES IN THE SAMPLE ##

# Listing all the categories
# One entry per categorised document, so a category appears once per document.
list_cat = []
for articles in db_collection_tweets.find({'Category': {'$exists' : True}}):
    list_cat.append(articles['Category'])    

# Counting the number of occurences of each category
cat_dict = {}

示例#27

0

显示文件

 def test_init_with_csv_file_without_format_specifier(self):
     """Load training data from CSV_FILE without naming the format and check the result."""
     classifier = NaiveBayesClassifier(CSV_FILE)
     predicted = classifier.classify("I feel happy this morning")
     assert_equal(predicted, 'pos')
     # The first training document must have been read as a unicode string.
     first_document = classifier.train_set[0][0]
     assert_true(isinstance(first_document, unicode))

示例#28

0

显示文件

文件： test_classifiers.py 项目： allenwade3/TextBlob

 def test_init_with_csv_file_without_format_specifier(self):
     """Load training data from CSV_FILE without naming the format and check the result."""
     classifier = NaiveBayesClassifier(CSV_FILE)
     predicted = classifier.classify("I feel happy this morning")
     assert_equal(predicted, 'pos')
     # The first training document must have been read as a unicode string.
     first_document = classifier.train_set[0][0]
     assert_true(isinstance(first_document, unicode))

示例#29

0

显示文件

文件： sentana.py 项目： anishmashankar/experiments

('I do not like this restaurant', 'neg'),
('I am tired of this stuff.', 'neg'),
("I can't deal with this", 'neg'),
('He is my sworn enemy!', 'neg'),
('My boss is horrible.', 'neg')
]
test = [
('The beer was good.', 'pos'),
('I do not enjoy my job', 'neg'),
("I ain't feeling dandy today.", 'neg'),
("I feel amazing!", 'pos'),
('Gary is a friend of mine.', 'pos'),
("I can't believe I'm doing this.", 'neg')
]
print 'initial training going on....'
cl = NaiveBayesClassifier(train)
print 'initial training done.'
# Grab some movie review data
print 'now gathering reviews...'
# One (word list, category) pair per file, built category by category.
reviews = [(list(movie_reviews.words(fileid)), category)
for category in movie_reviews.categories()
for fileid in movie_reviews.fileids(category)]
# Shuffle first so the 200 reviews taken below are not all from one category.
random.shuffle(reviews)
new_train = reviews[0:200]
print 'reviews gathered.'
# Update the classifier with the new training data
print 'now training using the new data...'
cl.update(new_train)
print 'trained and ready!'
print cl.classify("I hated the movie and hated the food")
# Compute accuracy

示例#30

0

显示文件

文件： naive.py 项目： hllk/ISIZgadujemyDaty

#dev - years
inputfile = codecs.open("years-dev.txt", 'r', 'utf-8')
dev_train = inputfile.readlines()
inputfile.close()

#dev - content
inputfile = codecs.open("contents-dev.txt", 'r', 'utf-8')
contents_dev = inputfile.readlines()
inputfile.close()

#training set
train_set = []
g = range(0, 4000, 2)
for i in g:
	train_set.append((contents_train[i], years_train[i/2]))


print "tu się robi"	
cl = NaiveBayesClassifier(train_set)
print "a tu się zrobiło"
outputfile = open("classified.txt", "w")
g = range(0, len(contents_dev), 2)
for i in g:
	result = cl.classify(contents_dev[i])
	print i
	outputfile.write(str(result))
print "zmieliło"
outputfile.close()

示例#31

0

显示文件

文件： test_classifiers.py 项目： robertlayton/TextBlob

class TestNaiveBayesClassifier(unittest.TestCase):
    """Tests for NaiveBayesClassifier using fixtures built in ``setUp``.

    ``basic_extractor`` and ``custom_extractor`` are not defined inside this
    class.
    """

    def setUp(self):
        """Build the labelled train/test fixtures and train a fresh classifier."""
        self.train_set =  [
              ('I love this car', 'positive'),
              ('This view is amazing', 'positive'),
              ('I feel great this morning', 'positive'),
              ('I am so excited about the concert', 'positive'),
              ('He is my best friend', 'positive'),
              ('I do not like this car', 'negative'),
              ('This view is horrible', 'negative'),
              ('I feel tired this morning', 'negative'),
              ('I am not looking forward to the concert', 'negative'),
              ('He is my enemy', 'negative')
        ]
        self.classifier = NaiveBayesClassifier(self.train_set)
        self.test_set = [('I feel happy this morning', 'positive'),
                        ('Larry is my friend.', 'positive'),
                        ('I do not like that man.', 'negative'),
                        ('My house is not great.', 'negative'),
                        ('Your song is annoying.', 'negative')]

    def test_basic_extractor(self):
        """basic_extractor flags words of the text with ``contains(word)`` features."""
        text = "I feel happy this morning."
        feats = basic_extractor(text, self.train_set)
        assert_true(feats["contains(feel)"])
        assert_true(feats['contains(morning)'])
        assert_false(feats["contains(amazing)"])

    def test_default_extractor(self):
        """extract_features gives the same result as basic_extractor."""
        text = "I feel happy this morning."
        assert_equal(self.classifier.extract_features(text), basic_extractor(text, self.train_set))

    def test_classify(self):
        """classify returns 'positive' and leaves the training set size unchanged."""
        res = self.classifier.classify("I feel happy this morning")
        assert_equal(res, 'positive')
        assert_equal(len(self.classifier.train_set), len(self.train_set))

    def test_prob_classify(self):
        """prob_classify ranks 'positive' above 'negative' for a happy sentence."""
        res = self.classifier.prob_classify("I feel happy this morning")
        assert_equal(res.max(), "positive")
        assert_true(res.prob("positive") > res.prob("negative"))

    def test_accuracy(self):
        """accuracy returns a float."""
        acc = self.classifier.accuracy(self.test_set)
        assert_true(isinstance(acc, float))

    def test_update(self):
        """update adds the new example and raises the probability of its label."""
        res1 = self.classifier.prob_classify("lorem ipsum")
        original_length = len(self.classifier.train_set)
        self.classifier.update([("lorem ipsum", "positive")])
        new_length = len(self.classifier.train_set)
        res2 = self.classifier.prob_classify("lorem ipsum")
        assert_true(res2.prob("positive") > res1.prob("positive"))
        assert_equal(original_length + 1, new_length)

    def test_show_informative_features(self):
        """Smoke test: the call must not raise; its result is not checked."""
        feats = self.classifier.show_informative_features()

    def test_informative_features(self):
        """informative_features returns a list whose first item is a tuple."""
        feats = self.classifier.informative_features(3)
        assert_true(isinstance(feats, list))
        assert_true(isinstance(feats[0], tuple))

    def test_custom_feature_extractor(self):
        """A custom extractor can be passed as the second constructor argument."""
        cl = NaiveBayesClassifier(self.train_set, custom_extractor)
        cl.classify("Yay! I'm so happy it works.")
        assert_equal(cl.train_features[0][1], 'positive')

示例#32

0

显示文件

class TestNaiveBayesClassifier(unittest.TestCase):
    """Exercise NaiveBayesClassifier on a small positive/negative sentence corpus."""

    def setUp(self):
        """Create the labelled fixtures and a classifier trained on train_set."""
        positive_docs = ['I love this car',
                         'This view is amazing',
                         'I feel great this morning',
                         'I am so excited about the concert',
                         'He is my best friend']
        negative_docs = ['I do not like this car',
                         'This view is horrible',
                         'I feel tired this morning',
                         'I am not looking forward to the concert',
                         'He is my enemy']
        # Positive examples come first, then negative ones.
        self.train_set = ([(doc, 'positive') for doc in positive_docs] +
                          [(doc, 'negative') for doc in negative_docs])
        self.classifier = NaiveBayesClassifier(self.train_set)
        self.test_set = [
            ('I feel happy this morning', 'positive'),
            ('Larry is my friend.', 'positive'),
            ('I do not like that man.', 'negative'),
            ('My house is not great.', 'negative'),
            ('Your song is annoying.', 'negative'),
        ]

    def _verify_file_trained(self, clf):
        """Shared checks for a classifier that was constructed from a data file."""
        predicted = clf.classify("I feel happy this morning")
        assert_equal(predicted, 'pos')
        first_text = clf.train_set[0][0]
        assert_true(isinstance(first_text, unicode))

    def test_basic_extractor(self):
        """Check the feature mapping basic_extractor returns for a sample sentence."""
        sentence = "I feel happy this morning."
        features = basic_extractor(sentence, self.train_set)
        for present_key in ("contains(feel)", 'contains(morning)'):
            assert_true(features[present_key])
        assert_false(features["contains(amazing)"])

    def test_default_extractor(self):
        """The classifier's own extract_features should equal basic_extractor's output."""
        sentence = "I feel happy this morning."
        actual = self.classifier.extract_features(sentence)
        expected = basic_extractor(sentence, self.train_set)
        assert_equal(actual, expected)

    def test_classify(self):
        """Classify a sample sentence and compare training-set sizes."""
        label = self.classifier.classify("I feel happy this morning")
        assert_equal(label, 'positive')
        stored_count = len(self.classifier.train_set)
        assert_equal(stored_count, len(self.train_set))

    def test_prob_classify(self):
        """prob_classify should rank "positive" above "negative" for the sample sentence."""
        dist = self.classifier.prob_classify("I feel happy this morning")
        assert_equal(dist.max(), "positive")
        positive_p = dist.prob("positive")
        negative_p = dist.prob("negative")
        assert_true(positive_p > negative_p)

    def test_accuracy(self):
        """accuracy() on the in-memory test fixture should return a float."""
        score = self.classifier.accuracy(self.test_set)
        assert_true(isinstance(score, float))

    def test_update(self):
        """update() should grow the training set and shift probabilities toward the new label."""
        before = self.classifier.prob_classify("lorem ipsum")
        size_before = len(self.classifier.train_set)

        self.classifier.update([("lorem ipsum", "positive")])

        size_after = len(self.classifier.train_set)
        after = self.classifier.prob_classify("lorem ipsum")

        assert_true(after.prob("positive") > before.prob("positive"))
        assert_equal(size_before + 1, size_after)

    def test_show_informative_features(self):
        """Smoke test: show_informative_features() should run without raising."""
        self.classifier.show_informative_features()

    def test_informative_features(self):
        """informative_features(3) should return a list whose first item is a tuple."""
        top = self.classifier.informative_features(3)
        assert_true(isinstance(top, list))
        first = top[0]
        assert_true(isinstance(first, tuple))

    def test_custom_feature_extractor(self):
        """A classifier built with custom_extractor should classify and keep labelled features."""
        clf = NaiveBayesClassifier(self.train_set, custom_extractor)
        clf.classify("Yay! I'm so happy it works.")
        first_example = clf.train_features[0]
        assert_equal(first_example[1], 'positive')

    def test_init_with_csv_file(self):
        """Train from CSV_FILE with the format given explicitly."""
        self._verify_file_trained(NaiveBayesClassifier(CSV_FILE, format="csv"))

    def test_init_with_csv_file_without_format_specifier(self):
        """Train from CSV_FILE without passing a format."""
        self._verify_file_trained(NaiveBayesClassifier(CSV_FILE))

    def test_init_with_json_file(self):
        """Train from JSON_FILE with the format given explicitly."""
        self._verify_file_trained(NaiveBayesClassifier(JSON_FILE, format="json"))

    def test_init_with_json_file_without_format_specifier(self):
        """Train from JSON_FILE without passing a format."""
        self._verify_file_trained(NaiveBayesClassifier(JSON_FILE))

    def test_accuracy_on_a_csv_file(self):
        """accuracy() given CSV_FILE should return a float."""
        score = self.classifier.accuracy(CSV_FILE)
        assert_true(isinstance(score, float))

    def test_accuracy_on_json_file(self):
        """accuracy() given JSON_FILE should return a float."""
        score = self.classifier.accuracy(JSON_FILE)
        assert_true(isinstance(score, float))

    def test_init_with_tsv_file(self):
        """Train from TSV_FILE without passing a format."""
        self._verify_file_trained(NaiveBayesClassifier(TSV_FILE))

    @attr("py27_only")
    def test_init_with_bad_format_specifier(self):
        """An unrecognised format name is expected to raise ValueError."""
        with assert_raises(ValueError):
            NaiveBayesClassifier(CSV_FILE, format='unknown')

示例#33

0

显示文件

文件： text_classif2.py 项目： malab/test-python

    ('I do not like this restaurant', 'neg'),
    ('I am tired of this stuff.', 'neg'),
    ("I can't deal with this", 'neg'),
    ('He is my sworn enemy!', 'neg'),
    ('My boss is horrible.', 'neg')
]
# Hand-labelled sentences used (together with movie reviews) to score the classifier.
test = [
    ('The beer was good.', 'pos'),
    ('I do not enjoy my job', 'neg'),
    ("I ain't feeling dandy today.", 'neg'),
    ("I feel amazing!", 'pos'),
    ('Gary is a friend of mine.', 'pos'),
    ("I can't believe I'm doing this.", 'neg')
]

# Initial training on the `train` examples defined earlier in the script.
cl = NaiveBayesClassifier(train)

# Grab some movie review data: one (word list, category) pair per corpus file.
reviews = [(list(movie_reviews.words(fileid)), category)
           for category in movie_reviews.categories()
           for fileid in movie_reviews.fileids(category)]
# Shuffle so the slices below are not grouped by category.
random.shuffle(reviews)
# Two contiguous, non-overlapping slices. The previous `reviews[101:200]`
# silently dropped the review at index 100.
new_train, new_test = reviews[:100], reviews[100:200]

# Update the classifier with the new training data
cl.update(new_train)

# Compute accuracy over the hand-labelled sentences plus the held-out reviews
accuracy = cl.accuracy(test + new_test)
print("Accuracy: {0}".format(accuracy))

示例#34

0

显示文件

 def test_custom_feature_extractor(self):
     """A classifier built with custom_extractor should classify and keep labelled features."""
     clf = NaiveBayesClassifier(self.train_set, custom_extractor)
     # The prediction itself is not checked; the call only has to succeed.
     clf.classify("Yay! I'm so happy it works.")
     first_example = clf.train_features[0]
     assert_equal(first_example[1], 'positive')

示例#35

0

显示文件

文件： grab_classify.py 项目： EricSchles/grab_analyze

        train.append((val, "english"))

# Add every usable line of spanish.txt to `train` with the label "spanish".
with open("spanish.txt", "r") as span:
    for ind, val in enumerate(span):
        try:
            # Drop characters that cannot be encoded as ASCII, then remove
            # tabs, newlines and carriage returns.
            val = (val.encode("ascii", "ignore")
                   .replace("\t", "")
                   .replace("\n", "")
                   .replace("\r", ""))
        except UnicodeDecodeError:
            # Lines that fail with UnicodeDecodeError are left out of the training data.
            continue

        train.append((val, "spanish"))


# Train on everything collected in `train`.
cl = NaiveBayesClassifier(train)

# Output files, one per label; opened here and left open for the code that follows.
english_links = open("english_links.txt", "w")
spanish_links = open("spanish_links.txt", "w")

for link in classes:
    r = requests.get(link)
    html = lxml.html.fromstring(r.text)
    obj = html.xpath('//div[@class="postingBody"]')
    post_body = [elem.text_content() for elem in obj]
    if post_body != []:
        text = post_body[0]
    try:
        text = text.encode("ascii", "ignore")
        text = text.replace("\t", "")
        text = text.replace("\n", "")

示例#36

0

显示文件

 def test_init_with_json_file(self):
     """Train from JSON_FILE with an explicit format and check the label and text type."""
     clf = NaiveBayesClassifier(JSON_FILE, format="json")
     predicted = clf.classify("I feel happy this morning")
     assert_equal(predicted, 'pos')
     # The first training document is expected to be a unicode string.
     first_text = clf.train_set[0][0]
     assert_true(isinstance(first_text, unicode))

示例#37

0

显示文件

文件： test_classifiers.py 项目： shidao-fm/TextBlob

class TestNaiveBayesClassifier(unittest.TestCase):
    """Exercise NaiveBayesClassifier using the module-level train_set / test_set fixtures."""

    def setUp(self):
        """Create the classifier under test from train_set."""
        trained = NaiveBayesClassifier(train_set)
        self.classifier = trained

    def _verify_file_trained(self, clf):
        """Shared checks for a classifier that was constructed from a data file."""
        predicted = clf.classify("I feel happy this morning")
        assert_equal(predicted, 'pos')
        first_text = clf.train_set[0][0]
        assert_true(isinstance(first_text, unicode))

    def test_basic_extractor(self):
        """Check the feature mapping basic_extractor returns for a sample sentence."""
        sentence = "I feel happy this morning."
        features = basic_extractor(sentence, train_set)
        for present_key in ("contains(feel)", 'contains(morning)'):
            assert_true(features[present_key])
        assert_false(features["contains(amazing)"])

    def test_default_extractor(self):
        """The classifier's own extract_features should equal basic_extractor's output."""
        sentence = "I feel happy this morning."
        actual = self.classifier.extract_features(sentence)
        expected = basic_extractor(sentence, train_set)
        assert_equal(actual, expected)

    def test_classify(self):
        """Classify a sample sentence and compare training-set sizes."""
        label = self.classifier.classify("I feel happy this morning")
        assert_equal(label, 'positive')
        stored_count = len(self.classifier.train_set)
        assert_equal(stored_count, len(train_set))

    def test_classify_a_list_of_words(self):
        """classify() should also accept a document given as a list of words."""
        words = ["I", "feel", "happy", "this", "morning"]
        label = self.classifier.classify(words)
        assert_equal(label, "positive")

    def test_train_from_lists_of_words(self):
        """Training on word lists should give the same accuracy as training on strings."""
        # classifier can be trained on lists of words instead of strings
        tokenized = []
        for doc, label in train_set:
            tokenized.append((doc.split(), label))
        word_list_clf = NaiveBayesClassifier(tokenized)
        from_words = word_list_clf.accuracy(test_set)
        from_strings = self.classifier.accuracy(test_set)
        assert_equal(from_words, from_strings)

    def test_prob_classify(self):
        """prob_classify should rank "positive" above "negative" for the sample sentence."""
        dist = self.classifier.prob_classify("I feel happy this morning")
        assert_equal(dist.max(), "positive")
        positive_p = dist.prob("positive")
        negative_p = dist.prob("negative")
        assert_true(positive_p > negative_p)

    def test_accuracy(self):
        """accuracy() on the in-memory test fixture should return a float."""
        score = self.classifier.accuracy(test_set)
        assert_true(isinstance(score, float))

    def test_update(self):
        """update() should grow the training set and shift probabilities toward the new label."""
        before = self.classifier.prob_classify("lorem ipsum")
        size_before = len(self.classifier.train_set)

        self.classifier.update([("lorem ipsum", "positive")])

        size_after = len(self.classifier.train_set)
        after = self.classifier.prob_classify("lorem ipsum")

        assert_true(after.prob("positive") > before.prob("positive"))
        assert_equal(size_before + 1, size_after)

    def test_labels(self):
        """labels() should contain both "positive" and "negative"."""
        known = self.classifier.labels()
        for expected_label in ("positive", "negative"):
            assert_true(expected_label in known)

    def test_show_informative_features(self):
        """Smoke test: show_informative_features() should run without raising."""
        self.classifier.show_informative_features()

    def test_informative_features(self):
        """informative_features(3) should return a list whose first item is a tuple."""
        top = self.classifier.informative_features(3)
        assert_true(isinstance(top, list))
        first = top[0]
        assert_true(isinstance(first, tuple))

    def test_custom_feature_extractor(self):
        """A classifier built with custom_extractor should classify and keep labelled features."""
        clf = NaiveBayesClassifier(train_set, custom_extractor)
        clf.classify("Yay! I'm so happy it works.")
        first_example = clf.train_features[0]
        assert_equal(first_example[1], 'positive')

    def test_init_with_csv_file(self):
        """Train from CSV_FILE with the format given explicitly."""
        self._verify_file_trained(NaiveBayesClassifier(CSV_FILE, format="csv"))

    def test_init_with_csv_file_without_format_specifier(self):
        """Train from CSV_FILE without passing a format."""
        self._verify_file_trained(NaiveBayesClassifier(CSV_FILE))

    def test_init_with_json_file(self):
        """Train from JSON_FILE with the format given explicitly."""
        self._verify_file_trained(NaiveBayesClassifier(JSON_FILE, format="json"))

    def test_init_with_json_file_without_format_specifier(self):
        """Train from JSON_FILE without passing a format."""
        self._verify_file_trained(NaiveBayesClassifier(JSON_FILE))

    def test_accuracy_on_a_csv_file(self):
        """accuracy() given CSV_FILE should return a float."""
        score = self.classifier.accuracy(CSV_FILE)
        assert_true(isinstance(score, float))

    def test_accuracy_on_json_file(self):
        """accuracy() given JSON_FILE should return a float."""
        score = self.classifier.accuracy(JSON_FILE)
        assert_true(isinstance(score, float))

    def test_init_with_tsv_file(self):
        """Train from TSV_FILE without passing a format."""
        self._verify_file_trained(NaiveBayesClassifier(TSV_FILE))

    @attr("py27_only")
    def test_init_with_bad_format_specifier(self):
        """An unrecognised format name is expected to raise ValueError."""
        with assert_raises(ValueError):
            NaiveBayesClassifier(CSV_FILE, format='unknown')

示例#38

0

显示文件

文件： shit.py 项目： jluc19/disambiguator

    ('I do not like this restaurant', 'neg'),
    ('I am tired of this stuff.', 'neg'),
    ("I can't deal with this", 'neg'),
    ('He is my sworn enemy!', 'neg'),
    ('My boss is horrible.', 'neg')
]
# Hand-labelled evaluation sentences (not used further in this excerpt).
test = [('The beer was good.', 'pos'),
        ('I do not enjoy my job', 'neg'),
        ("I ain't feeling dandy today.", 'neg'),
        ("I feel amazing!", 'pos'),
        ('Gary is a friend of mine.', 'pos'),
        ("I can't believe I'm doing this.", 'neg')]

# Train on the `train` examples defined earlier in the script.
cl = NaiveBayesClassifier(train)

# Classify two plain strings; the trailing comments give the expected labels.
print(cl.classify("Their burgers are amazing."))  # "pos"
print(cl.classify("I don't like their pizza."))   # "neg"

# Attach the classifier to a TextBlob and classify the text as a whole ...
blob = TextBlob(
    "The beer was amazing. But the hangover was horrible. "
    "My boss was not pleased.",
    classifier=cl)
print(blob)
print(blob.classify())

# ... then classify each of its sentences separately.
for sentence in blob.sentences:
    print(sentence)
    print(sentence.classify())

示例#39

0

显示文件

文件： test_classifiers.py 项目： shidao-fm/TextBlob

 def setUp(self):
     """Create the classifier under test from train_set."""
     trained = NaiveBayesClassifier(train_set)
     self.classifier = trained

示例#40

0

显示文件

文件： test_classifiers.py 项目： robertlayton/TextBlob

 def test_custom_feature_extractor(self):
     """A classifier built with custom_extractor should classify and keep labelled features."""
     clf = NaiveBayesClassifier(self.train_set, custom_extractor)
     # The prediction itself is not checked; the call only has to succeed.
     clf.classify("Yay! I'm so happy it works.")
     first_example = clf.train_features[0]
     assert_equal(first_example[1], 'positive')

示例#41

0

显示文件

文件： test_classifiers.py 项目： shidao-fm/TextBlob

 def test_train_from_lists_of_words(self):
     """Training on word lists should give the same accuracy as training on strings."""
     # classifier can be trained on lists of words instead of strings
     tokenized = []
     for doc, label in train_set:
         tokenized.append((doc.split(), label))
     word_list_clf = NaiveBayesClassifier(tokenized)
     from_words = word_list_clf.accuracy(test_set)
     from_strings = self.classifier.accuracy(test_set)
     assert_equal(from_words, from_strings)

示例#42

0

显示文件

文件： shit.py 项目： jluc19/disambiguator

from text.blob import TextBlob

# Labelled training sentences: five positive followed by five negative.
train = [
    ('I love this sandwich.', 'pos'),
    ('This is an amazing place!', 'pos'),
    ('I feel very good about these beers.', 'pos'),
    ('This is my best work.', 'pos'),
    ("What an awesome view", 'pos'),
    ('I do not like this restaurant', 'neg'),
    ('I am tired of this stuff.', 'neg'),
    ("I can't deal with this", 'neg'),
    ('He is my sworn enemy!', 'neg'),
    ('My boss is horrible.', 'neg'),
]
# Hand-labelled evaluation sentences (not used further in this excerpt).
test = [
    ('The beer was good.', 'pos'),
    ('I do not enjoy my job', 'neg'),
    ("I ain't feeling dandy today.", 'neg'),
    ("I feel amazing!", 'pos'),
    ('Gary is a friend of mine.', 'pos'),
    ("I can't believe I'm doing this.", 'neg'),
]

cl = NaiveBayesClassifier(train)

# Classify two plain strings; the trailing comments give the expected labels.
print(cl.classify("Their burgers are amazing."))  # "pos"
print(cl.classify("I don't like their pizza."))  # "neg"

# Attach the classifier to a TextBlob and classify the text as a whole.
blob = TextBlob("The beer was amazing. But the hangover was horrible. "
                "My boss was not pleased.", classifier=cl)
print(blob)
print(blob.classify())

# Print each sentence of the blob in turn.
for sentence in blob.sentences:
    print(sentence)

示例#43

0

显示文件

文件： bayes.py 项目： kaushikmit/mlscripts

    ('I do not like this restaurant', 'neg'),
    ('I am tired of this stuff.', 'neg'),
    ("I can't deal with this", 'neg'),
    ('He is my sworn enemy!', 'neg'),
    ('My boss is horrible.', 'neg')
]
# Hand-labelled evaluation sentences (not used further in this excerpt).
test = [
    ('The beer was good.', 'pos'),
    ('I do not enjoy my job', 'neg'),
    ("I ain't feeling dandy today.", 'neg'),
    ("I feel amazing!", 'pos'),
    ('Gary is a friend of mine.', 'pos'),
    ("I can't believe I'm doing this.", 'neg')
]

# Initial training on the `train` examples defined earlier in the script.
cl = NaiveBayesClassifier(train)

# Grab some movie review data: one (word list, category) pair per corpus file.
reviews = [(list(movie_reviews.words(fileid)), category)
              for category in movie_reviews.categories()
              for fileid in movie_reviews.fileids(category)]

# NOTE(review): `reviews` is built category by category and is only shuffled
# further down, so this slice presumably covers a single category. Confirm
# that updating before the shuffle is intended.
cl.update(reviews[101:200])


random.shuffle(reviews)

# The parenthesised single-argument form prints the same on Python 2 and also
# parses on Python 3, where the bare `print x` statement is a SyntaxError.
print(type(reviews))

# Show the first four shuffled (words, category) pairs.
for it in reviews[0:4]:
	print(it)