Пример #1
0
        # Extract all adjectives (Penn Treebank tag JJ) from the parsed tweet.
        s = search('JJ',
                   s)  # searches for adjectives in tweets (JJ = adjective)
        # Keep only the matched word strings, then rebuild one space-joined string.
        s = [match[0].string for match in s]
        s = ' '.join(s)
        if len(s) > 0:
            # NOTE(review): the same adjective string is appended twice, once
            # per label (type=p and type=m) -- labeling identical text with
            # both classes looks like a merge artifact; confirm intent.
            corpus.append(Document(s, type=p))
            corpus.append(Document(s, type=m))

# K-nearest neighbor classifier: a new document is labeled by majority vote
# of the k training documents most similar to it.
classifier = KNN()

# Train on the whole corpus FIRST. The original queried the classifier inside
# the training loop, so every prediction was made on a partially trained model
# and each probe word was re-predicted once per document.
for document in corpus:  # documents are an unordered bag of adjectives
    classifier.train(document)

# Classify each probe word exactly once, after training is complete.
objects = [classifier.classify(word)
           for word in ('awesome', 'cool', 'damn', 'sucks')]
# expected: 'awesome'/'cool' -> 'WIN', 'damn'/'sucks' -> 'FAIL'
print(objects)

# Tally the predicted labels; any other prediction (e.g. None) is ignored.
wincounter = objects.count('WIN')
failcounter = objects.count('FAIL')
Пример #2
0
# is to test it with testing data, see the documentation for Classifier.test().
classifier = KNN(baseline=None)  # By default, baseline=MAJORITY
for document in m:  # (classify unknown documents with the most frequent type).
    classifier.train(document)

# These are the adjectives the classifier has learned:
print(sorted(classifier.features))
print()

# We can now ask it to classify documents containing these words.
# Note that you may get different results than the ones below,
# since you will be mining other (more recent) tweets.
# Again, a robust classifier needs lots and lots of training data.
# If None is returned, the word was not recognized,
# and the classifier returned the default value (see above).
print(classifier.classify('sweet potato burger'))  # yields 'WIN'
print(classifier.classify('stupid autocorrect'))  # yields 'FAIL'

# "What can I do with it?"
# In the scientific community, classifiers have been used to predict:
# - the opinion (positive/negative) in product reviews on blogs,
# - the age of users posting on social networks,
# - the author of medieval poems,
# - spam in e-mail messages,
# - lies & deception in text,
# - doubt & uncertainty in text,
# and to:
# - improve search engine query results (e.g., where "jeans" queries also yield "denim" results),
# - win at Jeopardy!,
# - win at rock-paper-scissors,
# and so on...
Пример #3
0
# With baseline=None an unrecognized document yields None; the default
# (baseline=MAJORITY) would instead classify unknown documents with the
# most frequent type seen during training.
classifier = KNN(baseline=None)
for doc in m:
    classifier.train(doc)

# The adjectives the classifier has learned so far:
print(sorted(classifier.features))
print()

# Ask it to classify documents containing these words. Your results may
# differ from the ones noted below, since you will be mining other (more
# recent) tweets -- a robust classifier needs lots and lots of training
# data. A None result means no word was recognized and the default value
# (see above) was returned.
print(classifier.classify('sweet potato burger'))  # yields 'WIN'
print(classifier.classify('stupid autocorrect'))   # yields 'FAIL'

# "What can I do with it?"
# In the scientific community, classifiers have been used to predict:
# - the opinion (positive/negative) in product reviews on blogs,
# - the age of users posting on social networks,
# - the author of medieval poems,
# - spam in e-mail messages,
# - lies & deception in text,
# - doubt & uncertainty in text,
# and to:
# - improve search engine query results
#   (e.g., where "jeans" queries also yield "denim" results),
# - win at Jeopardy!,
# - win at rock-paper-scissors,
# and so on...
Пример #4
0
# is to test it with testing data, see the documentation for Classifier.test().
classifier = KNN(baseline=None)  # By default, baseline=MAJORITY
for document in m:               # (classify unknown documents with the most frequent type).
    classifier.train(document)

# These are the adjectives the classifier has learned:
print(sorted(classifier.features))
print()

# We can now ask it to classify documents containing these words.
# Note that you may get different results than the ones below,
# since you will be mining other (more recent) tweets.
# Again, a robust classifier needs lots and lots of training data.
# If None is returned, the word was not recognized,
# and the classifier returned the default value (see above).
print(classifier.classify('sweet'))   # yields 'WIN'
print(classifier.classify('stupid'))  # yields 'FAIL'

# "What can I do with it?"
# In the scientific community, classifiers have been used to predict:
# - the opinion (positive/negative) in product reviews on blogs,
# - the age of users posting on social networks,
# - the author of medieval poems,
# - spam in e-mail messages,
# - lies & deception in text,
# - doubt & uncertainty in text,
# and to:
# - improve search engine query results (e.g., where "jeans" queries also yield "denim" results),
# - win at Jeopardy!,
# - win at rock-paper-scissors,
# and so on...
Пример #5
0
# you would need a lot more training data (e.g., tens of thousands of tweets).
# The more training data, the more statistically reliable the classifier becomes.
# The only way to really know if your classifier is working correctly
# is to test it with testing data, see the documentation for Classifier.test().
classifier = KNN()
for document in corpus:
    classifier.train(document)

# These are the adjectives the classifier has learned:
print(sorted(classifier.terms))
print()

# We can ask it to classify texts containing those words.
# Note that you may get different results than the ones indicated below,
# since you will be mining other (more recent) tweets.
# Again, a robust classifier needs lots and lots of training data.
print(classifier.classify('sweet'))   # yields 'WIN'
print(classifier.classify('stupid'))  # yields 'FAIL'

# "What can I do with it?"
# In the scientific community, classifiers have been used to predict
# - the author of medieval poems,
# - the opinion (positive/negative) in product reviews on blogs,
# - the age of users posting on social networks,
# - predict spam e-mail messages,
# - predict lies in text,
# - predict doubt and uncertainty in text,
# - improve search engine query results (e.g., where "jeans" queries also yield "denim" results),
# - to win at jeopardy,
# - to win at rock-paper-scissors,
# and so on...
Пример #6
0
# is to test it with testing data, see the documentation for Classifier.test().
classifier = KNN(baseline=None)  # By default, baseline=MAJORITY
for document in m:  # (classify unknown documents with the most frequent type).
    classifier.train(document)

# These are the adjectives the classifier has learned:
print(sorted(classifier.features))
print()

# We can now ask it to classify documents containing these words.
# Note that you may get different results than the ones below,
# since you will be mining other (more recent) tweets.
# Again, a robust classifier needs lots and lots of training data.
# If None is returned, the word was not recognized,
# and the classifier returned the default value (see above).
print(classifier.classify("sweet potato burger"))  # yields 'WIN'
print(classifier.classify("stupid autocorrect"))  # yields 'FAIL'

# "What can I do with it?"
# In the scientific community, classifiers have been used to predict:
# - the opinion (positive/negative) in product reviews on blogs,
# - the age of users posting on social networks,
# - the author of medieval poems,
# - spam in e-mail messages,
# - lies & deception in text,
# - doubt & uncertainty in text,
# and to:
# - improve search engine query results (e.g., where "jeans" queries also yield "denim" results),
# - win at Jeopardy!,
# - win at rock-paper-scissors,
# and so on...
Пример #7
0
# The more training data, the more statistically reliable the classifier becomes.
# The only way to really know if your classifier is working correctly
# is to test it with testing data, see the documentation for Classifier.test().
classifier = KNN()
for document in corpus:
    classifier.train(document)

# These are the adjectives the classifier has learned:
print(sorted(classifier.terms))
print()

# We can ask it to classify texts containing those words.
# Note that you may get different results than the ones indicated below,
# since you will be mining other (more recent) tweets.
# Again, a robust classifier needs lots and lots of training data.
print(classifier.classify("sweet"))   # yields 'WIN'
print(classifier.classify("stupid"))  # yields 'FAIL'

# "What can I do with it?"
# In the scientific community, classifiers have been used to predict
# - the author of medieval poems,
# - the opinion (positive/negative) in product reviews on blogs,
# - the age of users posting on social networks,
# - predict spam e-mail messages,
# - predict lies in text,
# - predict doubt and uncertainty in text,
# - improve search engine query results (e.g., where "jeans" queries also yield "denim" results),
# - to win at jeopardy,
# - to win at rock-paper-scissors,
# and so on...