Example #1
    def GET(self, request):
        web.header('Access-Control-Allow-Origin', '*')
        if not web.svm:
            return 'error'

        # Read the raw request parameters once and decode them explicitly.
        params = web.input(_unicode=False)
        text = params['text'].decode('utf-8', 'ignore')
        account_id = int(params['id'].decode('utf-8', 'ignore')) or 1
        print(u"Classifying text [{0}]".format(text))

        features_list = [features.tweet_to_words(text)]
        try:
            # Look up the per-account SVM and classify the single tweet.
            labels = web.svm[account_id].classify_many(features_list)
            return labels[0] if labels else 'empty'
        except Exception:
            traceback.print_exc()
            return 'error'
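For context, a minimal sketch of how a handler like this is usually mounted in web.py. The route pattern, the handler class name, and the module-level web.svm dict of per-account classifiers are assumptions, not shown in the source:

import pickle

import web

urls = ('/classify/(.*)', 'Classify')  # route assumed; the one group feeds `request`
app = web.application(urls, globals())

# Example #1 resolves models via web.svm[account_id], so the service
# presumably attaches a dict of trained classifiers at startup
# (the pickle path below is hypothetical).
web.svm = {1: pickle.load(open('model/svm/account_1/1/svm.pickle', 'rb'))}

if __name__ == '__main__':
    app.run()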
Example #3

# (The opening of this example was cut off; the imports and function header
# below are reconstructed around the surviving body.)
import random

import nltk
from nltk.corpus import stopwords

import features  # local feature-extraction module shared by these examples


def get_word_features(words):
    # Frequency distribution of the words, minus English stopwords.
    all_words = nltk.FreqDist(
        w.lower() for w in words
        if w.lower() not in stopwords.words('english')
    )
    word_features = list(all_words)[:500]  # first 500 distinct words
    all_words.pprint(500)
    return word_features
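One performance note: stopwords.words('english') rebuilds its list on every step of the generator above, so real code usually hoists it into a set once. A self-contained sketch of the same pattern (the toy word list is not from the source):

import nltk
from nltk.corpus import stopwords

nltk.download('stopwords', quiet=True)  # one-time corpus fetch
stop = set(stopwords.words('english'))  # hoisted once; O(1) membership tests

words = ['The', 'cat', 'sat', 'on', 'the', 'mat', 'the', 'cat']
fd = nltk.FreqDist(w.lower() for w in words if w.lower() not in stop)
print(fd.most_common(2))  # [('cat', 2), ('sat', 1)]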


print(decisions.categories())
documents = [(list(decisions.words(fileid)), category)
             for category in decisions.categories()
             for fileid in decisions.fileids(category)]

#random.shuffle(documents)
print(documents)

pos_features = [(features.tweet_to_words(d), c) for (d, c) in documents
                if c == 'pos']
neg_features = [(features.tweet_to_words(d), c) for (d, c) in documents
                if c == 'neg']

random.shuffle(pos_features)
random.shuffle(neg_features)

chosen_features_200 = pos_features[:100] + neg_features[:100]
random.shuffle(chosen_features_200)

featuresets = chosen_features_200

size = 200

train_set, test_set = featuresets[size // 2:], featuresets[:size // 2]
classifier = nltk.DecisionTreeClassifier.train(train_set)
print("Decision trees accuracy = [{0}]".format(
Example #4
# (Imports reconstructed; the opening of this example is not shown.)
import os
import random

from nltk.classify.scikitlearn import SklearnClassifier
from sklearn.svm import LinearSVC

import features  # local feature-extraction module

# Per-account tweet corpus lives under ~/nltk_data/corpora.
home = os.path.expanduser("~")
path = os.path.join(
    home,
    'nltk_data{s}corpora{s}tweets_publish_choice_{account}{s}{version}'.format(
        account=account_id,
        version=version,
        s=os.sep,
    ))
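Neither example defines `decisions`; given the corpus directory assembled above, an NLTK categorized corpus reader along the following lines would match how it is used below. The fileid and category patterns are guesses:

from nltk.corpus.reader import CategorizedPlaintextCorpusReader

# Hypothetical reconstruction: provides the categories(), fileids(category),
# and words(fileid) calls the script below relies on.
decisions = CategorizedPlaintextCorpusReader(
    path, r'.*\.txt', cat_pattern=r'(pos|neg)/.*')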

print(decisions.categories())
documents = [(list(decisions.words(fileid)), category)
             for category in decisions.categories()
             for fileid in decisions.fileids(category)
             if os.path.getsize(os.path.join(path, fileid)) > 0]

pos_features = [(features.tweet_to_words(d), c) for (d, c) in documents
                if c == 'pos']
neg_features = [(features.tweet_to_words(d), c) for (d, c) in documents
                if c == 'neg']

chosen_features_limit = pos_features[:limit // 2] + neg_features[:limit // 2]
random.shuffle(chosen_features_limit)

featuresets = chosen_features_limit

train_set, test_set = featuresets[limit // 2:], featuresets[:limit // 2]

# Wrap scikit-learn's LinearSVC in NLTK's classifier interface.
svm = SklearnClassifier(LinearSVC())
svm.train(train_set)

path = os.path.normpath('../model/svm/account_{0}/{1}/'.format(