# Example no. 1
class Classifications():
    """Namespace that loads four saved classifiers and applies them to text.

    The models are loaded once, when the class body is executed, and are
    stored as class attributes: ``_category`` and ``_rating`` (via
    ``SLP.load``), ``_rating_nlp`` (via ``SVM.load``) and ``_sentiment``
    (via ``NB.load``).

    NOTE(review): several statements in this class are cut off in the
    reviewed source (unclosed calls and brackets), so the class does not
    parse as shown. The missing pieces are flagged inline below.
    """

    # Class-level ("static") paths to the saved models, resolved relative to
    # the directory containing this file.
    # NOTE(review): the second argument of every os.path.join() call below
    # (presumably the model file name) is missing -- restore from the
    # original file.
    _category_path = os.path.join(os.path.dirname(__file__),
    _rating_path = os.path.join(os.path.dirname(__file__),
    _rating_nlp_path = os.path.join(os.path.dirname(__file__),
    _sentiment_path = os.path.join(os.path.dirname(__file__),

    # Load every model at class-definition time.
    _category = SLP.load(_category_path)
    _rating = SLP.load(_rating_path)
    _rating_nlp = SVM.load(_rating_nlp_path)
    _sentiment = NB.load(_sentiment_path)

    # NOTE(review): selectWords and classify take no ``self`` and are called
    # as ``Classifications.selectWords(...)``; presumably a @staticmethod
    # decorator was lost from each -- confirm.
    def selectWords(review):
        """Reduce a review to counts of the lemmas of selected words.

        Keeps words whose tag starts with 'JJ', 'NN', 'VB' or '!' and
        returns the result of ``count()`` over their lemmas.
        """
        # NOTE(review): the next line reads like the original docstring with
        # its quotes stripped -- restore the quotes.
        a function that gets a review and selects the nouns, adjectives, verbs and exclamation mark
        review = parsetree(review, lemmata=True)[0]  # parse with lemmata; [0] keeps only the first element of the result (presumably the first sentence -- confirm)
        # select adjectives (JJ), nouns (NN), verbs (VB) and exclamation marks
        # NOTE(review): the closing ']' of this list is missing.
        review = [
            w.lemma for w in review
            if w.tag.startswith(('JJ', 'NN', 'VB', '!'))
        review = count(review)  # a dictionary of (word, count)
        return review

    def classify(text):
        """Run all four classifiers on ``text`` and return their predictions.

        Returns a dict with the keys 'text', 'rate', 'category', 'rate_nlp'
        and 'positivity'.
        """
        # Category and rating models are fed a Document built from the text.
        # NOTE(review): the two calls below are cut off after the first
        # argument.
        predicted_category = Classifications._category.classify(Document(text),
        predicted_rate = Classifications._rating.classify(Document(text),
        # The NLP rating and sentiment models are fed the selectWords() counts.
        predicted_rate_nlp = Classifications._rating_nlp.classify(
            Classifications.selectWords(text), discrete=True)
        # discrete=False: the result is kept as ``predicted_sentiment_dict``
        # (presumably a per-class probability mapping -- confirm).
        predicted_sentiment_dict = Classifications._sentiment.classify(
            Classifications.selectWords(text), discrete=False)
        # NOTE(review): the argument of str() is cut off; the leftover
        # ``reverse=True)[1][0]`` suggests a sorted(...) over
        # predicted_sentiment_dict -- restore from the original file.
        # The flag is True when the stringified value is one of the listed
        # labels.
        predicted_sentiment = True if str(
                   reverse=True)[1][0]) in ['True', '3.0', '4.0', '5.0'
                                            ] else False

        # NOTE(review): the closing '}' of the returned dict is missing.
        return {
            'text': text,
            'rate': predicted_rate,
            'category': predicted_category,
            'rate_nlp': predicted_rate_nlp,
            'positivity': predicted_sentiment
# Example no. 2
# views imports app, auth, and models, but none of these import views.
from flask import render_template, redirect, request, url_for, jsonify
from flask.ext.classy import FlaskView

from app import app
from auth import auth
from models import User

# Classifier, CSV Loading
from pattern.vector import Document, NB
from pattern.db import Datasheet

# Load the saved NB (naive Bayes) classifier once, when this module is
# imported. The path is relative, so it is resolved against the working
# directory of the process.
nb = NB.load("project/data/amazonClassifier")

@app.route('/classify', methods=['POST'])
def classify_review():
    """Classify the review posted in the ``text`` form field.

    Returns:
        A JSON response ``{"result": <class predicted by nb>}``. When the
        request has no ``text`` field, a JSON ``{"error": ...}`` body with
        HTTP status 400 is returned instead.
    """
    text = request.form.get('text')
    if text is None:
        # form.get() yields None for an absent field; answer with a client
        # error rather than letting None.strip() raise and surface as a 500.
        return jsonify(error="Missing required form field 'text'."), 400
    return jsonify(result=nb.classify(text.strip()))

class BaseView(FlaskView):
    """Basic site pages (for example home and about), rooted at '/'."""

    route_base = '/'

    def index(self):
        """Render and return the home page template."""
        home_page = render_template('home.html')
        return home_page
class NBModel:
    def __init__(self):
        """Create the model, loading a previously saved classifier if any.

        Attributes set:
            nb: a fresh ``NB()``, replaced by the loaded classifier when
                loading succeeds.
            stats: a ``Statistics`` instance.
            new_nb_model: True when a saved classifier was loaded, False
                when loading raised IOError.
        """
        self.nb = NB()
        self.stats = Statistics()
        try:
            cwd = os.getcwd()
            print("dir: " + cwd)
            # The model paths are relative, so the right one depends on the
            # directory the process was started from.
            if cwd.endswith("tv_ratings_frontend"):
                print("Working in django")
                model_path = "ratings_frontend/backend/pattern_ml/nb_training.p"
            else:
                print("Not working in django")
                model_path = "./nb_training.p"
            self.nb = self.nb.load(model_path)
            # NOTE(review): the flag is True when an existing model was
            # loaded and False when none was found, which reads inverted
            # relative to its name. Left unchanged because callers may rely
            # on the current values -- confirm.
            self.new_nb_model = True
            print("Using existing pickled model")
        except IOError:
            self.new_nb_model = False
            print("Creating new NB model")

    def nb_train_text(self, reviews):
        """Train the classifier on the full text of every rated review.

        Args:
            reviews: iterable of objects exposing ``text`` and ``rating``.
                Reviews whose rating is None are skipped; the rating,
                converted with ``int()``, is used as the document type.
        """
        for review in reviews:
            if review.rating is None:
                continue
            v = Document(review.text, type=int(review.rating), stopwords=True)
            # Feed the document to the classifier; previously it was built
            # and discarded, so this method trained nothing.
            self.nb.train(v)

    def nb_train_summary(self, reviews):
        """Train the classifier on the summary of every rated review.

        Args:
            reviews: iterable of objects exposing ``summary`` and
                ``rating``. Reviews whose rating is None are skipped; the
                rating, converted with ``int()``, is used as the document
                type.
        """
        for review in reviews:
            if review.rating is None:
                continue
            v = Document(review.summary, type=int(review.rating), stopwords=True)
            # Feed the document to the classifier; previously it was built
            # and discarded, so this method trained nothing.
            self.nb.train(v)

    def nb_train_all_text(self, review_set):
        """Call ``nb_train_text`` on every review list in ``review_set``.

        Args:
            review_set: iterable of review iterables, each passed unchanged
                to ``nb_train_text``.
        """
        # NOTE(review): the loop body was missing in the reviewed source.
        # Delegating to nb_train_text is the presumed intent -- confirm
        # against the original file.
        for review_list in review_set:
            self.nb_train_text(review_list)

    def save_model(self):
        # NOTE(review): no statement is left in this body in the reviewed
        # source (only the commented-out print below), so the method cannot
        # compile as shown. Presumably it saves self.nb to the same
        # nb_training.p file that __init__ loads -- confirm and restore the
        # missing call from the original file.
    #    print ""

    def nb_test_imdb(self, reviews):
        """Test the classifier on rated reviews and print the result.

        Every review whose rating is not None becomes a ``Document`` built
        from its cleaned text (``review_to_words``) with the ``int()`` of
        its rating as type. The documents are passed to ``self.nb.test``
        and the returned value is printed.

        Args:
            reviews: iterable of objects exposing ``text`` and ``rating``.
        """
        # Collect the documents; previously each one was built and dropped,
        # so the classifier was always tested against an empty list.
        docs = [
            Document(self.review_to_words(review.text),
                     type=int(review.rating), stopwords=True)
            for review in reviews
            if review.rating is not None
        ]
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(self.nb.test(docs, target=None))

    def nb_classify_tweets(self, tvshow, tweets):
        """Classify each tweet and report the summed rating for ``tvshow``.

        Each tweet is cleaned with ``review_to_words``, wrapped in a
        ``Document`` and classified; the predicted classes are summed.
        The sum and the tweet count are printed through
        ``Statistics().printStats`` and the classifier's distribution is
        printed as well.

        Args:
            tvshow: passed through unchanged to the ``Statistics`` calls.
            tweets: iterable of raw tweet strings.

        Returns:
            The value of ``Statistics().get_stats(tvshow, rating_sum, count)``.
        """
        # Clean each tweet once: only the predicted class is used, so the
        # second review_to_words() call per tweet was wasted work.
        ratings = [
            self.nb.classify(Document(self.review_to_words(tweet)))
            for tweet in tweets
        ]
        rating_sum = sum(ratings)
        tweet_count = len(ratings)

        Statistics().printStats(tvshow, rating_sum, tweet_count)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(self.nb.distribution)

        return Statistics().get_stats(tvshow, rating_sum, tweet_count)

    def nb_stats(self):
        """Print the classifier's summary attributes, one per line.

        Prints ``classes``, ``skewness``, ``distribution``, ``majority``
        and ``minority`` of ``self.nb``, in that order, after a header.
        """
        print('----------- Classifier stats -----------')
        # Format each line as a single string: print("label", value) with
        # two arguments prints a tuple repr under the Python 2 print
        # statement used elsewhere in this class.
        for attr in ("classes", "skewness", "distribution", "majority", "minority"):
            print("%s: %s" % (attr.capitalize(), getattr(self.nb, attr)))

    def review_to_words(self, raw_review):
        """Reduce raw text to a space-joined string of lowercase words.

        Steps: strip URLs, replace every non-letter character with a space,
        lowercase and split on whitespace, then drop English stop words.

        Args:
            raw_review: the raw text to clean.

        Returns:
            The remaining words joined by single spaces ("" when none
            remain).
        """
        # Raw string: the pattern contains \( and \), which are invalid
        # escape sequences in a normal string literal.
        no_url = re.sub(
            r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+",
            "", raw_review)

        # Replace everything that is not an ASCII letter (digits,
        # punctuation, symbols) with a space.
        letters_only = re.sub(r"[^a-zA-Z]", " ", no_url)

        # to lowercase
        words = letters_only.lower().split()

        # Stop words (the, of, a, ...) are loaded once per instance instead
        # of on every call.
        stops = getattr(self, "_english_stops", None)
        if stops is None:
            stops = self._english_stops = frozenset(stopwords.words("english"))

        meaningful_words = [w for w in words if w not in stops]

        return " ".join(meaningful_words)
