Example #1
def sent_anly_prediction():
    text, sentiment, img_filename = '', '', ''  # defaults so the final render works on GET
    if request.method == 'POST':
        text = request.form['text']
        max_review_length = 500
        print(text + "\n")
        start = time.time()
        Util = util()
        end2 = time.time()
        # time for this step
        print(f"Runtime of util is {end2 - start}")
        t = [Util.text_util_final(text)]
        end3 = time.time()
        # time for this step
        print(f"Runtime of text_util_final is {end3 - end2}")
        dict_tfidf = Dict_Tfidf(PATH)
        end4 = time.time()
        # time for this step
        print(f"Runtime of Dict_Tfidf is {end4 - end3}")
        vectorizer = dict_tfidf.create_dict_tfidf()
        end5 = time.time()
        # time for this step
        print(f"Runtime of create_dict_tfidf is {end5 - end4}")
        vector_tfidf = vectorizer.transform(t)
        end6 = time.time()
        # time for this step
        print(f"Runtime of transform is {end6 - end5}")
        label = model.predict(vector_tfidf)
        class1 = str(label[0])
        end7 = time.time()
        # time for this step
        print(f"Runtime of predict is {end7 - end6}")
        end = time.time()
        # total time taken
        print(f"Runtime of the program is {end - start}")
        print(class1 + "\n")
        # label "1" means negative (sad emoji); anything else is positive (smiling emoji)
        if class1 == "1":
            sentiment = 'Negative'
            img_filename = os.path.join(app.config['UPLOAD_FOLDER'],
                                        'Sad_Emoji.png')
        else:
            sentiment = 'Positive'
            img_filename = os.path.join(app.config['UPLOAD_FOLDER'],
                                        'Smiling_Emoji.png')
        print(sentiment)
        # Append the predicted class and the input text to a CSV file
        with open("output/output_sentiment_analysis.csv",
                  'a',
                  newline='',
                  encoding="utf8",
                  errors="ignore") as out:
            fieldnames = ['class', 'content']
            writer = csv.DictWriter(out, fieldnames)
            if out.tell() == 0:  # write the header only once, when the file is new
                writer.writeheader()
            writer.writerow({'class': sentiment, 'content': text.strip("\n")})
    return render_template('home.html',
                           text=text,
                           sentiment=sentiment,
                           image=img_filename)
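The view above relies on module-level state that the listing omits: a Flask app with an UPLOAD_FOLDER config, a preloaded model, and a route registration. A minimal sketch of that scaffolding, where the route path, model file, and folder location are assumptions rather than taken from the source:

import joblib
from flask import Flask, render_template, request

app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = 'static/images'   # assumed location of the emoji images
model = joblib.load('./models/best_model.pkl')  # assumed; Example #2 loads the same file

# The view in Example #1 would then be registered with a route, e.g.:
# @app.route('/predict', methods=['GET', 'POST'])
# def sent_anly_prediction(): ...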
Example #2
def classify():
    ap = argparse.ArgumentParser()
    ap.add_argument("-t", "--text", help="enter a text passage")
    args = vars(ap.parse_args())
    text = args["text"]
    Util = util()
    text = Util.text_util_final(text)

    # Special case: "nhưng"/"nhung" ("but" in Vietnamese) can flip the sentiment
    if "nhưng" in text or "nhung" in text:
        exception.exist_nhung(text)
        exit(0)
    # Special case: "được mỗi"/"được cái" ("the only good thing is ...") signals negativity
    if "được mỗi" in text or "được cái" in text:
        print("This is a negative comment!")
        exit(0)

    text = [text]  # already normalized above, so just wrap it for the vectorizer
    dict_tfidf = Dict_Tfidf(PATH)
    vectorizer = dict_tfidf.create_dict_tfidf()
    vector_tfidf = vectorizer.transform(text)

    model = joblib.load('./models/best_model.pkl')
    label = model.predict(vector_tfidf)
    if label[0] == 1:
        print("This is a negative comment!")
    else:
        print("This is a positive comment!")
Example #3
    def create_dict_tfidf(self):
        start8 = time.time()
        ds = DataSource()
        dict_data = pd.DataFrame(ds.load_data(self.path)).review
        dict_data = dict_data.tolist()
        Util = util()
        # Normalize every review before fitting the vectorizer
        A = [Util.text_util_final(text) for text in dict_data]

        end8 = time.time()
        # total time taken
        print(f"Runtime of preprocess is {end8 - start8}")

        vectorizer = TfidfVectorizer(max_features=100000, ngram_range=(1, 3))
        end9 = time.time()
        # time to construct the vectorizer
        print(f"Runtime of tfidfvectorizer is {end9 - end8}")

        vectorizer.fit(A)
        end10 = time.time()
        # time to fit the vectorizer
        print(f"Runtime of fit is {end10 - end9}")

        return vectorizer
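The timing prints exist because create_dict_tfidf reloads and refits the TF-IDF dictionary from scratch on every call. A common remedy is to persist the fitted vectorizer once and reload it afterwards; a minimal sketch, where the cache filename is an assumption:

import os
import joblib

def get_vectorizer(path, cache='./models/tfidf_vectorizer.pkl'):
    # Reuse a previously fitted vectorizer when available;
    # otherwise fit it once and cache it for later calls.
    if os.path.exists(cache):
        return joblib.load(cache)
    vectorizer = Dict_Tfidf(path).create_dict_tfidf()
    joblib.dump(vectorizer, cache)
    return vectorizer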
Example #4
def application(environ, start_response):
    # Set up initial parameters and utility instances
    request = webob.Request(environ)
    params = json.loads(request.body) if request.body else {
        'username': '******'
    }  # Default to USDI
    utl = utils.util(environ, start_response)

    try:

        # Authorize the Twitter API `auth` object using OAuth tokens
        auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
        auth.set_access_token(access_token, access_secret)

        # Set the `auth` object for tweepy's API
        twitter_api = tweepy.API(auth)

        # Perform a search
        ## tweets = twitter_api.search(q='from%3A{usernm}%20OR%20%23{usernm}%20OR%20%40{usernm}'
        ##    .format(usernm=params['username'])) # from:nasa OR #nasa OR @nasa
        tweets = twitter_api.search(q='from%3A{usernm}'.format(
            usernm=params['username']))  # from:interior

        # Iterate over first 6 tweets and add information to response payload
        payload = []
        for tweet in tweets[:6]:
            tweet_props = {
                "text": tweet.text,  # Tweet contents
                "time": tweet.created_at.strftime(
                    "%-m/%-d/%y at %I:%M %p"),  # Datetime formatted
                "num_words": len(tweet.text.split(
                ))  # Number of words in content split by whitespace
            }
            payload.append(tweet_props)

        # Log a successful result
        res = True

    except Exception as e:
        res = False
        err_texts = []
        excptn, err_msg, tb = sys.exc_info()
        for file_name, line_no, func_name, text in traceback.extract_tb(tb):
            err_text = '{} (LINE {}) in {}() -> `{}`'.format(
                os.path.basename(file_name), line_no, func_name, text)
            err_texts.append(err_text)
        err_texts.insert(0, '{} {}'.format(e, type(e)))
        tb_error = '\n'.join(err_texts)

    finally:
        data = collections.OrderedDict()
        data['result'] = str(res)
        # Comment out in production
        data['traceback'] = tb_error if not res else None
        if res:
            data['payload'] = payload
        resp = json.dumps(data)
        return utl.send_data(resp)
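For local testing, the WSGI application above can be served without a full web server. A minimal sketch, assuming the handler lives in a module named app (the module name and port are assumptions):

from wsgiref.simple_server import make_server

from app import application  # assumed module name for the handler above

# Serve on localhost:8000; POST a JSON body such as {"username": "interior"}
with make_server('', 8000, application) as httpd:
    httpd.serve_forever()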
Example #5
 def create_dict_tfidf(self):
     ds = DataSource()
     dict_data = pd.DataFrame(ds.load_data(self.path)).review
     dict_data = dict_data.tolist()
     Util = util()
     # Normalize every review before fitting the vectorizer
     A = [Util.text_util_final(text) for text in dict_data]
     vectorizer = TfidfVectorizer(max_features=100000, ngram_range=(1, 3))
     vectorizer.fit(A)
     return vectorizer
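Examples #3 and #5 (and #6 below) depend on a DataSource whose load_data(path) returns records with a review field, plus a label field for training data. The real class is not shown in this listing; a CSV-backed stand-in with that shape might look like this (purely illustrative):

import pandas as pd

class DataSource:
    # Illustrative stand-in: reads a CSV with `review` and `label` columns
    # and returns records that pd.DataFrame(...) can consume, as the
    # surrounding examples expect.
    def load_data(self, path):
        return pd.read_csv(path).to_dict('records')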
Example #6
def create_tfidf_vector(path):
    # Note: the dictionary is built from the global PATH, not the `path` argument
    dict_tfidf = Dict_Tfidf(PATH)
    vectorizer = dict_tfidf.create_dict_tfidf()

    # Load the data
    ds = DataSource()
    train_data = pd.DataFrame(ds.load_data(path))
    x_train = train_data.review
    y_train = train_data.label

    # Normalize the data
    x_train = x_train.tolist()
    Util = util()
    A = [Util.text_util_final(text) for text in x_train]

    # Convert to TF-IDF vectors
    x_train_tfidf = vectorizer.transform(A)
    return x_train_tfidf, y_train
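create_tfidf_vector pairs naturally with a training step that produces the ./models/best_model.pkl loaded in Example #2. A minimal sketch, where the choice of LogisticRegression and the use of PATH as the training file are assumptions:

import joblib
from sklearn.linear_model import LogisticRegression

# Vectorize the training data, fit a classifier, and persist it
# to the path that Example #2 loads from.
x_train_tfidf, y_train = create_tfidf_vector(PATH)
clf = LogisticRegression(max_iter=1000)
clf.fit(x_train_tfidf, y_train)
joblib.dump(clf, './models/best_model.pkl')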
Example #7
 def __init__(self, CV_fold=5, produceResult=False, forceRegen=False):
     self.forceRegen = forceRegen
     if self.forceRegen:
         print('Dataset force regeneration is active, regenerating all datasets')
     else:
         print('Dataset force regeneration deactivated, using existing datasets')
     # Create working directories
     self.dirCreate()
     # Instantiate utility class
     self.utils = util()
     self.readData()
     self.raw_standarization()
     self.class_split()
     if produceResult or self.forceRegen:
         self.correlationMx()
         self.CV_feature_plot()
         self.feature_selection()
     self.CV_datasetGen(fold=CV_fold, pca_type='Vani', test_ratio=0.2)
     self.CV_datasetGen(fold=CV_fold, pca_type='Ker', test_ratio=0.2)
Example #8
 def __init__(self, bot):
     self.bot = bot
     self.db = ub.DBUtils()
     self.u = ii.util(self.bot)
     self.client = api.Client(None)
     self.utils = ii.util(self.bot)  # second util instance; `self.u` above looks redundant