Example #1
def main():
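    # Assumed elsewhere in this module: arg_parse() builds the CLI parser,
    # scrape_to_json() writes scraped data to a JSON file, and `scraper` is a
    # module-level scraping callable passed through to it.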
    args = arg_parse()
    infile = args.file
    links = args.links
    outdir = args.output_dir

    outfile = scrape_to_json(infile, outdir, links, scraper)
    analyze(outfile)
Example #2
def main_page():
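    # Flask view. Assumes module-level helpers: search() returns matching
    # tweets, analyze() returns a (score, magnitude) pair per tweet, and
    # overall_sentiment() maps the averages to a human-readable label.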
    if request.method == 'POST':
        hashtag = '#' + request.form['user_search']
        city = request.form['city'].title()
        radius = request.form['radius']
        tweets = search(hashtag, city, radius)
        if len(tweets) == 0:
            return render_template('no_results.html',
                                   city=city,
                                   topic=request.form['user_search'])
        sentiments = []
        for tweet in tweets:
            sentiments.append(analyze(tweet))
        total_score = 0
        total_magnitude = 0
        for score, magnitude in sentiments:
            total_score += score
            total_magnitude += magnitude
        avg_score = total_score / len(sentiments)
        avg_magnitude = total_magnitude / len(sentiments)
        sentiment = overall_sentiment(avg_score, avg_magnitude)
        return render_template('results.html',
                               city=city,
                               sentiment=sentiment,
                               topic=request.form['user_search'],
                               tweets=tweets)
    else:
        return render_template('search.html')
Example #3
def initialLoadDB(history):
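    # Assumes module-level imports/objects: html2text, requests, BeautifulSoup
    # (bs4), a PyMongo database handle `db`, `sentiment`, and classifyForDB().
    # `history` is an iterable of Chrome history records with a 'url' key.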
    chromeData = history
    h = html2text.HTML2Text()

    # Ignore converting links from HTML
    h.ignore_links = True
    h.ignore_images = True
    h.ignore_anchors = True
    h.skip_internal_links = True

    for link in chromeData:
        try:
            if "google.com" in link['url'] or "slack.com" in link['url']:
                continue
            # count_documents() replaces the removed Cursor.count() (PyMongo 4+)
            if db.webtext.count_documents({"parent_url": link['url']}) == 0:
                if link['url'].startswith('http'):
                    html = requests.get(link['url']).text
                    parent_html = html
                    plain_text = h.handle(html).strip()
                else:
                    continue

                soup = BeautifulSoup(html, "html.parser")
                children = set()
                for childLink in soup.findAll('a'):
                    href = childLink.get('href')
                    if href is not None and href.startswith('http'):
                        # keep only deep links or explicit .html pages
                        if href.count("/") > 4 or ".html" in href:
                            children.add(href)

                update_query = {'parent_url': link['url']}
                class_data = classifyForDB([link['url'], plain_text])
                # note: sent and magnitude are computed but never persisted here
                # (compare getUrls() further below, which stores them)
                sent, magnitude = sentiment.analyze(plain_text)
                post_data = {
                    'parent_url': link['url'],
                    'parent_text': plain_text,
                    'parent_html': parent_html,
                    'classify_data': class_data,
                    'child_links': list(children),
                    'depth': 0,
                    'searched': False
                }

                # replace_one(upsert=True) is the modern PyMongo equivalent of
                # the removed Collection.update() with a full replacement document
                result = db.webtext.replace_one(update_query,
                                                post_data,
                                                upsert=True)

                print(result)
            else:
                continue

        except Exception as e:
            print(e)
Example #4
def home(request):
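    # Django view. Assumes sentiment.analyze() returns a dict suitable for use
    # directly as the template context.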
    response = {}
    text = ''
    if request.method == 'POST':
        text = request.POST['text']
        response = sentiment.analyze(text)

    return render(request, 'app/index.html', response)
Example #5
def analyze(request):
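    # Django view. Note that request.body is bytes; sentiment.analyze() is
    # assumed to accept raw bytes here.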
    response = sentiment.analyze(request.body)

    # return HttpResponse(unescape(json.dumps(response)), content_type='application/json')
    return render(request,
                  'app/indexpartial-vert.html',
                  response,
                  content_type='text/html')
Example #6
def dashboard():
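    # Flask view. Assumes module-level objects: a `users` Mongo collection, a
    # GridFS handle `fs`, UPLOAD_FOLDER and user_photo (upload path pieces),
    # detect_face, and the analyzeResume()/analyzeRelease() helpers.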
    if 'email' not in session:
        return redirect(
            url_for('login'))  #requires an account to access this page
    resume = True
    photo = True
    emotions = False
    photo_feedback = False
    resume_feedback = False
    score = False
    error = False
    if request.method == 'POST':
        user_info = users.find_one({'email': session['email']})
        if 'resume' in request.form:
            if user_info['resume'] != '':
                resume_data = user_info['resume']
                parsed_output = json.loads(resume_data)
                with open("sentiment_results.txt", 'w') as fh:
                    fh.write(
                        json.dumps(parsed_output["DocumentElement"]["Resume"]["Experience"],
                                   indent=4,
                                   sort_keys=True))
                results = sentiment.analyze("sentiment_results.txt")
                score = results[0]
                resume_feedback = analyzeResume(score)
            else:
                resume = False
        if 'photo' in request.form:
            if fs.exists(user_info['image']):
                image_data = fs.get(user_info['image'])
                with open(UPLOAD_FOLDER + user_photo, 'wb') as f:
                    f.write(image_data.read())
                emotions = detect_face.detect_faces(UPLOAD_FOLDER + user_photo)
                photo_feedback = analyzeRelease(
                    emotions["anger"], emotions["joy"], emotions["surprise"],
                    emotions["blurred"],
                    emotions["headwear"]) if emotions else False
                error = not photo_feedback

            else:
                photo = False
    return render_template("dashboard.html",
                           emotions=emotions,
                           photo=photo,
                           score=score,
                           resume=resume,
                           photo_feedback=photo_feedback,
                           resume_feedback=resume_feedback,
                           error=error)
Example #7
def compare(phrase, expected_bool, file_desc, expected_result=None):
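    # Test helper: dispatches on file_desc and scores one phrase against the
    # expected output. Assumes module-level `path`, `nlp_util`, `sentiment`,
    # and check_result().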
    if file_desc == "path planning":
        is_command = path.isLocCommand(phrase)
        if is_command == expected_bool:
            if is_command:
                result = path.process_loc(phrase)
                return check_result(phrase, result, expected_result)
            else:
                return 1
        else:
            print("The phrase is: " + str(phrase) +
                  ", correct output should be: " + str(expected_bool) +
                  ", instead we have: " + str(is_command))
            return 0
    elif file_desc == "is question":
        result = nlp_util.is_question(phrase)
        return check_result(phrase, result, (expected_bool, expected_result))
    elif file_desc == "sentiment":
        label, result = sentiment.analyze(phrase)
        return check_result(phrase, label, expected_bool)
    else:
        print("invalid file_desc")
        return 0
Example #8
def getUrls(urls):
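    # Like initialLoadDB() above, but driven by a plain URL list and also
    # persisting the sentiment score. Assumes the same module-level objects:
    # html2text, requests, BeautifulSoup, `db`, `sentiment`, classifyForDB().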
    h = html2text.HTML2Text()

    # Ignore converting links from HTML
    h.ignore_links = True
    h.ignore_images = True
    h.ignore_anchors = True
    h.skip_internal_links = True

    website_text = []

    for url in urls:
        # delete_one forces a fresh scrape each run; with it in place the
        # cached branch below is never taken
        db.webtext.delete_one({"parent_url": url})
        query = db.webtext.find({"parent_url": url})
        # count_documents() replaces the removed Cursor.count() (PyMongo 4+)
        if db.webtext.count_documents({"parent_url": url}) == 0:
            if url.startswith('http'):
                html = requests.get(url).text
                parent_html = html
                plain_text = h.handle(html).strip()
            else:
                continue

            soup = BeautifulSoup(html, "html.parser")
            children = set()
            for childLink in soup.findAll('a'):
                href = childLink.get('href')
                if href is not None and href.startswith('http'):
                    # keep only deep links or explicit .html pages
                    if href.count("/") > 4 or ".html" in href:
                        children.add(href)

            update_query = {'parent_url': url}
            class_data = classifyForDB([url, plain_text])
            sent, magnitude = sentiment.analyze(plain_text)
            if sent is None or magnitude is None:
                # raising a plain string is a TypeError in Python 3; raise a
                # real exception instead
                raise RuntimeError("Google analysis down")

            post_data = {
                'parent_url': url,
                'parent_text': plain_text,
                'parent_html': parent_html,
                'classify_data': class_data,
                'sentiment': sent,
                'magnitude': magnitude,
                'child_links': list(children),
                'depth': 0,
                'searched': False
            }

            # replace_one(upsert=True) is the modern PyMongo equivalent of
            # the removed Collection.update() with a full replacement document
            db.webtext.replace_one(update_query, post_data, upsert=True)
            result = {k: post_data[k]
                      for k in ("classify_data", "sentiment", "magnitude")}

            website_text.append(result)
        else:
            data = next(query)
            result = {"classify_data": data["classify_data"]}
            if "sentiment" in data:
                result["sentiment"] = data["sentiment"]
                result["magnitude"] = data["magnitude"]
            website_text.append(result)

    return website_text
Example #9
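# Script: scans recent public GitHub push events and prints each commit
# message whose sentiment score is negative. Assumes a local sentiment
# module whose analyze() returns a dict with a 'score' key.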
from urllib.request import urlopen
import json
import sentiment

data = urlopen('https://api.github.com/events').read().decode('utf8')
response = json.loads(data)

for event in response:
    if event['type'] == 'PushEvent':
        commits = event['payload']['commits']
        if len(commits) > 0:
            for commit in commits:
                msg = commit['message']
                if sentiment.analyze(msg)['score'] < 0:
                    print(
                        '========================================================'
                    )
                    print(msg)

print('========================================================')
Example #10
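# Worker loop: dequeues messages from a local queue service; each message is
# assumed to name a CSV object in the gs://isye-7406 bucket, which is fetched
# with gsutil, analyzed, and re-uploaded as <name>_nlp.csv. sentiment.analyze()
# is assumed to read and write those files in the working directory.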
import requests
import sentiment
import sys
import os
import re

while True:
    response = requests.post(
        'http://queue:8080/memq/server/queues/bitcoin2/dequeue')
    if response.status_code == 200:
        message_str = response.json()['body']
        print(message_str)
        # note: re.escape() is a regex escape, not a shell escape; shlex.quote()
        # would be the safer choice when building shell commands
        filestr = re.escape(message_str)
        command_str = 'gsutil cp gs://isye-7406/' + filestr + '.csv .'
        os.system(command_str)
        sentiment.analyze(message_str)
        command_str = 'gsutil cp ./' + filestr + '_nlp.csv gs://isye-7406/nlp/' + filestr + '_nlp.csv'
        print(command_str)
        os.system(command_str)
    else:
        break
sys.exit(0)
Example #11
def get_sentiment_data_stream():
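    # Flask view. Assumes speechToText.speechToText() transcribes the posted
    # audio, `nlu` is a natural-language client, sentiment.analyze(nlu, text)
    # returns keywords plus per-document emotion scores, and `cache` is a
    # module-level dict. Note: a transcript with fewer than `amt` words would
    # make the range() step zero and raise a ValueError.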
    # amt = int(request.args.get('split'))
    amt = 3
    name = request.args.get('name')
    data = request.get_data(cache=False)
    ress = speechToText.speechToText(data)
    res = ress.split()
    # cumulative word-prefixes of the transcript in `amt` roughly equal steps
    step = len(res) // amt
    splits = [" ".join(res[0:i]) for i in range(step, len(res) + step, step)]
    data = sentiment.analyze(nlu, ress)
    keys = [data["keywords"][i]["text"] for i in range(len(data["keywords"]))]
    for i in range(len(splits)):
        splits[i] = sentiment.analyze(
            nlu, splits[i])["emotion"]["document"]["emotion"]

    # one series per emotion, in the shape the charting front end expects
    items = []
    for emotion in ("anger", "sadness", "joy", "fear", "disgust"):
        items.append({
            "id": emotion,
            "data": [{"x": i, "y": splits[i][emotion]} for i in range(len(splits))]
        })
    ret = {"items": items, "keywords": keys, "name": name, "text": ress}
    cache[name] = ret
    return ret
Example #12
def get_sentiment_data():
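    # Flask view. Same speechToText/nlu assumptions as the streaming variant
    # above; returns only the document-level emotion scores.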
    data = request.get_data(cache=False)
    res = speechToText.speechToText(data)
    fin_res = sentiment.analyze(nlu, res)
    return fin_res["emotion"]["document"]["emotion"]
Example #13
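# Fragment: begins mid-function. Assumes earlier code defined `results`
# (fetched Tweets), `csv_writer`, an analyze() helper returning a dict with a
# 'compound' score, `file_name`, and collections.Counter.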
    print("--- Opened a CSV file to store the results of your sentiment analysis... \n")

    # tidy up the Tweets and send each to the AYLIEN Text API
    for c, result in enumerate(results, start=1):
        tweet = result.text
        tidy_tweet = tweet.strip()
        tweet_time = result.created_at
        tweet_id = result.id

        if not tweet:
            print('Empty Tweet')
            continue

        response = analyze(tidy_tweet)
        csv_writer.writerow({
            "Tweet_ID": tweet_id,
            "Time": tweet_time,
            'Tweet': tidy_tweet,
            'Sentiment': response['compound'],
        })

        print("Analyzed Tweet {}".format(c))

positive = 0
negative = 0
neutral = 0
# count the data in the Sentiment column of the CSV file
with open(file_name, 'r') as data:
    counter = Counter()